/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				      const_tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);

static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static enum unwind_info_type arm_except_unwind_info (void);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_optimization (int, int);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION arm_option_optimization

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
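/* (Arithmetic check of the spacing claim above: a block spans offsets
   -4088 through +4095 around its anchor, i.e. 4088 + 1 + 4095 = 8184
   bytes in total, and 8184 = 8 * 1023, so consecutive anchors tile at
   a whole number of 8-byte units.)  */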
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse the -mstructure_size_boundary command-line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
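/* Worked example of how the masks above compose: FL_FOR_ARCH7A
   = FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K
   = ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) | FL_NOTM | FL_ARCH6K,
   so an ARMv7-A target inherits FL_THUMB2 (via FL_FOR_ARCH6T2) and
   puts back the FL_NOTM bit that the M-profile variants strip out.  */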
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */
/* The maximum number of insns to be used when loading a constant.  */
static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
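/* Usage sketch (illustrative, not part of the original file): a call
   such as emit_set_insn (reg, GEN_INT (0)) emits the single-set insn
   (set (reg:SI n) (const_int 0)) into the instruction stream and
   returns the emitted insn.  */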
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     AAPCS.  */
  if (!TARGET_AAPCS_BASED)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
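  /* (Illustration of the divmod reuse described above: a plain SImode
     `a % b' therefore expands to a call to __aeabi_idivmod, after
     which the remainder is read from r1 -- the run-time ABI returns
     the quotient in r0 and the remainder in r1.)  */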
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as follows:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
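/* Illustrative note (not from the original file): with the type built
   above, user-level code behaves as if the C library had declared

       struct __va_list { void *__ap; };
       typedef struct __va_list va_list;

   and arm_extract_valist_ptr below simply digs the single __ap
   pointer back out of that wrapper structure.  */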
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
	return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}

/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;
    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;
    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;
    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;
    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;
    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);

	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}
      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
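      /* (For reference: a 7-bit byte range covers anchor offsets
	 0-127; a Thumb-1 word load itself encodes a 5-bit offset
	 scaled by 4, reaching bytes 0-124 from its base.)  */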
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
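      /* (Arithmetic check: 248 + 1 + 4095 = 4344 = 8 * 543, so the
	 block size is indeed a multiple of eight.)  */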
1683 /* V5 code we generate is completely interworking capable, so we turn off
1684 TARGET_INTERWORK here to avoid many tests later on. */
1686 /* XXX However, we must pass the right pre-processor defines to CPP
1687 or GLD can get confused. This is a hack. */
1688 if (TARGET_INTERWORK)
1689 arm_cpp_interwork = 1;
1692 target_flags &= ~MASK_INTERWORK;
1694 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1695 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1697 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1698 error ("iwmmxt abi requires an iwmmxt capable cpu");
1700 if (target_fpu_name == NULL && target_fpe_name != NULL)
1702 if (streq (target_fpe_name, "2"))
1703 target_fpu_name = "fpe2";
1704 else if (streq (target_fpe_name, "3"))
1705 target_fpu_name = "fpe3";
1706 else
1707 error ("invalid floating point emulation option: -mfpe=%s",
1708 target_fpe_name);
1709 }
1711 if (target_fpu_name == NULL)
1713 #ifdef FPUTYPE_DEFAULT
1714 target_fpu_name = FPUTYPE_DEFAULT;
1715 #else
1716 if (arm_arch_cirrus)
1717 target_fpu_name = "maverick";
1718 else
1719 target_fpu_name = "fpe2";
1720 #endif
1721 }
1723 arm_fpu_desc = NULL;
1724 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1726 if (streq (all_fpus[i].name, target_fpu_name))
1728 arm_fpu_desc = &all_fpus[i];
1733 if (!arm_fpu_desc)
1734 {
1735 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1736 return;
1737 }
1739 switch (arm_fpu_desc->model)
1741 case ARM_FP_MODEL_FPA:
1742 if (arm_fpu_desc->rev == 2)
1743 arm_fpu_attr = FPU_FPE2;
1744 else if (arm_fpu_desc->rev == 3)
1745 arm_fpu_attr = FPU_FPE3;
1746 else
1747 arm_fpu_attr = FPU_FPA;
1748 break;
1750 case ARM_FP_MODEL_MAVERICK:
1751 arm_fpu_attr = FPU_MAVERICK;
1752 break;
1754 case ARM_FP_MODEL_VFP:
1755 arm_fpu_attr = FPU_VFP;
1756 break;
1758 default:
1759 gcc_unreachable ();
1760 }
1762 if (target_float_abi_name != NULL)
1764 /* The user specified a FP ABI. */
1765 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1767 if (streq (all_float_abis[i].name, target_float_abi_name))
1769 arm_float_abi = all_float_abis[i].abi_type;
1773 if (i == ARRAY_SIZE (all_float_abis))
1774 error ("invalid floating point abi: -mfloat-abi=%s",
1775 target_float_abi_name);
1776 }
1777 else
1778 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1780 if (TARGET_AAPCS_BASED
1781 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1782 error ("FPA is unsupported in the AAPCS");
1784 if (TARGET_AAPCS_BASED)
1786 if (TARGET_CALLER_INTERWORKING)
1787 error ("AAPCS does not support -mcaller-super-interworking");
1789 if (TARGET_CALLEE_INTERWORKING)
1790 error ("AAPCS does not support -mcallee-super-interworking");
1793 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1794 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1795 will ever exist. GCC makes no attempt to support this combination. */
1796 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1797 sorry ("iWMMXt and hardware floating point");
1799 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1800 if (TARGET_THUMB2 && TARGET_IWMMXT)
1801 sorry ("Thumb-2 iWMMXt");
1803 /* __fp16 support currently assumes the core has ldrh. */
1804 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1805 sorry ("__fp16 and no ldrh");
1807 /* If soft-float is specified then don't use FPU. */
1808 if (TARGET_SOFT_FLOAT)
1809 arm_fpu_attr = FPU_NONE;
1811 if (TARGET_AAPCS_BASED)
1813 if (arm_abi == ARM_ABI_IWMMXT)
1814 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1815 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1816 && TARGET_HARD_FLOAT
1817 && TARGET_VFP)
1818 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1819 else
1820 arm_pcs_default = ARM_PCS_AAPCS;
1821 }
1822 else
1823 {
1824 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1825 sorry ("-mfloat-abi=hard and VFP");
1827 if (arm_abi == ARM_ABI_APCS)
1828 arm_pcs_default = ARM_PCS_APCS;
1829 else
1830 arm_pcs_default = ARM_PCS_ATPCS;
1831 }
1833 /* For arm2/3 there is no need to do any scheduling if there is only
1834 a floating point emulator, or we are doing software floating-point. */
1835 if ((TARGET_SOFT_FLOAT
1836 || (TARGET_FPA && arm_fpu_desc->rev))
1837 && (tune_flags & FL_MODE32) == 0)
1838 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1840 if (target_thread_switch)
1842 if (strcmp (target_thread_switch, "soft") == 0)
1843 target_thread_pointer = TP_SOFT;
1844 else if (strcmp (target_thread_switch, "auto") == 0)
1845 target_thread_pointer = TP_AUTO;
1846 else if (strcmp (target_thread_switch, "cp15") == 0)
1847 target_thread_pointer = TP_CP15;
1848 else
1849 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1852 /* Use the cp15 method if it is available. */
1853 if (target_thread_pointer == TP_AUTO)
1855 if (arm_arch6k && !TARGET_THUMB1)
1856 target_thread_pointer = TP_CP15;
1857 else
1858 target_thread_pointer = TP_SOFT;
1861 if (TARGET_HARD_TP && TARGET_THUMB1)
1862 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1864 /* Override the default structure alignment for AAPCS ABI. */
1865 if (TARGET_AAPCS_BASED)
1866 arm_structure_size_boundary = 8;
1868 if (structure_size_string != NULL)
1870 int size = strtol (structure_size_string, NULL, 0);
1872 if (size == 8 || size == 32
1873 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1874 arm_structure_size_boundary = size;
1875 else
1876 warning (0, "structure size boundary can only be set to %s",
1877 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1880 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1882 error ("RTP PIC is incompatible with Thumb");
1886 /* If stack checking is disabled, we can use r10 as the PIC register,
1887 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1888 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1890 if (TARGET_VXWORKS_RTP)
1891 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1892 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1895 if (flag_pic && TARGET_VXWORKS_RTP)
1896 arm_pic_register = 9;
1898 if (arm_pic_register_string != NULL)
1900 int pic_register = decode_reg_name (arm_pic_register_string);
1902 if (!flag_pic)
1903 warning (0, "-mpic-register= is useless without -fpic");
1905 /* Prevent the user from choosing an obviously stupid PIC register. */
1906 else if (pic_register < 0 || call_used_regs[pic_register]
1907 || pic_register == HARD_FRAME_POINTER_REGNUM
1908 || pic_register == STACK_POINTER_REGNUM
1909 || pic_register >= PC_REGNUM
1910 || (TARGET_VXWORKS_RTP
1911 && (unsigned int) pic_register != arm_pic_register))
1912 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1913 else
1914 arm_pic_register = pic_register;
1917 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1918 if (fix_cm3_ldrd == 2)
1919 {
1920 if (arm_selected_cpu->core == cortexm3)
1921 fix_cm3_ldrd = 1;
1922 else
1923 fix_cm3_ldrd = 0;
1924 }
1926 if (TARGET_THUMB1 && flag_schedule_insns)
1927 {
1928 /* Don't warn since it's on by default in -O2. */
1929 flag_schedule_insns = 0;
1930 }
1932 if (optimize_size)
1933 {
1934 /* If optimizing for size, bump the number of instructions that we
1935 are prepared to conditionally execute (even on a StrongARM). */
1936 max_insns_skipped = 6;
1937 }
1938 else
1939 {
1940 /* StrongARM has early execution of branches, so a sequence
1941 that is worth skipping is shorter. */
1942 if (arm_tune_strongarm)
1943 max_insns_skipped = 3;
1944 }
1946 /* Hot/Cold partitioning is not currently supported, since we can't
1947 handle literal pool placement in that case. */
1948 if (flag_reorder_blocks_and_partition)
1950 inform (input_location,
1951 "-freorder-blocks-and-partition not supported on this architecture");
1952 flag_reorder_blocks_and_partition = 0;
1953 flag_reorder_blocks = 1;
1954 }
1957 /* Hoisting PIC address calculations more aggressively provides a small,
1958 but measurable, size reduction for PIC code. Therefore, we decrease
1959 the bar for unrestricted expression hoisting to the cost of PIC address
1960 calculation, which is 2 instructions. */
1961 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1962 global_options.x_param_values,
1963 global_options_set.x_param_values);
1965 /* Register global variables with the garbage collector. */
1966 arm_add_gc_roots ();
1969 static void
1970 arm_add_gc_roots (void)
1971 {
1972 gcc_obstack_init(&minipool_obstack);
1973 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1974 }
1976 /* A table of known ARM exception types.
1977 For use with the interrupt function attribute. */
1979 typedef struct
1980 {
1981 const char *const arg;
1982 const unsigned long return_value;
1983 } isr_attribute_arg;
1986 static const isr_attribute_arg isr_attribute_args [] =
1988 { "IRQ", ARM_FT_ISR },
1989 { "irq", ARM_FT_ISR },
1990 { "FIQ", ARM_FT_FIQ },
1991 { "fiq", ARM_FT_FIQ },
1992 { "ABORT", ARM_FT_ISR },
1993 { "abort", ARM_FT_ISR },
1994 { "ABORT", ARM_FT_ISR },
1995 { "abort", ARM_FT_ISR },
1996 { "UNDEF", ARM_FT_EXCEPTION },
1997 { "undef", ARM_FT_EXCEPTION },
1998 { "SWI", ARM_FT_EXCEPTION },
1999 { "swi", ARM_FT_EXCEPTION },
2000 { NULL, ARM_FT_NORMAL }
2001 };
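/* Editor's illustration (not from the original source): with this
   table, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   is mapped by arm_isr_value below to ARM_FT_ISR, while an
   unrecognized string yields ARM_FT_UNKNOWN.  */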
2003 /* Returns the (interrupt) function type of the current
2004 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2006 static unsigned long
2007 arm_isr_value (tree argument)
2009 const isr_attribute_arg * ptr;
2012 if (!arm_arch_notm)
2013 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2015 /* No argument - default to IRQ. */
2016 if (argument == NULL_TREE)
2017 return ARM_FT_ISR;
2019 /* Get the value of the argument. */
2020 if (TREE_VALUE (argument) == NULL_TREE
2021 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2022 return ARM_FT_UNKNOWN;
2024 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2026 /* Check it against the list of known arguments. */
2027 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2028 if (streq (arg, ptr->arg))
2029 return ptr->return_value;
2031 /* An unrecognized interrupt type. */
2032 return ARM_FT_UNKNOWN;
2033 }
2035 /* Computes the type of the current function. */
2037 static unsigned long
2038 arm_compute_func_type (void)
2040 unsigned long type = ARM_FT_UNKNOWN;
2044 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2046 /* Decide if the current function is volatile. Such functions
2047 never return, and many memory cycles can be saved by not storing
2048 register values that will never be needed again. This optimization
2049 was added to speed up context switching in a kernel application. */
2050 if (optimize > 0
2051 && (TREE_NOTHROW (current_function_decl)
2052 || !(flag_unwind_tables
2053 || (flag_exceptions && arm_except_unwind_info () != UI_SJLJ)))
2054 && TREE_THIS_VOLATILE (current_function_decl))
2055 type |= ARM_FT_VOLATILE;
2057 if (cfun->static_chain_decl != NULL)
2058 type |= ARM_FT_NESTED;
2060 attr = DECL_ATTRIBUTES (current_function_decl);
2062 a = lookup_attribute ("naked", attr);
2063 if (a != NULL_TREE)
2064 type |= ARM_FT_NAKED;
2066 a = lookup_attribute ("isr", attr);
2067 if (a == NULL_TREE)
2068 a = lookup_attribute ("interrupt", attr);
2070 if (a == NULL_TREE)
2071 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2072 else
2073 type |= arm_isr_value (TREE_VALUE (a));
2075 return type;
2076 }
2078 /* Returns the type of the current function. */
2080 unsigned long
2081 arm_current_func_type (void)
2082 {
2083 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2084 cfun->machine->func_type = arm_compute_func_type ();
2086 return cfun->machine->func_type;
2087 }
2089 static bool
2090 arm_allocate_stack_slots_for_args (void)
2091 {
2092 /* Naked functions should not allocate stack slots for arguments. */
2093 return !IS_NAKED (arm_current_func_type ());
2094 }
2097 /* Output assembler code for a block containing the constant parts
2098 of a trampoline, leaving space for the variable parts.
2100 On the ARM, (if r8 is the static chain regnum, and remembering that
2101 referencing pc adds an offset of 8) the trampoline looks like:
2102 ldr r8, [pc, #0]
2103 ldr pc, [pc, #0]
2104 .word static chain value
2105 .word function's address
2106 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2109 arm_asm_trampoline_template (FILE *f)
2110 {
2111 if (TARGET_ARM)
2112 {
2113 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2114 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2115 }
2116 else if (TARGET_THUMB2)
2117 {
2118 /* The Thumb-2 trampoline is similar to the arm implementation.
2119 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2120 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2121 STATIC_CHAIN_REGNUM, PC_REGNUM);
2122 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2123 }
2124 else
2125 {
2126 ASM_OUTPUT_ALIGN (f, 2);
2127 fprintf (f, "\t.code\t16\n");
2128 fprintf (f, ".Ltrampoline_start:\n");
2129 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2130 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2131 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2132 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2133 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2134 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2135 }
2136 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2137 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2138 }
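/* Editor's illustration (not from the original source): on ARM the
   template above assembles to the layout sketched in the comment
   before this function, e.g. with r8 as the static chain register:

       0:  ldr  r8, [pc, #0]   @ pc reads as 0 + 8, so this loads [8]
       4:  ldr  pc, [pc, #0]   @ pc reads as 4 + 8, so this loads [12]
       8:  .word <static chain value>
      12:  .word <function address>  */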
2140 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2142 static void
2143 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2144 {
2145 rtx fnaddr, mem, a_tramp;
2147 emit_block_move (m_tramp, assemble_trampoline_template (),
2148 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2150 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2151 emit_move_insn (mem, chain_value);
2153 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2154 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2155 emit_move_insn (mem, fnaddr);
2157 a_tramp = XEXP (m_tramp, 0);
2158 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2159 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2160 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2161 }
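/* Editor's note: the offsets 8/12 (TARGET_32BIT) and 12/16 (Thumb-1)
   used in arm_trampoline_init mirror arm_asm_trampoline_template: the
   ARM and Thumb-2 stubs occupy 8 bytes of code before the two literal
   words, while the six 16-bit Thumb-1 instructions occupy 12 bytes.  */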
2163 /* Thumb trampolines should be entered in thumb mode, so set
2164 the bottom bit of the address. */
2166 static rtx
2167 arm_trampoline_adjust_address (rtx addr)
2168 {
2169 if (TARGET_THUMB)
2170 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2171 NULL, 0, OPTAB_LIB_WIDEN);
2172 return addr;
2173 }
2175 /* Return 1 if it is possible to return using a single instruction.
2176 If SIBLING is non-null, this is a test for a return before a sibling
2177 call. SIBLING is the call insn, so we can examine its register usage. */
2180 use_return_insn (int iscond, rtx sibling)
2183 unsigned int func_type;
2184 unsigned long saved_int_regs;
2185 unsigned HOST_WIDE_INT stack_adjust;
2186 arm_stack_offsets *offsets;
2188 /* Never use a return instruction before reload has run. */
2189 if (!reload_completed)
2190 return 0;
2192 func_type = arm_current_func_type ();
2194 /* Naked, volatile and stack alignment functions need special
2195 consideration. */
2196 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2197 return 0;
2199 /* So do interrupt functions that use the frame pointer and Thumb
2200 interrupt functions. */
2201 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2202 return 0;
2204 offsets = arm_get_frame_offsets ();
2205 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2207 /* As do variadic functions. */
2208 if (crtl->args.pretend_args_size
2209 || cfun->machine->uses_anonymous_args
2210 /* Or if the function calls __builtin_eh_return () */
2211 || crtl->calls_eh_return
2212 /* Or if the function calls alloca */
2213 || cfun->calls_alloca
2214 /* Or if there is a stack adjustment. However, if the stack pointer
2215 is saved on the stack, we can use a pre-incrementing stack load. */
2216 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2217 && stack_adjust == 4)))
2218 return 0;
2220 saved_int_regs = offsets->saved_regs_mask;
2222 /* Unfortunately, the insn
2224 ldmib sp, {..., sp, ...}
2226 triggers a bug on most SA-110 based devices, such that the stack
2227 pointer won't be correctly restored if the instruction takes a
2228 page fault. We work around this problem by popping r3 along with
2229 the other registers, since that is never slower than executing
2230 another instruction.
2232 We test for !arm_arch5 here, because code for any architecture
2233 less than this could potentially be run on one of the buggy
2234 chips. */
2235 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2237 /* Validate that r3 is a call-clobbered register (always true in
2238 the default abi) ... */
2239 if (!call_used_regs[3])
2240 return 0;
2242 /* ... that it isn't being used for a return value ... */
2243 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2244 return 0;
2246 /* ... or for a tail-call argument ... */
2247 if (sibling)
2248 {
2249 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2251 if (find_regno_fusage (sibling, USE, 3))
2252 return 0;
2253 }
2255 /* ... and that there are no call-saved registers in r0-r2
2256 (always true in the default ABI). */
2257 if (saved_int_regs & 0x7)
2258 return 0;
2261 /* Can't be done if interworking with Thumb, and any registers have been
2262 stacked. */
2263 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2264 return 0;
2266 /* On StrongARM, conditional returns are expensive if they aren't
2267 taken and multiple registers have been stacked. */
2268 if (iscond && arm_tune_strongarm)
2270 /* Conditional return when just the LR is stored is a simple
2271 conditional-load instruction, that's not expensive. */
2272 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2273 return 0;
2275 if (flag_pic
2276 && arm_pic_register != INVALID_REGNUM
2277 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2278 return 0;
2279 }
2281 /* If there are saved registers but the LR isn't saved, then we need
2282 two instructions for the return. */
2283 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2284 return 0;
2286 /* Can't be done if any of the FPA regs are pushed,
2287 since this also requires an insn. */
2288 if (TARGET_HARD_FLOAT && TARGET_FPA)
2289 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2290 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2291 return 0;
2293 /* Likewise VFP regs. */
2294 if (TARGET_HARD_FLOAT && TARGET_VFP)
2295 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2296 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2297 return 0;
2299 if (TARGET_REALLY_IWMMXT)
2300 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2301 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2302 return 0;
2304 return 1;
2305 }
2307 /* Return TRUE if int I is a valid immediate ARM constant. */
2310 const_ok_for_arm (HOST_WIDE_INT i)
2314 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2315 be all zero, or all one. */
2316 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2317 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2318 != ((~(unsigned HOST_WIDE_INT) 0)
2319 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2322 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2324 /* Fast return for 0 and small values. We must do this for zero, since
2325 the code below can't handle that one case. */
2326 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2327 return TRUE;
2329 /* Get the number of trailing zeros. */
2330 lowbit = ffs((int) i) - 1;
2332 /* Only even shifts are allowed in ARM mode so round down to the
2333 nearest even number. */
2334 if (TARGET_ARM)
2335 lowbit &= ~1;
2337 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2338 return TRUE;
2342 /* Allow rotated constants in ARM mode. */
2343 if (TARGET_ARM
2344 && ((i & ~0xc000003f) == 0
2345 || (i & ~0xf000000f) == 0
2346 || (i & ~0xfc000003) == 0))
2347 return TRUE;
2348 else if (TARGET_THUMB2)
2349 {
2350 HOST_WIDE_INT v;
2353 /* Allow repeated pattern. */
2354 v = i & 0xff;
2355 v |= v << 16;
2356 if (i == v || i == (v | (v << 8)))
2357 return TRUE;
2358 }
2360 return FALSE;
2361 }
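/* Editor's illustration of the tests above (ARM mode): an immediate
   is an 8-bit value rotated right by an even amount, so
       0x000000ff   valid (no rotation)
       0x0003fc00   valid (0xff rotated right by 22)
       0x00000101   invalid (significant bits span nine positions)
   while the Thumb-2 path also accepts replicated patterns such as
   0x00ff00ff and 0xffffffff.  */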
2363 /* Return true if I is a valid constant for the operation CODE. */
2365 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2367 if (const_ok_for_arm (i))
2391 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2393 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2399 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2403 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2410 /* Emit a sequence of insns to handle a large constant.
2411 CODE is the code of the operation required, it can be any of SET, PLUS,
2412 IOR, AND, XOR, MINUS;
2413 MODE is the mode in which the operation is being performed;
2414 VAL is the integer to operate on;
2415 SOURCE is the other operand (a register, or a null-pointer for SET);
2416 SUBTARGETS means it is safe to create scratch registers if that will
2417 either produce a simpler sequence, or we will want to cse the values.
2418 Return value is the number of insns emitted. */
2420 /* ??? Tweak this for thumb2. */
2422 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2423 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2427 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2428 cond = COND_EXEC_TEST (PATTERN (insn));
2432 if (subtargets || code == SET
2433 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2434 && REGNO (target) != REGNO (source)))
2436 /* After arm_reorg has been called, we can't fix up expensive
2437 constants by pushing them into memory so we must synthesize
2438 them in-line, regardless of the cost. This is only likely to
2439 be more costly on chips that have load delay slots and we are
2440 compiling without running the scheduler (so no splitting
2441 occurred before the final instruction emission).
2443 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2445 if (!after_arm_reorg
2447 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2449 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2454 /* Currently SET is the only monadic value for CODE, all
2455 the rest are dyadic. */
2456 if (TARGET_USE_MOVT)
2457 arm_emit_movpair (target, GEN_INT (val));
2459 emit_set_insn (target, GEN_INT (val));
2465 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2467 if (TARGET_USE_MOVT)
2468 arm_emit_movpair (temp, GEN_INT (val));
2470 emit_set_insn (temp, GEN_INT (val));
2472 /* For MINUS, the value is subtracted from, since we never
2473 have subtraction of a constant. */
2475 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2477 emit_set_insn (target,
2478 gen_rtx_fmt_ee (code, mode, source, temp));
2484 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2488 /* Return the number of instructions required to synthesize the given
2489 constant, if we start emitting them from bit-position I. */
2491 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2493 HOST_WIDE_INT temp1;
2494 int step_size = TARGET_ARM ? 2 : 1;
2497 gcc_assert (TARGET_ARM || i == 0);
2505 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2510 temp1 = remainder & ((0x0ff << end)
2511 | ((i < end) ? (0xff >> (32 - end)) : 0));
2512 remainder &= ~temp1;
2517 } while (remainder);
2522 find_best_start (unsigned HOST_WIDE_INT remainder)
2524 int best_consecutive_zeros = 0;
2528 /* If we aren't targeting ARM, the best place to start is always at
2529 the bottom. */
2530 if (!TARGET_ARM)
2531 return 0;
2533 for (i = 0; i < 32; i += 2)
2535 int consecutive_zeros = 0;
2537 if (!(remainder & (3 << i)))
2539 while ((i < 32) && !(remainder & (3 << i)))
2540 {
2541 consecutive_zeros += 2;
2542 i += 2;
2543 }
2544 if (consecutive_zeros > best_consecutive_zeros)
2546 best_consecutive_zeros = consecutive_zeros;
2547 best_start = i - consecutive_zeros;
2553 /* So long as it won't require any more insns to do so, it's
2554 desirable to emit a small constant (in bits 0...9) in the last
2555 insn. This way there is more chance that it can be combined with
2556 a later addressing insn to form a pre-indexed load or store
2557 operation. Consider:
2559 *((volatile int *)0xe0000100) = 1;
2560 *((volatile int *)0xe0000110) = 2;
2562 We want this to wind up as:
2566 str rB, [rA, #0x100]
2568 str rB, [rA, #0x110]
2570 rather than having to synthesize both large constants from scratch.
2572 Therefore, we calculate how many insns would be required to emit
2573 the constant starting from `best_start', and also starting from
2574 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2575 yield a shorter sequence, we may as well use zero. */
2576 if (best_start != 0
2577 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2578 && (count_insns_for_constant (remainder, 0) <=
2579 count_insns_for_constant (remainder, best_start)))
2580 best_start = 0;
2582 return best_start;
2583 }
2585 /* Emit an instruction with the indicated PATTERN. If COND is
2586 non-NULL, conditionalize the execution of the instruction on COND
2587 being true. */
2589 static void
2590 emit_constant_insn (rtx cond, rtx pattern)
2591 {
2592 if (cond)
2593 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2594 emit_insn (pattern);
2595 }
2597 /* As above, but extra parameter GENERATE which, if clear, suppresses
2598 RTL generation. */
2599 /* ??? This needs more work for thumb2. */
2601 static int
2602 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2603 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2608 int final_invert = 0;
2609 int can_negate_initial = 0;
2611 int num_bits_set = 0;
2612 int set_sign_bit_copies = 0;
2613 int clear_sign_bit_copies = 0;
2614 int clear_zero_bit_copies = 0;
2615 int set_zero_bit_copies = 0;
2617 unsigned HOST_WIDE_INT temp1, temp2;
2618 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2619 int step_size = TARGET_ARM ? 2 : 1;
2621 /* Find out which operations are safe for a given CODE. Also do a quick
2622 check for degenerate cases; these can occur when DImode operations
2623 are split. */
2633 can_negate_initial = 1;
2637 if (remainder == 0xffffffff)
2640 emit_constant_insn (cond,
2641 gen_rtx_SET (VOIDmode, target,
2642 GEN_INT (ARM_SIGN_EXTEND (val))));
2648 if (reload_completed && rtx_equal_p (target, source))
2652 emit_constant_insn (cond,
2653 gen_rtx_SET (VOIDmode, target, source));
2665 emit_constant_insn (cond,
2666 gen_rtx_SET (VOIDmode, target, const0_rtx));
2669 if (remainder == 0xffffffff)
2671 if (reload_completed && rtx_equal_p (target, source))
2674 emit_constant_insn (cond,
2675 gen_rtx_SET (VOIDmode, target, source));
2684 if (reload_completed && rtx_equal_p (target, source))
2687 emit_constant_insn (cond,
2688 gen_rtx_SET (VOIDmode, target, source));
2692 if (remainder == 0xffffffff)
2695 emit_constant_insn (cond,
2696 gen_rtx_SET (VOIDmode, target,
2697 gen_rtx_NOT (mode, source)));
2703 /* We treat MINUS as (val - source), since (source - val) is always
2704 passed as (source + (-val)). */
2708 emit_constant_insn (cond,
2709 gen_rtx_SET (VOIDmode, target,
2710 gen_rtx_NEG (mode, source)));
2713 if (const_ok_for_arm (val))
2716 emit_constant_insn (cond,
2717 gen_rtx_SET (VOIDmode, target,
2718 gen_rtx_MINUS (mode, GEN_INT (val),
2730 /* If we can do it in one insn get out quickly. */
2731 if (const_ok_for_arm (val)
2732 || (can_negate_initial && const_ok_for_arm (-val))
2733 || (can_invert && const_ok_for_arm (~val)))
2736 emit_constant_insn (cond,
2737 gen_rtx_SET (VOIDmode, target,
2739 ? gen_rtx_fmt_ee (code, mode, source,
2745 /* Calculate a few attributes that may be useful for specific
2746 optimizations. */
2747 /* Count number of leading zeros. */
2748 for (i = 31; i >= 0; i--)
2750 if ((remainder & (1 << i)) == 0)
2751 clear_sign_bit_copies++;
2756 /* Count number of leading 1's. */
2757 for (i = 31; i >= 0; i--)
2759 if ((remainder & (1 << i)) != 0)
2760 set_sign_bit_copies++;
2765 /* Count number of trailing zeros. */
2766 for (i = 0; i <= 31; i++)
2768 if ((remainder & (1 << i)) == 0)
2769 clear_zero_bit_copies++;
2774 /* Count number of trailing 1's. */
2775 for (i = 0; i <= 31; i++)
2777 if ((remainder & (1 << i)) != 0)
2778 set_zero_bit_copies++;
2786 /* See if we can use movw. */
2787 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2790 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2795 /* See if we can do this by sign_extending a constant that is known
2796 to be negative. This is a good way of doing it, since the shift
2797 may well merge into a subsequent insn. */
2798 if (set_sign_bit_copies > 1)
2800 if (const_ok_for_arm
2801 (temp1 = ARM_SIGN_EXTEND (remainder
2802 << (set_sign_bit_copies - 1))))
2806 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2807 emit_constant_insn (cond,
2808 gen_rtx_SET (VOIDmode, new_src,
2810 emit_constant_insn (cond,
2811 gen_ashrsi3 (target, new_src,
2812 GEN_INT (set_sign_bit_copies - 1)));
2816 /* For an inverted constant, we will need to set the low bits,
2817 these will be shifted out of harm's way. */
2818 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2819 if (const_ok_for_arm (~temp1))
2823 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2824 emit_constant_insn (cond,
2825 gen_rtx_SET (VOIDmode, new_src,
2827 emit_constant_insn (cond,
2828 gen_ashrsi3 (target, new_src,
2829 GEN_INT (set_sign_bit_copies - 1)));
2835 /* See if we can calculate the value as the difference between two
2836 valid immediates. */
2837 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2839 int topshift = clear_sign_bit_copies & ~1;
2841 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2842 & (0xff000000 >> topshift));
2844 /* If temp1 is zero, then that means the 9 most significant
2845 bits of remainder were 1 and we've caused it to overflow.
2846 When topshift is 0 we don't need to do anything since we
2847 can borrow from 'bit 32'. */
2848 if (temp1 == 0 && topshift != 0)
2849 temp1 = 0x80000000 >> (topshift - 1);
2851 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2853 if (const_ok_for_arm (temp2))
2857 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2858 emit_constant_insn (cond,
2859 gen_rtx_SET (VOIDmode, new_src,
2861 emit_constant_insn (cond,
2862 gen_addsi3 (target, new_src,
2870 /* See if we can generate this by setting the bottom (or the top)
2871 16 bits, and then shifting these into the other half of the
2872 word. We only look for the simplest cases, to do more would cost
2873 too much. Be careful, however, not to generate this when the
2874 alternative would take fewer insns. */
2875 if (val & 0xffff0000)
2877 temp1 = remainder & 0xffff0000;
2878 temp2 = remainder & 0x0000ffff;
2880 /* Overlaps outside this range are best done using other methods. */
2881 for (i = 9; i < 24; i++)
2883 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2884 && !const_ok_for_arm (temp2))
2886 rtx new_src = (subtargets
2887 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2889 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2890 source, subtargets, generate);
2898 gen_rtx_ASHIFT (mode, source,
2905 /* Don't duplicate cases already considered. */
2906 for (i = 17; i < 24; i++)
2908 if (((temp1 | (temp1 >> i)) == remainder)
2909 && !const_ok_for_arm (temp1))
2911 rtx new_src = (subtargets
2912 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2914 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2915 source, subtargets, generate);
2920 gen_rtx_SET (VOIDmode, target,
2923 gen_rtx_LSHIFTRT (mode, source,
2934 /* If we have IOR or XOR, and the constant can be loaded in a
2935 single instruction, and we can find a temporary to put it in,
2936 then this can be done in two instructions instead of 3-4. */
2938 /* TARGET can't be NULL if SUBTARGETS is 0 */
2939 || (reload_completed && !reg_mentioned_p (target, source)))
2941 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2945 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2947 emit_constant_insn (cond,
2948 gen_rtx_SET (VOIDmode, sub,
2950 emit_constant_insn (cond,
2951 gen_rtx_SET (VOIDmode, target,
2952 gen_rtx_fmt_ee (code, mode,
2963 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2964 and the remainder 0s for e.g. 0xfff00000)
2965 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2967 This can be done in 2 instructions by using shifts with mov or mvn.
2968 e.g. for
2969 x = x | 0xfff00000,
2970 we generate:
2971 mvn r0, r0, asl #12
2972 mvn r0, r0, lsr #12 */
2973 if (set_sign_bit_copies > 8
2974 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2978 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2979 rtx shift = GEN_INT (set_sign_bit_copies);
2983 gen_rtx_SET (VOIDmode, sub,
2985 gen_rtx_ASHIFT (mode,
2990 gen_rtx_SET (VOIDmode, target,
2992 gen_rtx_LSHIFTRT (mode, sub,
2999 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3001 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3003 For e.g. r0 = r0 | 0xfff
3004 mvn r0, r0, lsr #12
3005 mvn r0, r0, asl #12 */
3008 if (set_zero_bit_copies > 8
3009 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3013 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3014 rtx shift = GEN_INT (set_zero_bit_copies);
3018 gen_rtx_SET (VOIDmode, sub,
3020 gen_rtx_LSHIFTRT (mode,
3025 gen_rtx_SET (VOIDmode, target,
3027 gen_rtx_ASHIFT (mode, sub,
3033 /* This will never be reached for Thumb2 because orn is a valid
3034 instruction. This is for Thumb1 and the ARM 32 bit cases.
3036 x = y | constant (such that ~constant is a valid constant)
3037 Transform this to
3038 x = ~(~y & ~constant). */
3040 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3044 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3045 emit_constant_insn (cond,
3046 gen_rtx_SET (VOIDmode, sub,
3047 gen_rtx_NOT (mode, source)));
3050 sub = gen_reg_rtx (mode);
3051 emit_constant_insn (cond,
3052 gen_rtx_SET (VOIDmode, sub,
3053 gen_rtx_AND (mode, source,
3055 emit_constant_insn (cond,
3056 gen_rtx_SET (VOIDmode, target,
3057 gen_rtx_NOT (mode, sub)));
3064 /* See if two shifts will do 2 or more insn's worth of work. */
3065 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3067 HOST_WIDE_INT shift_mask = ((0xffffffff
3068 << (32 - clear_sign_bit_copies))
3071 if ((remainder | shift_mask) != 0xffffffff)
3075 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3076 insns = arm_gen_constant (AND, mode, cond,
3077 remainder | shift_mask,
3078 new_src, source, subtargets, 1);
3083 rtx targ = subtargets ? NULL_RTX : target;
3084 insns = arm_gen_constant (AND, mode, cond,
3085 remainder | shift_mask,
3086 targ, source, subtargets, 0);
3092 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3093 rtx shift = GEN_INT (clear_sign_bit_copies);
3095 emit_insn (gen_ashlsi3 (new_src, source, shift));
3096 emit_insn (gen_lshrsi3 (target, new_src, shift));
3102 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3104 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3106 if ((remainder | shift_mask) != 0xffffffff)
3110 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3112 insns = arm_gen_constant (AND, mode, cond,
3113 remainder | shift_mask,
3114 new_src, source, subtargets, 1);
3119 rtx targ = subtargets ? NULL_RTX : target;
3121 insns = arm_gen_constant (AND, mode, cond,
3122 remainder | shift_mask,
3123 targ, source, subtargets, 0);
3129 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3130 rtx shift = GEN_INT (clear_zero_bit_copies);
3132 emit_insn (gen_lshrsi3 (new_src, source, shift));
3133 emit_insn (gen_ashlsi3 (target, new_src, shift));
3145 for (i = 0; i < 32; i++)
3146 if (remainder & (1 << i))
3150 || (code != IOR && can_invert && num_bits_set > 16))
3151 remainder ^= 0xffffffff;
3152 else if (code == PLUS && num_bits_set > 16)
3153 remainder = (-remainder) & 0xffffffff;
3155 /* For XOR, if more than half the bits are set and there's a sequence
3156 of more than 8 consecutive ones in the pattern then we can XOR by the
3157 inverted constant and then invert the final result; this may save an
3158 instruction and might also lead to the final mvn being merged with
3159 some other operation. */
3160 else if (code == XOR && num_bits_set > 16
3161 && (count_insns_for_constant (remainder ^ 0xffffffff,
3163 (remainder ^ 0xffffffff))
3164 < count_insns_for_constant (remainder,
3165 find_best_start (remainder))))
3167 remainder ^= 0xffffffff;
3176 /* Now try and find a way of doing the job in either two or three
3177 instructions.
3178 We start by looking for the largest block of zeros that are aligned on
3179 a 2-bit boundary, we then fill up the temps, wrapping around to the
3180 top of the word when we drop off the bottom.
3181 In the worst case this code should produce no more than four insns.
3182 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3183 best place to start. */
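/* Editor's illustration (not from the original source): for a SET of
   0x12345678 in ARM mode the loop below produces the classic
   four-instruction sequence

       mov	r0, #0x12000000
       orr	r0, r0, #0x00340000
       orr	r0, r0, #0x00005600
       orr	r0, r0, #0x00000078

   each immediate being an 8-bit value at an even rotation.  */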
3185 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3186 the same. */
3188 /* Now start emitting the insns. */
3189 i = find_best_start (remainder);
3196 if (remainder & (3 << (i - 2)))
3201 temp1 = remainder & ((0x0ff << end)
3202 | ((i < end) ? (0xff >> (32 - end)) : 0));
3203 remainder &= ~temp1;
3207 rtx new_src, temp1_rtx;
3209 if (code == SET || code == MINUS)
3211 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3212 if (can_invert && code != MINUS)
3217 if ((final_invert || remainder) && subtargets)
3218 new_src = gen_reg_rtx (mode);
3223 else if (can_negate)
3227 temp1 = trunc_int_for_mode (temp1, mode);
3228 temp1_rtx = GEN_INT (temp1);
3232 else if (code == MINUS)
3233 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3235 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3237 emit_constant_insn (cond,
3238 gen_rtx_SET (VOIDmode, new_src,
3248 else if (code == MINUS)
3254 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3255 shifts. */
3264 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3265 gen_rtx_NOT (mode, source)));
3272 /* Canonicalize a comparison so that we are more likely to recognize it.
3273 This can be done for a few constant compares, where we can make the
3274 immediate value easier to load. */
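/* Editor's illustration (not from the original source): a comparison
   such as (LE x 0x3ff) cannot use 0x3ff directly, since 0x3ff is not
   a valid ARM immediate, but the rewrite below turns it into
   (LT x 0x400), and 0x400 is encodable, so no constant load is
   needed.  */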
3276 enum rtx_code
3277 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3278 {
3279 enum machine_mode mode;
3280 unsigned HOST_WIDE_INT i, maxval;
3282 mode = GET_MODE (*op0);
3283 if (mode == VOIDmode)
3284 mode = GET_MODE (*op1);
3286 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3288 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3289 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3290 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3291 for GTU/LEU in Thumb mode. */
3296 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3297 available. */
3298 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3301 if (code == GT || code == LE
3302 || (!TARGET_ARM && (code == GTU || code == LEU)))
3304 /* Missing comparison. First try to use an available
3305 comparison. */
3306 if (GET_CODE (*op1) == CONST_INT)
3314 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3316 *op1 = GEN_INT (i + 1);
3317 return code == GT ? GE : LT;
3322 if (i != ~((unsigned HOST_WIDE_INT) 0)
3323 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3325 *op1 = GEN_INT (i + 1);
3326 return code == GTU ? GEU : LTU;
3334 /* If that did not work, reverse the condition. */
3338 return swap_condition (code);
3344 /* Comparisons smaller than DImode. Only adjust comparisons against
3345 an out-of-range constant. */
3346 if (GET_CODE (*op1) != CONST_INT
3347 || const_ok_for_arm (INTVAL (*op1))
3348 || const_ok_for_arm (- INTVAL (*op1)))
3362 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3364 *op1 = GEN_INT (i + 1);
3365 return code == GT ? GE : LT;
3372 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3374 *op1 = GEN_INT (i - 1);
3375 return code == GE ? GT : LE;
3381 if (i != ~((unsigned HOST_WIDE_INT) 0)
3382 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3384 *op1 = GEN_INT (i + 1);
3385 return code == GTU ? GEU : LTU;
3392 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3394 *op1 = GEN_INT (i - 1);
3395 return code == GEU ? GTU : LEU;
3407 /* Define how to find the value returned by a function. */
3410 arm_function_value(const_tree type, const_tree func,
3411 bool outgoing ATTRIBUTE_UNUSED)
3413 enum machine_mode mode;
3414 int unsignedp ATTRIBUTE_UNUSED;
3415 rtx r ATTRIBUTE_UNUSED;
3417 mode = TYPE_MODE (type);
3419 if (TARGET_AAPCS_BASED)
3420 return aapcs_allocate_return_reg (mode, type, func);
3422 /* Promote integer types. */
3423 if (INTEGRAL_TYPE_P (type))
3424 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3426 /* Promotes small structs returned in a register to full-word size
3427 for big-endian AAPCS. */
3428 if (arm_return_in_msb (type))
3430 HOST_WIDE_INT size = int_size_in_bytes (type);
3431 if (size % UNITS_PER_WORD != 0)
3433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3434 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3438 return LIBCALL_VALUE (mode);
3442 libcall_eq (const void *p1, const void *p2)
3444 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3448 libcall_hash (const void *p1)
3450 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3454 add_libcall (htab_t htab, rtx libcall)
3456 *htab_find_slot (htab, libcall, INSERT) = libcall;
3460 arm_libcall_uses_aapcs_base (const_rtx libcall)
3462 static bool init_done = false;
3463 static htab_t libcall_htab;
3469 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3471 add_libcall (libcall_htab,
3472 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3473 add_libcall (libcall_htab,
3474 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3475 add_libcall (libcall_htab,
3476 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3477 add_libcall (libcall_htab,
3478 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3480 add_libcall (libcall_htab,
3481 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3482 add_libcall (libcall_htab,
3483 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3484 add_libcall (libcall_htab,
3485 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3486 add_libcall (libcall_htab,
3487 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3489 add_libcall (libcall_htab,
3490 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3491 add_libcall (libcall_htab,
3492 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3493 add_libcall (libcall_htab,
3494 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3495 add_libcall (libcall_htab,
3496 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3497 add_libcall (libcall_htab,
3498 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3499 add_libcall (libcall_htab,
3500 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3503 return libcall && htab_find (libcall_htab, libcall) != NULL;
3507 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3509 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3510 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3512 /* The following libcalls return their result in integer registers,
3513 even though they return a floating point value. */
3514 if (arm_libcall_uses_aapcs_base (libcall))
3515 return gen_rtx_REG (mode, ARG_REGISTER(1));
3519 return LIBCALL_VALUE (mode);
3522 /* Determine the amount of memory needed to store the possible return
3523 registers of an untyped call. */
3525 arm_apply_result_size (void)
3531 if (TARGET_HARD_FLOAT_ABI)
3537 if (TARGET_MAVERICK)
3540 if (TARGET_IWMMXT_ABI)
3547 /* Decide whether TYPE should be returned in memory (true)
3548 or in a register (false). FNTYPE is the type of the function making
3551 arm_return_in_memory (const_tree type, const_tree fntype)
3555 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3557 if (TARGET_AAPCS_BASED)
3559 /* Simple, non-aggregate types (i.e. not including vectors and
3560 complex) are always returned in a register (or registers).
3561 We don't care about which register here, so we can short-cut
3562 some of the detail. */
3563 if (!AGGREGATE_TYPE_P (type)
3564 && TREE_CODE (type) != VECTOR_TYPE
3565 && TREE_CODE (type) != COMPLEX_TYPE)
3566 return false;
3568 /* Any return value that is no larger than one word can be
3569 returned in r0. */
3570 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3571 return false;
3573 /* Check any available co-processors to see if they accept the
3574 type as a register candidate (VFP, for example, can return
3575 some aggregates in consecutive registers). These aren't
3576 available if the call is variadic. */
3577 if (aapcs_select_return_coproc (type, fntype) >= 0)
3578 return false;
3580 /* Vector values should be returned using ARM registers, not
3581 memory (unless they're over 16 bytes, which will break since
3582 we only have four call-clobbered registers to play with). */
3583 if (TREE_CODE (type) == VECTOR_TYPE)
3584 return (size < 0 || size > (4 * UNITS_PER_WORD));
3586 /* The rest go in memory. */
3587 return true;
3588 }
3590 if (TREE_CODE (type) == VECTOR_TYPE)
3591 return (size < 0 || size > (4 * UNITS_PER_WORD));
3593 if (!AGGREGATE_TYPE_P (type) &&
3594 (TREE_CODE (type) != VECTOR_TYPE))
3595 /* All simple types are returned in registers. */
3596 return false;
3598 if (arm_abi != ARM_ABI_APCS)
3600 /* ATPCS and later return aggregate types in memory only if they are
3601 larger than a word (or are variable size). */
3602 return (size < 0 || size > UNITS_PER_WORD);
3605 /* For the arm-wince targets we choose to be compatible with Microsoft's
3606 ARM and Thumb compilers, which always return aggregates in memory. */
3607 #ifndef ARM_WINCE
3608 /* All structures/unions bigger than one word are returned in memory.
3609 Also catch the case where int_size_in_bytes returns -1. In this case
3610 the aggregate is either huge or of variable size, and in either case
3611 we will want to return it via memory and not in a register. */
3612 if (size < 0 || size > UNITS_PER_WORD)
3613 return true;
3615 if (TREE_CODE (type) == RECORD_TYPE)
3619 /* For a struct the APCS says that we only return in a register
3620 if the type is 'integer like' and every addressable element
3621 has an offset of zero. For practical purposes this means
3622 that the structure can have at most one non bit-field element
3623 and that this element must be the first one in the structure. */
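      /* Editor's illustration (not from the APCS text): both structures
	 below fit in one word, yet only the first is 'integer like':

	     struct ok  { char c; };		  returned in r0
	     struct bad { char a; char b; };	  returned in memory

	 because only the first member may be a non-bit-field.  */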
3625 /* Find the first field, ignoring non FIELD_DECL things which will
3626 have been created by C++. */
3627 for (field = TYPE_FIELDS (type);
3628 field && TREE_CODE (field) != FIELD_DECL;
3629 field = DECL_CHAIN (field))
3630 continue;
3632 if (field == NULL)
3633 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3635 /* Check that the first field is valid for returning in a register. */
3637 /* ... Floats are not allowed */
3638 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3639 return true;
3641 /* ... Aggregates that are not themselves valid for returning in
3642 a register are not allowed. */
3643 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3644 return true;
3646 /* Now check the remaining fields, if any. Only bitfields are allowed,
3647 since they are not addressable. */
3648 for (field = DECL_CHAIN (field);
3649 field;
3650 field = DECL_CHAIN (field))
3652 if (TREE_CODE (field) != FIELD_DECL)
3655 if (!DECL_BIT_FIELD_TYPE (field))
3656 return true;
3657 }
3659 return false;
3660 }
3662 if (TREE_CODE (type) == UNION_TYPE)
3666 /* Unions can be returned in registers if every element is
3667 integral, or can be returned in an integer register. */
3668 for (field = TYPE_FIELDS (type);
3669 field;
3670 field = DECL_CHAIN (field))
3672 if (TREE_CODE (field) != FIELD_DECL)
3673 continue;
3675 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3676 return true;
3678 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3679 return true;
3680 }
3682 return false;
3683 }
3684 #endif /* not ARM_WINCE */
3686 /* Return all other types in memory. */
3687 return true;
3688 }
3690 /* Indicate whether or not words of a double are in big-endian order. */
3693 arm_float_words_big_endian (void)
3695 if (TARGET_MAVERICK)
3696 return 0;
3698 /* For FPA, float words are always big-endian. For VFP, floats words
3699 follow the memory system mode. */
3707 return (TARGET_BIG_END ? 1 : 0);
3712 const struct pcs_attribute_arg
3716 } pcs_attribute_args[] =
3718 {"aapcs", ARM_PCS_AAPCS},
3719 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3721 /* We could recognize these, but changes would be needed elsewhere
3722 to implement them. */
3723 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3724 {"atpcs", ARM_PCS_ATPCS},
3725 {"apcs", ARM_PCS_APCS},
3727 {NULL, ARM_PCS_UNKNOWN}
3731 arm_pcs_from_attribute (tree attr)
3733 const struct pcs_attribute_arg *ptr;
3736 /* Get the value of the argument. */
3737 if (TREE_VALUE (attr) == NULL_TREE
3738 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3739 return ARM_PCS_UNKNOWN;
3741 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3743 /* Check it against the list of known arguments. */
3744 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3745 if (streq (arg, ptr->arg))
3746 return ptr->value;
3748 /* An unrecognized PCS name. */
3749 return ARM_PCS_UNKNOWN;
3750 }
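/* Editor's illustration (not from the original source): a declaration
   such as

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   is looked up in pcs_attribute_args above and yields
   ARM_PCS_AAPCS_VFP.  */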
3752 /* Get the PCS variant to use for this call. TYPE is the function's type
3753 specification, DECL is the specific declaration. DECL may be null if
3754 the call could be indirect or if this is a library call. */
3756 arm_get_pcs_model (const_tree type, const_tree decl)
3758 bool user_convention = false;
3759 enum arm_pcs user_pcs = arm_pcs_default;
3764 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3767 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3768 user_convention = true;
3771 if (TARGET_AAPCS_BASED)
3773 /* Detect varargs functions. These always use the base rules
3774 (no argument is ever a candidate for a co-processor
3775 register). */
3776 bool base_rules = stdarg_p (type);
3778 if (user_convention)
3780 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3781 sorry ("non-AAPCS derived PCS variant");
3782 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3783 error ("variadic functions must use the base AAPCS variant");
3787 return ARM_PCS_AAPCS;
3788 else if (user_convention)
3790 else if (decl && flag_unit_at_a_time)
3792 /* Local functions never leak outside this compilation unit,
3793 so we are free to use whatever conventions are
3794 appropriate. */
3795 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3796 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3798 return ARM_PCS_AAPCS_LOCAL;
3801 else if (user_convention && user_pcs != arm_pcs_default)
3802 sorry ("PCS variant");
3804 /* For everything else we use the target's default. */
3805 return arm_pcs_default;
3810 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3811 const_tree fntype ATTRIBUTE_UNUSED,
3812 rtx libcall ATTRIBUTE_UNUSED,
3813 const_tree fndecl ATTRIBUTE_UNUSED)
3815 /* Record the unallocated VFP registers. */
3816 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3817 pcum->aapcs_vfp_reg_alloc = 0;
3820 /* Walk down the type tree of TYPE counting consecutive base elements.
3821 If *MODEP is VOIDmode, then set it to the first valid floating point
3822 type. If a non-floating point type is found, or if a floating point
3823 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3824 otherwise return the count in the sub-tree. */
3826 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3828 enum machine_mode mode;
3831 switch (TREE_CODE (type))
3834 mode = TYPE_MODE (type);
3835 if (mode != DFmode && mode != SFmode)
3838 if (*modep == VOIDmode)
3847 mode = TYPE_MODE (TREE_TYPE (type));
3848 if (mode != DFmode && mode != SFmode)
3851 if (*modep == VOIDmode)
3860 /* Use V2SImode and V4SImode as representatives of all 64-bit
3861 and 128-bit vector types, whether or not those modes are
3862 supported with the present options. */
3863 size = int_size_in_bytes (type);
3876 if (*modep == VOIDmode)
3879 /* Vector modes are considered to be opaque: two vectors are
3880 equivalent for the purposes of being homogeneous aggregates
3881 if they are the same size. */
3890 tree index = TYPE_DOMAIN (type);
3892 /* Can't handle incomplete types. */
3893 if (!COMPLETE_TYPE_P(type))
3896 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3899 || !TYPE_MAX_VALUE (index)
3900 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3901 || !TYPE_MIN_VALUE (index)
3902 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3906 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3907 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3909 /* There must be no padding. */
3910 if (!host_integerp (TYPE_SIZE (type), 1)
3911 || (tree_low_cst (TYPE_SIZE (type), 1)
3912 != count * GET_MODE_BITSIZE (*modep)))
3924 /* Can't handle incomplete types. */
3925 if (!COMPLETE_TYPE_P(type))
3928 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3930 if (TREE_CODE (field) != FIELD_DECL)
3933 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3939 /* There must be no padding. */
3940 if (!host_integerp (TYPE_SIZE (type), 1)
3941 || (tree_low_cst (TYPE_SIZE (type), 1)
3942 != count * GET_MODE_BITSIZE (*modep)))
3949 case QUAL_UNION_TYPE:
3951 /* These aren't very interesting except in a degenerate case. */
3956 /* Can't handle incomplete types. */
3957 if (!COMPLETE_TYPE_P(type))
3960 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3962 if (TREE_CODE (field) != FIELD_DECL)
3965 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3968 count = count > sub_count ? count : sub_count;
3971 /* There must be no padding. */
3972 if (!host_integerp (TYPE_SIZE (type), 1)
3973 || (tree_low_cst (TYPE_SIZE (type), 1)
3974 != count * GET_MODE_BITSIZE (*modep)))
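/* Editor's illustration (not from the original source): for a type
   such as

     struct quad { double x, y, z, w; };

   the walk above returns 4 with *modep set to DFmode, i.e. a
   homogeneous aggregate of four doubles, which the AAPCS VFP rules
   can pass in consecutive VFP registers.  */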
3987 /* Return true if PCS_VARIANT should use VFP registers. */
3989 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3991 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3993 static bool seen_thumb1_vfp = false;
3995 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3997 sorry ("Thumb-1 hard-float VFP ABI");
3998 /* sorry() is not immediately fatal, so only display this once. */
3999 seen_thumb1_vfp = true;
4005 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4008 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4009 (TARGET_VFP_DOUBLE || !is_double));
4013 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4014 enum machine_mode mode, const_tree type,
4015 enum machine_mode *base_mode, int *count)
4017 enum machine_mode new_mode = VOIDmode;
4019 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4020 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4021 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4026 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4029 new_mode = (mode == DCmode ? DFmode : SFmode);
4031 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4033 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4035 if (ag_count > 0 && ag_count <= 4)
4044 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4047 *base_mode = new_mode;
4052 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4053 enum machine_mode mode, const_tree type)
4055 int count ATTRIBUTE_UNUSED;
4056 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4058 if (!use_vfp_abi (pcs_variant, false))
4060 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4065 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4068 if (!use_vfp_abi (pcum->pcs_variant, false))
4071 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4072 &pcum->aapcs_vfp_rmode,
4073 &pcum->aapcs_vfp_rcount);
4077 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4078 const_tree type ATTRIBUTE_UNUSED)
4080 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4081 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4084 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4085 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4087 pcum->aapcs_vfp_reg_alloc = mask << regno;
4088 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4091 int rcount = pcum->aapcs_vfp_rcount;
4093 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4097 /* Avoid using unsupported vector modes. */
4098 if (rmode == V2SImode)
4100 else if (rmode == V4SImode)
4107 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4108 for (i = 0; i < rcount; i++)
4110 rtx tmp = gen_rtx_REG (rmode,
4111 FIRST_VFP_REGNUM + regno + i * rshift);
4112 tmp = gen_rtx_EXPR_LIST
4114 GEN_INT (i * GET_MODE_SIZE (rmode)));
4115 XVECEXP (par, 0, i) = tmp;
4118 pcum->aapcs_reg = par;
4121 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4128 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4129 enum machine_mode mode,
4130 const_tree type ATTRIBUTE_UNUSED)
4132 if (!use_vfp_abi (pcs_variant, false))
4135 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4138 enum machine_mode ag_mode;
4143 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4148 if (ag_mode == V2SImode)
4150 else if (ag_mode == V4SImode)
4156 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4158 for (i = 0; i < count; i++)
4160 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4161 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4162 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4163 XVECEXP (par, 0, i) = tmp;
4169 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4173 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4174 enum machine_mode mode ATTRIBUTE_UNUSED,
4175 const_tree type ATTRIBUTE_UNUSED)
4177 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4178 pcum->aapcs_vfp_reg_alloc = 0;
4182 #define AAPCS_CP(X) \
4183 { \
4184 aapcs_ ## X ## _cum_init, \
4185 aapcs_ ## X ## _is_call_candidate, \
4186 aapcs_ ## X ## _allocate, \
4187 aapcs_ ## X ## _is_return_candidate, \
4188 aapcs_ ## X ## _allocate_return_reg, \
4189 aapcs_ ## X ## _advance \
4190 }
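/* Editor's note: for instance, AAPCS_CP(vfp) expands to

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }

   matching the hook order of the structure defined below.  */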
4192 /* Table of co-processors that can be used to pass arguments in
4193 registers. Ideally no argument should be a candidate for more than
4194 one co-processor table entry, but the table is processed in order
4195 and stops after the first match. If that entry then fails to put
4196 the argument into a co-processor register, the argument will go on
4197 the stack. */
4198 static struct
4199 {
4200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4201 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4204 BLKmode) is a candidate for this co-processor's registers; this
4205 function should ignore any position-dependent state in
4206 CUMULATIVE_ARGS and only use call-type dependent information. */
4207 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4209 /* Return true if the argument does get a co-processor register; it
4210 should set aapcs_reg to an RTX of the register allocated as is
4211 required for a return from FUNCTION_ARG. */
4212 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4214 /* Return true if a result of mode MODE (or type TYPE if MODE is
4215 BLKmode) can be returned in this co-processor's registers. */
4216 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4218 /* Allocate and return an RTX element to hold the return type of a
4219 call, this routine must not fail and will only be called if
4220 is_return_candidate returned true with the same parameters. */
4221 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4223 /* Finish processing this argument and prepare to start processing
4225 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4226 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4234 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4239 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4240 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4247 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4249 /* We aren't passed a decl, so we can't check that a call is local.
4250 However, it isn't clear that that would be a win anyway, since it
4251 might limit some tail-calling opportunities. */
4252 enum arm_pcs pcs_variant;
4256 const_tree fndecl = NULL_TREE;
4258 if (TREE_CODE (fntype) == FUNCTION_DECL)
4261 fntype = TREE_TYPE (fntype);
4264 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4267 pcs_variant = arm_pcs_default;
4269 if (pcs_variant != ARM_PCS_AAPCS)
4273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4274 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4283 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4286 /* We aren't passed a decl, so we can't check that a call is local.
4287 However, it isn't clear that that would be a win anyway, since it
4288 might limit some tail-calling opportunities. */
4289 enum arm_pcs pcs_variant;
4290 int unsignedp ATTRIBUTE_UNUSED;
4294 const_tree fndecl = NULL_TREE;
4296 if (TREE_CODE (fntype) == FUNCTION_DECL)
4299 fntype = TREE_TYPE (fntype);
4302 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4305 pcs_variant = arm_pcs_default;
4307 /* Promote integer types. */
4308 if (type && INTEGRAL_TYPE_P (type))
4309 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4311 if (pcs_variant != ARM_PCS_AAPCS)
4315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4316 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4318 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4322 /* Promote small structs returned in a register to full-word size
4323 for big-endian AAPCS. */
4324 if (type && arm_return_in_msb (type))
4326 HOST_WIDE_INT size = int_size_in_bytes (type);
4327 if (size % UNITS_PER_WORD != 0)
4329 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4330 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4334 return gen_rtx_REG (mode, R0_REGNUM);
4338 aapcs_libcall_value (enum machine_mode mode)
4340 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4343 /* Lay out a function argument using the AAPCS rules. The rule
4344 numbers referred to here are those in the AAPCS. */
4346 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4347 const_tree type, bool named)
4352 /* We only need to do this once per argument. */
4353 if (pcum->aapcs_arg_processed)
4356 pcum->aapcs_arg_processed = true;
4358 /* Special case: if named is false then we are handling an incoming
4359 anonymous argument which is on the stack. */
4363 /* Is this a potential co-processor register candidate? */
4364 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4366 int slot = aapcs_select_call_coproc (pcum, mode, type);
4367 pcum->aapcs_cprc_slot = slot;
4369 /* We don't have to apply any of the rules from part B of the
4370 preparation phase; these are handled elsewhere in the compiler. */
4375 /* A Co-processor register candidate goes either in its own
4376 class of registers or on the stack. */
4377 if (!pcum->aapcs_cprc_failed[slot])
4379 /* C1.cp - Try to allocate the argument to co-processor registers. */
4381 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4384 /* C2.cp - Put the argument on the stack and note that we
4385 can't assign any more candidates in this slot. We also
4386 need to note that we have allocated stack space, so that
4387 we won't later try to split a non-cprc candidate between
4388 core registers and the stack. */
4389 pcum->aapcs_cprc_failed[slot] = true;
4390 pcum->can_split = false;
4393 /* We didn't get a register, so this argument goes on the stack. */
4395 gcc_assert (pcum->can_split == false);
4400 /* C3 - For double-word aligned arguments, round the NCRN up to the
4401 next even number. */
4402 ncrn = pcum->aapcs_ncrn;
4403 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4406 nregs = ARM_NUM_REGS2 (mode, type);
4408 /* Sigh, this test should really assert that nregs > 0, but a GCC
4409 extension allows empty structs and then gives them empty size; it
4410 then allows such a structure to be passed by value. For some of
4411 the code below we have to pretend that such an argument has
4412 non-zero size so that we 'locate' it correctly either in
4413 registers or on the stack. */
4414 gcc_assert (nregs >= 0);
4416 nregs2 = nregs ? nregs : 1;
4418 /* C4 - Argument fits entirely in core registers. */
4419 if (ncrn + nregs2 <= NUM_ARG_REGS)
4421 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4422 pcum->aapcs_next_ncrn = ncrn + nregs;
4426 /* C5 - Some core registers left and there are no arguments already
4427 on the stack: split this argument between the remaining core
4428 registers and the stack. */
4429 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4431 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4432 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4433 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4437 /* C6 - NCRN is set to 4. */
4438 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4440 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
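/* Illustrative sketch (an addition, not from the original sources):
   for an AAPCS call f (int a, double b) with the NCRN initially 0,
   rule C4 places A in r0; B needs doubleword alignment, so rule C3
   rounds the NCRN from 1 up to 2 and B occupies r2/r3.  A hypothetical
   helper capturing the core of rules C3 and C4:

     static int
     first_core_reg_for_arg (int ncrn, int nregs, int dw_aligned)
     {
       if (dw_aligned)
         ncrn = (ncrn + 1) & ~1;           // C3: round up to even
       return (ncrn + nregs <= 4) ? ncrn   // C4: fits in r0-r3
                                  : -1;    // C5/C6: split or stack
     }
*/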
4444 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4445 for a call to a function whose data type is FNTYPE.
4446 For a library call, FNTYPE is NULL. */
4448 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4450 tree fndecl ATTRIBUTE_UNUSED)
4452 /* Long call handling. */
4454 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4456 pcum->pcs_variant = arm_pcs_default;
4458 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4460 if (arm_libcall_uses_aapcs_base (libname))
4461 pcum->pcs_variant = ARM_PCS_AAPCS;
4463 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4464 pcum->aapcs_reg = NULL_RTX;
4465 pcum->aapcs_partial = 0;
4466 pcum->aapcs_arg_processed = false;
4467 pcum->aapcs_cprc_slot = -1;
4468 pcum->can_split = true;
4470 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4474 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4476 pcum->aapcs_cprc_failed[i] = false;
4477 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4485 /* On the ARM, the offset starts at 0. */
4487 pcum->iwmmxt_nregs = 0;
4488 pcum->can_split = true;
4490 /* Varargs vectors are treated the same as long long.
4491 named_count avoids having to change the way arm handles 'named'. */
4492 pcum->named_count = 0;
4495 if (TARGET_REALLY_IWMMXT && fntype)
4499 for (fn_arg = TYPE_ARG_TYPES (fntype);
4501 fn_arg = TREE_CHAIN (fn_arg))
4502 pcum->named_count += 1;
4504 if (! pcum->named_count)
4505 pcum->named_count = INT_MAX;
4510 /* Return true if mode/type need doubleword alignment. */
4512 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4514 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4515 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
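/* Illustrative note (an addition): on AAPCS targets PARM_BOUNDARY is
   32 bits, so for example DImode, or a type declared with
   __attribute__ ((aligned (8))), answers true here and triggers rule
   C3 above.  */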
4519 /* Determine where to put an argument to a function.
4520 Value is zero to push the argument on the stack,
4521 or a hard register in which to store the argument.
4523 MODE is the argument's machine mode.
4524 TYPE is the data type of the argument (as a tree).
4525 This is null for libcalls where that information may not be available.
4527 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4528 the preceding args and about the function being called.
4529 NAMED is nonzero if this argument is a named parameter
4530 (otherwise it is an extra parameter matching an ellipsis).
4532 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4533 other arguments are passed on the stack. If (NAMED == 0) (which happens
4534 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4535 defined), say it is passed on the stack (function_prologue will
4536 indeed arrange for it to be passed on the stack if necessary). */
4539 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4540 const_tree type, bool named)
4544 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4545 a call insn (op3 of a call_value insn). */
4546 if (mode == VOIDmode)
4549 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4551 aapcs_layout_arg (pcum, mode, type, named);
4552 return pcum->aapcs_reg;
4555 /* Varargs vectors are treated the same as long long.
4556 named_count avoids having to change the way arm handles 'named'. */
4557 if (TARGET_IWMMXT_ABI
4558 && arm_vector_mode_supported_p (mode)
4559 && pcum->named_count > pcum->nargs + 1)
4561 if (pcum->iwmmxt_nregs <= 9)
4562 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4565 pcum->can_split = false;
4570 /* Put doubleword aligned quantities in even register pairs. */
4572 && ARM_DOUBLEWORD_ALIGN
4573 && arm_needs_doubleword_align (mode, type))
4576 /* Only allow splitting an arg between regs and memory if all preceding
4577 args were allocated to regs. For args passed by reference we only count
4578 the reference pointer. */
4579 if (pcum->can_split)
4582 nregs = ARM_NUM_REGS2 (mode, type);
4584 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4587 return gen_rtx_REG (mode, pcum->nregs);
4591 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4592 tree type, bool named)
4594 int nregs = pcum->nregs;
4596 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4598 aapcs_layout_arg (pcum, mode, type, named);
4599 return pcum->aapcs_partial;
4602 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4605 if (NUM_ARG_REGS > nregs
4606 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4608 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
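/* Worked example for the pre-AAPCS path above (an illustrative
   addition): with three argument words already allocated to r0-r2, a
   named DImode argument needs two registers but only r3 is left, so
   this returns (4 - 3) * 4 = 4 partial bytes; the low word travels in
   r3 and the high word goes on the stack.  */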
4613 /* Update the data in PCUM to advance over an argument
4614 of mode MODE and data type TYPE.
4615 (TYPE is null for libcalls where that information may not be available.) */
4618 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4619 const_tree type, bool named)
4621 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4623 aapcs_layout_arg (pcum, mode, type, named);
4625 if (pcum->aapcs_cprc_slot >= 0)
4627 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4629 pcum->aapcs_cprc_slot = -1;
4632 /* Generic stuff. */
4633 pcum->aapcs_arg_processed = false;
4634 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4635 pcum->aapcs_reg = NULL_RTX;
4636 pcum->aapcs_partial = 0;
4641 if (arm_vector_mode_supported_p (mode)
4642 && pcum->named_count > pcum->nargs
4643 && TARGET_IWMMXT_ABI)
4644 pcum->iwmmxt_nregs += 1;
4646 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4650 /* Variable sized types are passed by reference. This is a GCC
4651 extension to the ARM ABI. */
4654 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4655 enum machine_mode mode ATTRIBUTE_UNUSED,
4656 const_tree type, bool named ATTRIBUTE_UNUSED)
4658 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4661 /* Encode the current state of the #pragma [no_]long_calls. */
4664 OFF, /* No #pragma [no_]long_calls is in effect. */
4665 LONG, /* #pragma long_calls is in effect. */
4666 SHORT /* #pragma no_long_calls is in effect. */
4669 static arm_pragma_enum arm_pragma_long_calls = OFF;
4672 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4674 arm_pragma_long_calls = LONG;
4678 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4680 arm_pragma_long_calls = SHORT;
4684 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4686 arm_pragma_long_calls = OFF;
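/* Example of how these pragmas appear in user code (an illustrative
   addition, not part of the original file):

     #pragma long_calls
     void far_away (void);    -- treated as if marked long_call
     #pragma no_long_calls
     void near_by (void);     -- treated as if marked short_call
     #pragma long_calls_off   -- revert to the command-line default
*/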
4689 /* Handle an attribute requiring a FUNCTION_DECL;
4690 arguments as in struct attribute_spec.handler. */
4692 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4693 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4695 if (TREE_CODE (*node) != FUNCTION_DECL)
4697 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4699 *no_add_attrs = true;
4705 /* Handle an "interrupt" or "isr" attribute;
4706 arguments as in struct attribute_spec.handler. */
4708 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4713 if (TREE_CODE (*node) != FUNCTION_DECL)
4715 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4717 *no_add_attrs = true;
4719 /* FIXME: the argument, if any, is checked for type attributes;
4720 should it be checked for decl ones? */
4724 if (TREE_CODE (*node) == FUNCTION_TYPE
4725 || TREE_CODE (*node) == METHOD_TYPE)
4727 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4729 warning (OPT_Wattributes, "%qE attribute ignored",
4731 *no_add_attrs = true;
4734 else if (TREE_CODE (*node) == POINTER_TYPE
4735 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4736 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4737 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4739 *node = build_variant_type_copy (*node);
4740 TREE_TYPE (*node) = build_type_attribute_variant
4742 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4743 *no_add_attrs = true;
4747 /* Possibly pass this attribute on from the type to a decl. */
4748 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4749 | (int) ATTR_FLAG_FUNCTION_NEXT
4750 | (int) ATTR_FLAG_ARRAY_NEXT))
4752 *no_add_attrs = true;
4753 return tree_cons (name, args, NULL_TREE);
4757 warning (OPT_Wattributes, "%qE attribute ignored",
4766 /* Handle a "pcs" attribute; arguments as in struct
4767 attribute_spec.handler. */
4769 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4770 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4772 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4774 warning (OPT_Wattributes, "%qE attribute ignored", name);
4775 *no_add_attrs = true;
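/* Illustrative usage (an addition): the "pcs" attribute selects the
   calling convention of an individual function, e.g.

     double f (double) __attribute__ ((pcs ("aapcs")));

   Only strings recognized by arm_pcs_from_attribute are accepted;
   anything else is diagnosed and dropped above.  */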
4780 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4781 /* Handle the "notshared" attribute. This attribute is another way of
4782 requesting hidden visibility. ARM's compiler supports
4783 "__declspec(notshared)"; we support the same thing via an attribute. */
4787 arm_handle_notshared_attribute (tree *node,
4788 tree name ATTRIBUTE_UNUSED,
4789 tree args ATTRIBUTE_UNUSED,
4790 int flags ATTRIBUTE_UNUSED,
4793 tree decl = TYPE_NAME (*node);
4797 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4798 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4799 *no_add_attrs = false;
4805 /* Return 0 if the attributes for two types are incompatible, 1 if they
4806 are compatible, and 2 if they are nearly compatible (which causes a
4807 warning to be generated). */
4809 arm_comp_type_attributes (const_tree type1, const_tree type2)
4813 /* Check for mismatch of non-default calling convention. */
4814 if (TREE_CODE (type1) != FUNCTION_TYPE)
4817 /* Check for mismatched call attributes. */
4818 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4819 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4820 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4821 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4823 /* Only bother to check if an attribute is defined. */
4824 if (l1 | l2 | s1 | s2)
4826 /* If one type has an attribute, the other must have the same attribute. */
4827 if ((l1 != l2) || (s1 != s2))
4830 /* Disallow mixed attributes. */
4831 if ((l1 & s2) || (l2 & s1))
4835 /* Check for mismatched ISR attribute. */
4836 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4838 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4839 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4841 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
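/* Worked example (an illustrative addition): given

     void f (void) __attribute__ ((long_call));
     void g (void);

   the types of F and G compare as incompatible (0) here, because only
   one of them carries a call-type attribute.  */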
4848 /* Assign default attributes to a newly defined type. This is used to
4849 set short_call/long_call attributes for function types of
4850 functions defined inside corresponding #pragma scopes. */
4852 arm_set_default_type_attributes (tree type)
4854 /* Add __attribute__ ((long_call)) to all functions when inside
4855 #pragma long_calls, or __attribute__ ((short_call)) when inside
4856 #pragma no_long_calls. */
4857 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4859 tree type_attr_list, attr_name;
4860 type_attr_list = TYPE_ATTRIBUTES (type);
4862 if (arm_pragma_long_calls == LONG)
4863 attr_name = get_identifier ("long_call");
4864 else if (arm_pragma_long_calls == SHORT)
4865 attr_name = get_identifier ("short_call");
4869 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4870 TYPE_ATTRIBUTES (type) = type_attr_list;
4874 /* Return true if DECL is known to be linked into section SECTION. */
4877 arm_function_in_section_p (tree decl, section *section)
4879 /* We can only be certain about functions defined in the same
4880 compilation unit. */
4881 if (!TREE_STATIC (decl))
4884 /* Make sure that SYMBOL always binds to the definition in this
4885 compilation unit. */
4886 if (!targetm.binds_local_p (decl))
4889 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4890 if (!DECL_SECTION_NAME (decl))
4892 /* Make sure that we will not create a unique section for DECL. */
4893 if (flag_function_sections || DECL_ONE_ONLY (decl))
4897 return function_section (decl) == section;
4900 /* Return nonzero if a 32-bit "long_call" should be generated for
4901 a call from the current function to DECL. We generate a long_call if the function:
4904 a. has an __attribute__ ((long_call))
4905 or b. is within the scope of a #pragma long_calls
4906 or c. the -mlong-calls command-line switch has been specified
4908 However, we do not generate a long call if the function:
4910 d. has an __attribute__ ((short_call))
4911 or e. is inside the scope of a #pragma no_long_calls
4912 or f. is defined in the same section as the current function. */
4915 arm_is_long_call_p (tree decl)
4920 return TARGET_LONG_CALLS;
4922 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4923 if (lookup_attribute ("short_call", attrs))
4926 /* For "f", be conservative, and only cater for cases in which the
4927 whole of the current function is placed in the same section. */
4928 if (!flag_reorder_blocks_and_partition
4929 && TREE_CODE (decl) == FUNCTION_DECL
4930 && arm_function_in_section_p (decl, current_function_section ()))
4933 if (lookup_attribute ("long_call", attrs))
4936 return TARGET_LONG_CALLS;
4939 /* Return nonzero if it is ok to make a tail-call to DECL. */
4941 arm_function_ok_for_sibcall (tree decl, tree exp)
4943 unsigned long func_type;
4945 if (cfun->machine->sibcall_blocked)
4948 /* Never tailcall something for which we have no decl, or if we
4949 are generating code for Thumb-1. */
4950 if (decl == NULL || TARGET_THUMB1)
4953 /* The PIC register is live on entry to VxWorks PLT entries, so we
4954 must make the call before restoring the PIC register. */
4955 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4958 /* Cannot tail-call to long calls, since these are out of range of
4959 a branch instruction. */
4960 if (arm_is_long_call_p (decl))
4963 /* If we are interworking and the function is not declared static
4964 then we can't tail-call it unless we know that it exists in this
4965 compilation unit (since it might be a Thumb routine). */
4966 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4969 func_type = arm_current_func_type ();
4970 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4971 if (IS_INTERRUPT (func_type))
4974 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4976 /* Check that the return value locations are the same. For
4977 example that we aren't returning a value from the sibling in
4978 a VFP register but then need to transfer it to a core register. */
4982 a = arm_function_value (TREE_TYPE (exp), decl, false);
4983 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4985 if (!rtx_equal_p (a, b))
4989 /* Never tailcall if function may be called with a misaligned SP. */
4990 if (IS_STACKALIGN (func_type))
4993 /* Everything else is ok. */
4998 /* Addressing mode support functions. */
5000 /* Return nonzero if X is a legitimate immediate operand when compiling
5001 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5003 legitimate_pic_operand_p (rtx x)
5005 if (GET_CODE (x) == SYMBOL_REF
5006 || (GET_CODE (x) == CONST
5007 && GET_CODE (XEXP (x, 0)) == PLUS
5008 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5014 /* Record that the current function needs a PIC register. Initialize
5015 cfun->machine->pic_reg if we have not already done so. */
5018 require_pic_register (void)
5020 /* A lot of the logic here is made obscure by the fact that this
5021 routine gets called as part of the rtx cost estimation process.
5022 We don't want those calls to affect any assumptions about the real
5023 function; and further, we can't call entry_of_function() until we
5024 start the real expansion process. */
5025 if (!crtl->uses_pic_offset_table)
5027 gcc_assert (can_create_pseudo_p ());
5028 if (arm_pic_register != INVALID_REGNUM)
5030 if (!cfun->machine->pic_reg)
5031 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5033 /* Play games to avoid marking the function as needing pic
5034 if we are being called as part of the cost-estimation process. */
5036 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5037 crtl->uses_pic_offset_table = 1;
5043 if (!cfun->machine->pic_reg)
5044 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5046 /* Play games to avoid marking the function as needing pic
5047 if we are being called as part of the cost-estimation process. */
5049 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5051 crtl->uses_pic_offset_table = 1;
5054 arm_load_pic_register (0UL);
5058 /* We can be called during expansion of PHI nodes, where
5059 we can't yet emit instructions directly in the final
5060 insn stream. Queue the insns on the entry edge, they will
5061 be committed after everything else is expanded. */
5062 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5069 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5071 if (GET_CODE (orig) == SYMBOL_REF
5072 || GET_CODE (orig) == LABEL_REF)
5078 gcc_assert (can_create_pseudo_p ());
5079 reg = gen_reg_rtx (Pmode);
5082 /* VxWorks does not impose a fixed gap between segments; the run-time
5083 gap can be different from the object-file gap. We therefore can't
5084 use GOTOFF unless we are absolutely sure that the symbol is in the
5085 same segment as the GOT. Unfortunately, the flexibility of linker
5086 scripts means that we can't be sure of that in general, so assume
5087 that GOTOFF is never valid on VxWorks. */
5088 if ((GET_CODE (orig) == LABEL_REF
5089 || (GET_CODE (orig) == SYMBOL_REF &&
5090 SYMBOL_REF_LOCAL_P (orig)))
5092 && !TARGET_VXWORKS_RTP)
5093 insn = arm_pic_static_addr (orig, reg);
5099 /* If this function doesn't have a pic register, create one now. */
5100 require_pic_register ();
5102 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5104 /* Make the MEM as close to a constant as possible. */
5105 mem = SET_SRC (pat);
5106 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5107 MEM_READONLY_P (mem) = 1;
5108 MEM_NOTRAP_P (mem) = 1;
5110 insn = emit_insn (pat);
5113 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5115 set_unique_reg_note (insn, REG_EQUAL, orig);
5119 else if (GET_CODE (orig) == CONST)
5123 if (GET_CODE (XEXP (orig, 0)) == PLUS
5124 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5127 /* Handle the case where we have: const (UNSPEC_TLS). */
5128 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5129 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5132 /* Handle the case where we have:
5133 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5135 if (GET_CODE (XEXP (orig, 0)) == PLUS
5136 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5137 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5139 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5145 gcc_assert (can_create_pseudo_p ());
5146 reg = gen_reg_rtx (Pmode);
5149 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5151 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5152 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5153 base == reg ? 0 : reg);
5155 if (GET_CODE (offset) == CONST_INT)
5157 /* The base register doesn't really matter; we only want to
5158 test the index for the appropriate mode. */
5159 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5161 gcc_assert (can_create_pseudo_p ());
5162 offset = force_reg (Pmode, offset);
5165 if (GET_CODE (offset) == CONST_INT)
5166 return plus_constant (base, INTVAL (offset));
5169 if (GET_MODE_SIZE (mode) > 4
5170 && (GET_MODE_CLASS (mode) == MODE_INT
5171 || TARGET_SOFT_FLOAT))
5173 emit_insn (gen_addsi3 (reg, base, offset));
5177 return gen_rtx_PLUS (Pmode, base, offset);
5184 /* Find a spare register to use during the prolog of a function. */
5187 thumb_find_work_register (unsigned long pushed_regs_mask)
5191 /* Check the argument registers first as these are call-used. The
5192 register allocation order means that sometimes r3 might be used
5193 but earlier argument registers might not, so check them all. */
5194 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5195 if (!df_regs_ever_live_p (reg))
5198 /* Before going on to check the call-saved registers we can try a couple
5199 more ways of deducing that r3 is available. The first is when we are
5200 pushing anonymous arguments onto the stack and we have fewer than 4
5201 registers worth of fixed arguments(*). In this case r3 will be part of
5202 the variable argument list and so we can be sure that it will be
5203 pushed right at the start of the function. Hence it will be available
5204 for the rest of the prologue.
5205 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5206 if (cfun->machine->uses_anonymous_args
5207 && crtl->args.pretend_args_size > 0)
5208 return LAST_ARG_REGNUM;
5210 /* The other case is when we have fixed arguments but fewer than 4 registers'
5211 worth. In this case r3 might be used in the body of the function, but
5212 it is not being used to convey an argument into the function. In theory
5213 we could just check crtl->args.size to see how many bytes are
5214 being passed in argument registers, but it seems that it is unreliable.
5215 Sometimes it will have the value 0 when in fact arguments are being
5216 passed. (See testcase execute/20021111-1.c for an example). So we
5217 check the args_info.nregs field as well. The problem with this field is
5218 that it makes no allowances for arguments that are passed to the
5219 function but which are not used. Hence we could miss an opportunity
5220 when a function has an unused argument in r3. But it is better to be
5221 safe than to be sorry. */
5222 if (! cfun->machine->uses_anonymous_args
5223 && crtl->args.size >= 0
5224 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5225 && crtl->args.info.nregs < 4)
5226 return LAST_ARG_REGNUM;
5228 /* Otherwise look for a call-saved register that is going to be pushed. */
5229 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5230 if (pushed_regs_mask & (1 << reg))
5235 /* Thumb-2 can use high regs. */
5236 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5237 if (pushed_regs_mask & (1 << reg))
5240 /* Something went wrong - thumb_compute_save_reg_mask()
5241 should have arranged for a suitable register to be pushed. */
5245 static GTY(()) int pic_labelno;
5247 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5251 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5253 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5255 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5258 gcc_assert (flag_pic);
5260 pic_reg = cfun->machine->pic_reg;
5261 if (TARGET_VXWORKS_RTP)
5263 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5264 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5265 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5267 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5269 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5270 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5274 /* We use an UNSPEC rather than a LABEL_REF because this label
5275 never appears in the code stream. */
5277 labelno = GEN_INT (pic_labelno++);
5278 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5279 l1 = gen_rtx_CONST (VOIDmode, l1);
5281 /* On the ARM the PC register contains 'dot + 8' at the time of the
5282 addition; on the Thumb it is 'dot + 4'. */
5283 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5284 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5286 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5290 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5292 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5294 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5296 else /* TARGET_THUMB1 */
5298 if (arm_pic_register != INVALID_REGNUM
5299 && REGNO (pic_reg) > LAST_LO_REGNUM)
5301 /* We will have pushed the pic register, so we should always be
5302 able to find a work register. */
5303 pic_tmp = gen_rtx_REG (SImode,
5304 thumb_find_work_register (saved_regs));
5305 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5306 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5309 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5310 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5314 /* Need to emit this whether or not we obey regdecls,
5315 since setjmp/longjmp can cause life info to screw up. */
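/* Sketch of the sequence emitted above for the common ELF case (an
   illustrative addition; exact registers and literals depend on the
   target):

       ldr     rPIC, .LCn        @ .LCn: &GOT - (.LPIC0 + 8)
     .LPIC0:
       add     rPIC, pc, rPIC    @ pc reads as .LPIC0 + 8 in ARM state

   In Thumb state the pc reads as dot + 4, matching the comment
   above.  */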
5319 /* Generate code to load the address of a static var when flag_pic is set. */
5321 arm_pic_static_addr (rtx orig, rtx reg)
5323 rtx l1, labelno, offset_rtx, insn;
5325 gcc_assert (flag_pic);
5327 /* We use an UNSPEC rather than a LABEL_REF because this label
5328 never appears in the code stream. */
5329 labelno = GEN_INT (pic_labelno++);
5330 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5331 l1 = gen_rtx_CONST (VOIDmode, l1);
5333 /* On the ARM the PC register contains 'dot + 8' at the time of the
5334 addition; on the Thumb it is 'dot + 4'. */
5335 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5336 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5337 UNSPEC_SYMBOL_OFFSET);
5338 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5342 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5344 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5346 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5348 else /* TARGET_THUMB1 */
5350 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5351 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5357 /* Return nonzero if X is valid as an ARM state addressing register. */
5359 arm_address_register_rtx_p (rtx x, int strict_p)
5363 if (GET_CODE (x) != REG)
5369 return ARM_REGNO_OK_FOR_BASE_P (regno);
5371 return (regno <= LAST_ARM_REGNUM
5372 || regno >= FIRST_PSEUDO_REGISTER
5373 || regno == FRAME_POINTER_REGNUM
5374 || regno == ARG_POINTER_REGNUM);
5377 /* Return TRUE if this rtx is the difference of a symbol and a label,
5378 and will reduce to a PC-relative relocation in the object file.
5379 Expressions like this can be left alone when generating PIC, rather
5380 than forced through the GOT. */
5382 pcrel_constant_p (rtx x)
5384 if (GET_CODE (x) == MINUS)
5385 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5390 /* Return true if X will surely end up in an index register after next splitting pass. */
5393 will_be_in_index_register (const_rtx x)
5395 /* arm.md: calculate_pic_address will split this into a register. */
5396 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5399 /* Return nonzero if X is a valid ARM state address operand. */
5401 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5405 enum rtx_code code = GET_CODE (x);
5407 if (arm_address_register_rtx_p (x, strict_p))
5410 use_ldrd = (TARGET_LDRD
5412 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5414 if (code == POST_INC || code == PRE_DEC
5415 || ((code == PRE_INC || code == POST_DEC)
5416 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5417 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5419 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5420 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5421 && GET_CODE (XEXP (x, 1)) == PLUS
5422 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5424 rtx addend = XEXP (XEXP (x, 1), 1);
5426 /* Don't allow ldrd post-increment by register because it's hard
5427 to fix up invalid register choices. */
5429 && GET_CODE (x) == POST_MODIFY
5430 && GET_CODE (addend) == REG)
5433 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5434 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5437 /* After reload, constants split into minipools will have addresses
5438 from a LABEL_REF. */
5439 else if (reload_completed
5440 && (code == LABEL_REF
5442 && GET_CODE (XEXP (x, 0)) == PLUS
5443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5444 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5447 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5450 else if (code == PLUS)
5452 rtx xop0 = XEXP (x, 0);
5453 rtx xop1 = XEXP (x, 1);
5455 return ((arm_address_register_rtx_p (xop0, strict_p)
5456 && ((GET_CODE (xop1) == CONST_INT
5457 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5458 || (!strict_p && will_be_in_index_register (xop1))))
5459 || (arm_address_register_rtx_p (xop1, strict_p)
5460 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5464 /* Reload currently can't handle MINUS, so disable this for now */
5465 else if (GET_CODE (x) == MINUS)
5467 rtx xop0 = XEXP (x, 0);
5468 rtx xop1 = XEXP (x, 1);
5470 return (arm_address_register_rtx_p (xop0, strict_p)
5471 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5475 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5476 && code == SYMBOL_REF
5477 && CONSTANT_POOL_ADDRESS_P (x)
5479 && symbol_mentioned_p (get_pool_constant (x))
5480 && ! pcrel_constant_p (get_pool_constant (x))))
5486 /* Return nonzero if X is a valid Thumb-2 address operand. */
5488 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5491 enum rtx_code code = GET_CODE (x);
5493 if (arm_address_register_rtx_p (x, strict_p))
5496 use_ldrd = (TARGET_LDRD
5498 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5500 if (code == POST_INC || code == PRE_DEC
5501 || ((code == PRE_INC || code == POST_DEC)
5502 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5503 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5505 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5506 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5507 && GET_CODE (XEXP (x, 1)) == PLUS
5508 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5510 /* Thumb-2 only has autoincrement by constant. */
5511 rtx addend = XEXP (XEXP (x, 1), 1);
5512 HOST_WIDE_INT offset;
5514 if (GET_CODE (addend) != CONST_INT)
5517 offset = INTVAL (addend);
5518 if (GET_MODE_SIZE (mode) <= 4)
5519 return (offset > -256 && offset < 256);
5521 return (use_ldrd && offset > -1024 && offset < 1024
5522 && (offset & 3) == 0);
5525 /* After reload, constants split into minipools will have addresses
5526 from a LABEL_REF. */
5527 else if (reload_completed
5528 && (code == LABEL_REF
5530 && GET_CODE (XEXP (x, 0)) == PLUS
5531 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5532 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5535 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5538 else if (code == PLUS)
5540 rtx xop0 = XEXP (x, 0);
5541 rtx xop1 = XEXP (x, 1);
5543 return ((arm_address_register_rtx_p (xop0, strict_p)
5544 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5545 || (!strict_p && will_be_in_index_register (xop1))))
5546 || (arm_address_register_rtx_p (xop1, strict_p)
5547 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5550 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5551 && code == SYMBOL_REF
5552 && CONSTANT_POOL_ADDRESS_P (x)
5554 && symbol_mentioned_p (get_pool_constant (x))
5555 && ! pcrel_constant_p (get_pool_constant (x))))
5561 /* Return nonzero if INDEX is valid for an address index operand in
5564 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5567 HOST_WIDE_INT range;
5568 enum rtx_code code = GET_CODE (index);
5570 /* Standard coprocessor addressing modes. */
5571 if (TARGET_HARD_FLOAT
5572 && (TARGET_FPA || TARGET_MAVERICK)
5573 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5574 || (TARGET_MAVERICK && mode == DImode)))
5575 return (code == CONST_INT && INTVAL (index) < 1024
5576 && INTVAL (index) > -1024
5577 && (INTVAL (index) & 3) == 0);
5580 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5581 return (code == CONST_INT
5582 && INTVAL (index) < 1016
5583 && INTVAL (index) > -1024
5584 && (INTVAL (index) & 3) == 0);
5586 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5587 return (code == CONST_INT
5588 && INTVAL (index) < 1024
5589 && INTVAL (index) > -1024
5590 && (INTVAL (index) & 3) == 0);
5592 if (arm_address_register_rtx_p (index, strict_p)
5593 && (GET_MODE_SIZE (mode) <= 4))
5596 if (mode == DImode || mode == DFmode)
5598 if (code == CONST_INT)
5600 HOST_WIDE_INT val = INTVAL (index);
5603 return val > -256 && val < 256;
5605 return val > -4096 && val < 4092;
5608 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5611 if (GET_MODE_SIZE (mode) <= 4
5615 || (mode == QImode && outer == SIGN_EXTEND))))
5619 rtx xiop0 = XEXP (index, 0);
5620 rtx xiop1 = XEXP (index, 1);
5622 return ((arm_address_register_rtx_p (xiop0, strict_p)
5623 && power_of_two_operand (xiop1, SImode))
5624 || (arm_address_register_rtx_p (xiop1, strict_p)
5625 && power_of_two_operand (xiop0, SImode)));
5627 else if (code == LSHIFTRT || code == ASHIFTRT
5628 || code == ASHIFT || code == ROTATERT)
5630 rtx op = XEXP (index, 1);
5632 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5633 && GET_CODE (op) == CONST_INT
5635 && INTVAL (op) <= 31);
5639 /* For ARM v4 we may be doing a sign-extend operation during the
5645 || (outer == SIGN_EXTEND && mode == QImode))
5651 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5653 return (code == CONST_INT
5654 && INTVAL (index) < range
5655 && INTVAL (index) > -range);
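/* Examples of ARM-state addresses accepted above (an illustrative
   addition):

     ldr  r0, [r1, #4092]        @ CONST_INT index within range
     ldr  r0, [r1, r2]           @ register index
     ldr  r0, [r1, r2, lsl #2]   @ scaled index (power_of_two_operand)
     ldrd r0, [r1, #200]         @ DImode with TARGET_LDRD, +/-256 range
*/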
5658 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5659 index operand. i.e. 1, 2, 4 or 8. */
5661 thumb2_index_mul_operand (rtx op)
5665 if (GET_CODE (op) != CONST_INT)
5669 return (val == 1 || val == 2 || val == 4 || val == 8);
5672 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5674 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5676 enum rtx_code code = GET_CODE (index);
5678 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5679 /* Standard coprocessor addressing modes. */
5680 if (TARGET_HARD_FLOAT
5681 && (TARGET_FPA || TARGET_MAVERICK)
5682 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5683 || (TARGET_MAVERICK && mode == DImode)))
5684 return (code == CONST_INT && INTVAL (index) < 1024
5685 && INTVAL (index) > -1024
5686 && (INTVAL (index) & 3) == 0);
5688 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5690 /* For DImode assume values will usually live in core regs
5691 and only allow LDRD addressing modes. */
5692 if (!TARGET_LDRD || mode != DImode)
5693 return (code == CONST_INT
5694 && INTVAL (index) < 1024
5695 && INTVAL (index) > -1024
5696 && (INTVAL (index) & 3) == 0);
5700 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5701 return (code == CONST_INT
5702 && INTVAL (index) < 1016
5703 && INTVAL (index) > -1024
5704 && (INTVAL (index) & 3) == 0);
5706 if (arm_address_register_rtx_p (index, strict_p)
5707 && (GET_MODE_SIZE (mode) <= 4))
5710 if (mode == DImode || mode == DFmode)
5712 if (code == CONST_INT)
5714 HOST_WIDE_INT val = INTVAL (index);
5715 /* ??? Can we assume ldrd for thumb2? */
5716 /* Thumb-2 ldrd only has reg+const addressing modes. */
5717 /* ldrd supports offsets of +-1020.
5718 However the ldr fallback does not. */
5719 return val > -256 && val < 256 && (val & 3) == 0;
5727 rtx xiop0 = XEXP (index, 0);
5728 rtx xiop1 = XEXP (index, 1);
5730 return ((arm_address_register_rtx_p (xiop0, strict_p)
5731 && thumb2_index_mul_operand (xiop1))
5732 || (arm_address_register_rtx_p (xiop1, strict_p)
5733 && thumb2_index_mul_operand (xiop0)));
5735 else if (code == ASHIFT)
5737 rtx op = XEXP (index, 1);
5739 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5740 && GET_CODE (op) == CONST_INT
5742 && INTVAL (op) <= 3);
5745 return (code == CONST_INT
5746 && INTVAL (index) < 4096
5747 && INTVAL (index) > -256);
5750 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5752 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5756 if (GET_CODE (x) != REG)
5762 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5764 return (regno <= LAST_LO_REGNUM
5765 || regno > LAST_VIRTUAL_REGISTER
5766 || regno == FRAME_POINTER_REGNUM
5767 || (GET_MODE_SIZE (mode) >= 4
5768 && (regno == STACK_POINTER_REGNUM
5769 || regno >= FIRST_PSEUDO_REGISTER
5770 || x == hard_frame_pointer_rtx
5771 || x == arg_pointer_rtx)));
5774 /* Return nonzero if x is a legitimate index register. This is the case
5775 for any base register that can access a QImode object. */
5777 thumb1_index_register_rtx_p (rtx x, int strict_p)
5779 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5782 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5784 The AP may be eliminated to either the SP or the FP, so we use the
5785 least common denominator, e.g. SImode, and offsets from 0 to 64.
5787 ??? Verify whether the above is the right approach.
5789 ??? Also, the FP may be eliminated to the SP, so perhaps that
5790 needs special handling also.
5792 ??? Look at how the mips16 port solves this problem. It probably uses
5793 better ways to solve some of these problems.
5795 Although it is not incorrect, we don't accept QImode and HImode
5796 addresses based on the frame pointer or arg pointer until the
5797 reload pass starts. This is so that eliminating such addresses
5798 into stack based ones won't produce impossible code. */
5800 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5802 /* ??? Not clear if this is right. Experiment. */
5803 if (GET_MODE_SIZE (mode) < 4
5804 && !(reload_in_progress || reload_completed)
5805 && (reg_mentioned_p (frame_pointer_rtx, x)
5806 || reg_mentioned_p (arg_pointer_rtx, x)
5807 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5808 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5809 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5810 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5813 /* Accept any base register. SP only in SImode or larger. */
5814 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5817 /* This is PC relative data before arm_reorg runs. */
5818 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5819 && GET_CODE (x) == SYMBOL_REF
5820 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5823 /* This is PC relative data after arm_reorg runs. */
5824 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5826 && (GET_CODE (x) == LABEL_REF
5827 || (GET_CODE (x) == CONST
5828 && GET_CODE (XEXP (x, 0)) == PLUS
5829 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5830 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5833 /* Post-inc indexing only supported for SImode and larger. */
5834 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5835 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5838 else if (GET_CODE (x) == PLUS)
5840 /* REG+REG address can be any two index registers. */
5841 /* We disallow FRAME+REG addressing since we know that FRAME
5842 will be replaced with STACK, and SP relative addressing only
5843 permits SP+OFFSET. */
5844 if (GET_MODE_SIZE (mode) <= 4
5845 && XEXP (x, 0) != frame_pointer_rtx
5846 && XEXP (x, 1) != frame_pointer_rtx
5847 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5848 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5849 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5852 /* REG+const has 5-7 bit offset for non-SP registers. */
5853 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5854 || XEXP (x, 0) == arg_pointer_rtx)
5855 && GET_CODE (XEXP (x, 1)) == CONST_INT
5856 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5859 /* REG+const has 10-bit offset for SP, but only SImode and
5860 larger is supported. */
5861 /* ??? Should probably check for DI/DFmode overflow here
5862 just like GO_IF_LEGITIMATE_OFFSET does. */
5863 else if (GET_CODE (XEXP (x, 0)) == REG
5864 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5865 && GET_MODE_SIZE (mode) >= 4
5866 && GET_CODE (XEXP (x, 1)) == CONST_INT
5867 && INTVAL (XEXP (x, 1)) >= 0
5868 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5869 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5872 else if (GET_CODE (XEXP (x, 0)) == REG
5873 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5874 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5875 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5876 && REGNO (XEXP (x, 0))
5877 <= LAST_VIRTUAL_POINTER_REGISTER))
5878 && GET_MODE_SIZE (mode) >= 4
5879 && GET_CODE (XEXP (x, 1)) == CONST_INT
5880 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5884 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5885 && GET_MODE_SIZE (mode) == 4
5886 && GET_CODE (x) == SYMBOL_REF
5887 && CONSTANT_POOL_ADDRESS_P (x)
5889 && symbol_mentioned_p (get_pool_constant (x))
5890 && ! pcrel_constant_p (get_pool_constant (x))))
5896 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5897 instruction of mode MODE. */
5899 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5901 switch (GET_MODE_SIZE (mode))
5904 return val >= 0 && val < 32;
5907 return val >= 0 && val < 64 && (val & 1) == 0;
5911 && (val + GET_MODE_SIZE (mode)) <= 128
5917 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5920 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5921 else if (TARGET_THUMB2)
5922 return thumb2_legitimate_address_p (mode, x, strict_p);
5923 else /* if (TARGET_THUMB1) */
5924 return thumb1_legitimate_address_p (mode, x, strict_p);
5927 /* Build the SYMBOL_REF for __tls_get_addr. */
5929 static GTY(()) rtx tls_get_addr_libfunc;
5932 get_tls_get_addr (void)
5934 if (!tls_get_addr_libfunc)
5935 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5936 return tls_get_addr_libfunc;
5940 arm_load_tp (rtx target)
5943 target = gen_reg_rtx (SImode);
5947 /* Can return in any reg. */
5948 emit_insn (gen_load_tp_hard (target));
5952 /* Always returned in r0. Immediately copy the result into a pseudo;
5953 otherwise other uses of r0 (e.g. setting up function arguments) may
5954 clobber the value. */
5958 emit_insn (gen_load_tp_soft ());
5960 tmp = gen_rtx_REG (SImode, 0);
5961 emit_move_insn (target, tmp);
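/* Background note (an illustrative addition): with a hardware thread
   register, load_tp_hard expands to a CP15 read of TPIDRURO,

     mrc  p15, 0, r0, c13, c0, 3

   whereas load_tp_soft calls the kernel helper __aeabi_read_tp, which
   returns the thread pointer in r0 -- hence the copy above.  */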
5967 load_tls_operand (rtx x, rtx reg)
5971 if (reg == NULL_RTX)
5972 reg = gen_reg_rtx (SImode);
5974 tmp = gen_rtx_CONST (SImode, x);
5976 emit_move_insn (reg, tmp);
5982 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5984 rtx insns, label, labelno, sum;
5988 labelno = GEN_INT (pic_labelno++);
5989 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5990 label = gen_rtx_CONST (VOIDmode, label);
5992 sum = gen_rtx_UNSPEC (Pmode,
5993 gen_rtvec (4, x, GEN_INT (reloc), label,
5994 GEN_INT (TARGET_ARM ? 8 : 4)),
5996 reg = load_tls_operand (sum, reg);
5999 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6000 else if (TARGET_THUMB2)
6001 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6002 else /* TARGET_THUMB1 */
6003 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6005 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6006 Pmode, 1, reg, Pmode);
6008 insns = get_insns ();
6015 legitimize_tls_address (rtx x, rtx reg)
6017 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6018 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6022 case TLS_MODEL_GLOBAL_DYNAMIC:
6023 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6024 dest = gen_reg_rtx (Pmode);
6025 emit_libcall_block (insns, dest, ret, x);
6028 case TLS_MODEL_LOCAL_DYNAMIC:
6029 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6031 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6032 share the LDM result with other LD model accesses. */
6033 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6035 dest = gen_reg_rtx (Pmode);
6036 emit_libcall_block (insns, dest, ret, eqv);
6038 /* Load the addend. */
6039 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6041 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6042 return gen_rtx_PLUS (Pmode, dest, addend);
6044 case TLS_MODEL_INITIAL_EXEC:
6045 labelno = GEN_INT (pic_labelno++);
6046 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6047 label = gen_rtx_CONST (VOIDmode, label);
6048 sum = gen_rtx_UNSPEC (Pmode,
6049 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6050 GEN_INT (TARGET_ARM ? 8 : 4)),
6052 reg = load_tls_operand (sum, reg);
6055 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6056 else if (TARGET_THUMB2)
6057 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6060 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6061 emit_move_insn (reg, gen_const_mem (SImode, reg));
6064 tp = arm_load_tp (NULL_RTX);
6066 return gen_rtx_PLUS (Pmode, tp, reg);
6068 case TLS_MODEL_LOCAL_EXEC:
6069 tp = arm_load_tp (NULL_RTX);
6071 reg = gen_rtx_UNSPEC (Pmode,
6072 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6074 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6076 return gen_rtx_PLUS (Pmode, tp, reg);
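/* Summary of the models handled above (an illustrative addition):
   global dynamic and local dynamic call __tls_get_addr (TLS_GD32, and
   TLS_LDM32 plus a TLS_LDO32 addend, respectively); initial exec loads
   a GOT entry (TLS_IE32) and adds the thread pointer; local exec adds
   a link-time constant (TLS_LE32) to the thread pointer.  */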
6083 /* Try machine-dependent ways of modifying an illegitimate address
6084 to be legitimate. If we find one, return the new, valid address. */
6086 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6090 /* TODO: legitimize_address for Thumb2. */
6093 return thumb_legitimize_address (x, orig_x, mode);
6096 if (arm_tls_symbol_p (x))
6097 return legitimize_tls_address (x, NULL_RTX);
6099 if (GET_CODE (x) == PLUS)
6101 rtx xop0 = XEXP (x, 0);
6102 rtx xop1 = XEXP (x, 1);
6104 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6105 xop0 = force_reg (SImode, xop0);
6107 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6108 xop1 = force_reg (SImode, xop1);
6110 if (ARM_BASE_REGISTER_RTX_P (xop0)
6111 && GET_CODE (xop1) == CONST_INT)
6113 HOST_WIDE_INT n, low_n;
6117 /* VFP addressing modes actually allow greater offsets, but for
6118 now we just stick with the lowest common denominator. */
6120 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6132 low_n = ((mode) == TImode ? 0
6133 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6137 base_reg = gen_reg_rtx (SImode);
6138 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6139 emit_move_insn (base_reg, val);
6140 x = plus_constant (base_reg, low_n);
6142 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6143 x = gen_rtx_PLUS (SImode, xop0, xop1);
6146 /* XXX We don't allow MINUS any more -- see comment in
6147 arm_legitimate_address_outer_p (). */
6148 else if (GET_CODE (x) == MINUS)
6150 rtx xop0 = XEXP (x, 0);
6151 rtx xop1 = XEXP (x, 1);
6153 if (CONSTANT_P (xop0))
6154 xop0 = force_reg (SImode, xop0);
6156 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6157 xop1 = force_reg (SImode, xop1);
6159 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6160 x = gen_rtx_MINUS (SImode, xop0, xop1);
6163 /* Make sure to take full advantage of the pre-indexed addressing mode
6164 with absolute addresses, which often allows the base register to
6165 be factorized across multiple adjacent memory references, and it might
6166 even allow the minipool to be avoided entirely. */
6167 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6170 HOST_WIDE_INT mask, base, index;
6173 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6174 use an 8-bit index. So let's use a 12-bit index for SImode only and
6175 hope that arm_gen_constant will enable ldrb to use more bits. */
6176 bits = (mode == SImode) ? 12 : 8;
6177 mask = (1 << bits) - 1;
6178 base = INTVAL (x) & ~mask;
6179 index = INTVAL (x) & mask;
6180 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6182 /* It'll most probably be more efficient to generate the base
6183 with more bits set and use a negative index instead. */
6187 base_reg = force_reg (SImode, GEN_INT (base));
6188 x = plus_constant (base_reg, index);
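/* Worked example (an illustrative addition): for SImode and
   x = 0x12345, bits = 12 gives mask = 0xfff, base = 0x12000 and
   index = 0x345; the base is loaded into a register once and the
   access becomes ldr rD, [rBASE, #0x345], so neighbouring absolute
   addresses can share rBASE.  */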
6193 /* We need to find and carefully transform any SYMBOL and LABEL
6194 references; so go back to the original address expression. */
6195 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6197 if (new_x != orig_x)
6205 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6206 to be legitimate. If we find one, return the new, valid address. */
6208 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6210 if (arm_tls_symbol_p (x))
6211 return legitimize_tls_address (x, NULL_RTX);
6213 if (GET_CODE (x) == PLUS
6214 && GET_CODE (XEXP (x, 1)) == CONST_INT
6215 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6216 || INTVAL (XEXP (x, 1)) < 0))
6218 rtx xop0 = XEXP (x, 0);
6219 rtx xop1 = XEXP (x, 1);
6220 HOST_WIDE_INT offset = INTVAL (xop1);
6222 /* Try to fold the offset into a biasing of the base register and
6223 then offsetting that. Don't do this when optimizing for space
6224 since it can cause too many CSEs. */
6225 if (optimize_size && offset >= 0
6226 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6228 HOST_WIDE_INT delta;
6231 delta = offset - (256 - GET_MODE_SIZE (mode));
6232 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6233 delta = 31 * GET_MODE_SIZE (mode);
6235 delta = offset & (~31 * GET_MODE_SIZE (mode));
6237 xop0 = force_operand (plus_constant (xop0, offset - delta),
6239 x = plus_constant (xop0, delta);
6241 else if (offset < 0 && offset > -256)
6242 /* Small negative offsets are best done with a subtract before the
6243 dereference; forcing these into a register normally takes two insns. */
6245 x = force_operand (x, NULL_RTX);
6248 /* For the remaining cases, force the constant into a register. */
6249 xop1 = force_reg (SImode, xop1);
6250 x = gen_rtx_PLUS (SImode, xop0, xop1);
6253 else if (GET_CODE (x) == PLUS
6254 && s_register_operand (XEXP (x, 1), SImode)
6255 && !s_register_operand (XEXP (x, 0), SImode))
6257 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6259 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6264 /* We need to find and carefully transform any SYMBOL and LABEL
6265 references; so go back to the original address expression. */
6266 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6268 if (new_x != orig_x)
6276 thumb_legitimize_reload_address (rtx *x_p,
6277 enum machine_mode mode,
6278 int opnum, int type,
6279 int ind_levels ATTRIBUTE_UNUSED)
6283 if (GET_CODE (x) == PLUS
6284 && GET_MODE_SIZE (mode) < 4
6285 && REG_P (XEXP (x, 0))
6286 && XEXP (x, 0) == stack_pointer_rtx
6287 && GET_CODE (XEXP (x, 1)) == CONST_INT
6288 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6293 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6294 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6298 /* If both registers are hi-regs, then it's better to reload the
6299 entire expression rather than each register individually. That
6300 only requires one reload register rather than two. */
6301 if (GET_CODE (x) == PLUS
6302 && REG_P (XEXP (x, 0))
6303 && REG_P (XEXP (x, 1))
6304 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6305 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6310 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6311 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6318 /* Test for various thread-local symbols. */
6320 /* Return TRUE if X is a thread-local symbol. */
6323 arm_tls_symbol_p (rtx x)
6325 if (! TARGET_HAVE_TLS)
6328 if (GET_CODE (x) != SYMBOL_REF)
6331 return SYMBOL_REF_TLS_MODEL (x) != 0;
6334 /* Helper for arm_tls_referenced_p. */
6337 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6339 if (GET_CODE (*x) == SYMBOL_REF)
6340 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6342 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6343 TLS offsets, not real symbol references. */
6344 if (GET_CODE (*x) == UNSPEC
6345 && XINT (*x, 1) == UNSPEC_TLS)
6351 /* Return TRUE if X contains any TLS symbol references. */
6354 arm_tls_referenced_p (rtx x)
6356 if (! TARGET_HAVE_TLS)
6359 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6362 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6365 arm_cannot_force_const_mem (rtx x)
6369 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6371 split_const (x, &base, &offset);
6372 if (GET_CODE (base) == SYMBOL_REF
6373 && !offset_within_block_p (base, INTVAL (offset)))
6376 return arm_tls_referenced_p (x);
6379 #define REG_OR_SUBREG_REG(X) \
6380 (GET_CODE (X) == REG \
6381 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6383 #define REG_OR_SUBREG_RTX(X) \
6384 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6387 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6389 enum machine_mode mode = GET_MODE (x);
6403 return COSTS_N_INSNS (1);
6406 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6407 {
6408 int cycles = 0;
6409 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6411 while (i)
6412 {
6413 i >>= 2;
6414 cycles++;
6415 }
6416 return COSTS_N_INSNS (2) + cycles;
6418 return COSTS_N_INSNS (1) + 16;
6421 return (COSTS_N_INSNS (1)
6422 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6423 + (GET_CODE (SET_DEST (x)) == MEM)));
6428 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6430 if (thumb_shiftable_const (INTVAL (x)))
6431 return COSTS_N_INSNS (2);
6432 return COSTS_N_INSNS (3);
6434 else if ((outer == PLUS || outer == COMPARE)
6435 && INTVAL (x) < 256 && INTVAL (x) > -256)
6437 else if ((outer == IOR || outer == XOR || outer == AND)
6438 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6439 return COSTS_N_INSNS (1);
6440 else if (outer == AND)
6443 /* This duplicates the tests in the andsi3 expander. */
6444 for (i = 9; i <= 31; i++)
6445 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6446 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6447 return COSTS_N_INSNS (2);
6449 else if (outer == ASHIFT || outer == ASHIFTRT
6450 || outer == LSHIFTRT)
6452 return COSTS_N_INSNS (2);
6458 return COSTS_N_INSNS (3);
6476 /* XXX another guess. */
6477 /* Memory costs quite a lot for the first word, but subsequent words
6478 load at the equivalent of a single insn each. */
6479 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6480 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6485 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6491 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6492 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6498 return total + COSTS_N_INSNS (1);
6500 /* Assume a two-shift sequence. Increase the cost slightly so
6501 we prefer actual shifts over an extend operation. */
6502 return total + 1 + COSTS_N_INSNS (2);
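/* For instance (illustrative, not code emitted from here): a Thumb-1
   SImode sign extension of a QImode register is the two-shift sequence
   "lsls rd, rs, #24; asrs rd, rd, #24", so the extra "+ 1" makes a
   genuine pair of shifts look marginally cheaper than spending the
   same two insns on an extend.  */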
6510 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6512 enum machine_mode mode = GET_MODE (x);
6513 enum rtx_code subcode;
6515 enum rtx_code code = GET_CODE (x);
6521 /* Memory costs quite a lot for the first word, but subsequent words
6522 load at the equivalent of a single insn each. */
6523 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6530 if (TARGET_HARD_FLOAT && mode == SFmode)
6531 *total = COSTS_N_INSNS (2);
6532 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6533 *total = COSTS_N_INSNS (4);
6535 *total = COSTS_N_INSNS (20);
6539 if (GET_CODE (XEXP (x, 1)) == REG)
6540 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6541 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6542 *total = rtx_cost (XEXP (x, 1), code, speed);
6548 *total += COSTS_N_INSNS (4);
6553 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6554 *total += rtx_cost (XEXP (x, 0), code, speed);
6557 *total += COSTS_N_INSNS (3);
6561 *total += COSTS_N_INSNS (1);
6562 /* Increase the cost of complex shifts because they aren't any faster,
6563 and reduce dual issue opportunities. */
6564 if (arm_tune_cortex_a9
6565 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6573 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6574 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6575 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6577 *total += rtx_cost (XEXP (x, 1), code, speed);
6581 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6582 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6584 *total += rtx_cost (XEXP (x, 0), code, speed);
6591 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6593 if (TARGET_HARD_FLOAT
6595 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6597 *total = COSTS_N_INSNS (1);
6598 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6599 && arm_const_double_rtx (XEXP (x, 0)))
6601 *total += rtx_cost (XEXP (x, 1), code, speed);
6605 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6606 && arm_const_double_rtx (XEXP (x, 1)))
6608 *total += rtx_cost (XEXP (x, 0), code, speed);
6614 *total = COSTS_N_INSNS (20);
6618 *total = COSTS_N_INSNS (1);
6619 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6620 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6622 *total += rtx_cost (XEXP (x, 1), code, speed);
6626 subcode = GET_CODE (XEXP (x, 1));
6627 if (subcode == ASHIFT || subcode == ASHIFTRT
6628 || subcode == LSHIFTRT
6629 || subcode == ROTATE || subcode == ROTATERT)
6631 *total += rtx_cost (XEXP (x, 0), code, speed);
6632 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6636 /* A shift as a part of RSB costs no more than RSB itself. */
6637 if (GET_CODE (XEXP (x, 0)) == MULT
6638 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6640 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6641 *total += rtx_cost (XEXP (x, 1), code, speed);
6646 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6648 *total += rtx_cost (XEXP (x, 0), code, speed);
6649 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6653 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6654 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6656 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6657 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6658 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6659 *total += COSTS_N_INSNS (1);
6667 if (code == PLUS && arm_arch6 && mode == SImode
6668 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6669 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6671 *total = COSTS_N_INSNS (1);
6672 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6674 *total += rtx_cost (XEXP (x, 1), code, speed);
6678 /* MLA: All arguments must be registers. We filter out
6679 multiplication by a power of two, so that we fall down into
6680 the code below. */
6681 if (GET_CODE (XEXP (x, 0)) == MULT
6682 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6684 /* The cost comes from the cost of the multiply. */
6688 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6690 if (TARGET_HARD_FLOAT
6692 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6694 *total = COSTS_N_INSNS (1);
6695 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6696 && arm_const_double_rtx (XEXP (x, 1)))
6698 *total += rtx_cost (XEXP (x, 0), code, speed);
6705 *total = COSTS_N_INSNS (20);
6709 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6710 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6712 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6713 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6714 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6715 *total += COSTS_N_INSNS (1);
6721 case AND: case XOR: case IOR:
6723 /* Normally the frame registers will be split into reg+const during
6724 reload, so it is a bad idea to combine them with other instructions,
6725 since then they might not be moved outside of loops. As a compromise
6726 we allow integration with ops that have a constant as their second
6727 operand. */
6728 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6729 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6730 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6731 *total = COSTS_N_INSNS (1);
6735 *total += COSTS_N_INSNS (2);
6736 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6737 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6739 *total += rtx_cost (XEXP (x, 0), code, speed);
6746 *total += COSTS_N_INSNS (1);
6747 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6748 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6750 *total += rtx_cost (XEXP (x, 0), code, speed);
6753 subcode = GET_CODE (XEXP (x, 0));
6754 if (subcode == ASHIFT || subcode == ASHIFTRT
6755 || subcode == LSHIFTRT
6756 || subcode == ROTATE || subcode == ROTATERT)
6758 *total += rtx_cost (XEXP (x, 1), code, speed);
6759 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6764 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6766 *total += rtx_cost (XEXP (x, 1), code, speed);
6767 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6771 if (subcode == UMIN || subcode == UMAX
6772 || subcode == SMIN || subcode == SMAX)
6774 *total = COSTS_N_INSNS (3);
6781 /* This should have been handled by the CPU specific routines. */
6785 if (arm_arch3m && mode == SImode
6786 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6787 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6788 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6789 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6790 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6791 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6793 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6796 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6800 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6802 if (TARGET_HARD_FLOAT
6804 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6806 *total = COSTS_N_INSNS (1);
6809 *total = COSTS_N_INSNS (2);
6815 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6816 if (mode == SImode && code == NOT)
6818 subcode = GET_CODE (XEXP (x, 0));
6819 if (subcode == ASHIFT || subcode == ASHIFTRT
6820 || subcode == LSHIFTRT
6821 || subcode == ROTATE || subcode == ROTATERT
6823 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6825 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6826 /* Register shifts cost an extra cycle. */
6827 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6828 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6837 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6839 *total = COSTS_N_INSNS (4);
6843 operand = XEXP (x, 0);
6845 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6846 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6847 && GET_CODE (XEXP (operand, 0)) == REG
6848 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6849 *total += COSTS_N_INSNS (1);
6850 *total += (rtx_cost (XEXP (x, 1), code, speed)
6851 + rtx_cost (XEXP (x, 2), code, speed));
6855 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6857 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6863 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6864 && mode == SImode && XEXP (x, 1) == const0_rtx)
6866 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6872 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6873 && mode == SImode && XEXP (x, 1) == const0_rtx)
6875 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6895 /* SCC insns. If the comparison has already been performed,
6896 they cost 2 instructions. Otherwise they need an additional
6897 comparison before them. */
6898 *total = COSTS_N_INSNS (2);
6899 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6906 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6912 *total += COSTS_N_INSNS (1);
6913 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6914 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6916 *total += rtx_cost (XEXP (x, 0), code, speed);
6920 subcode = GET_CODE (XEXP (x, 0));
6921 if (subcode == ASHIFT || subcode == ASHIFTRT
6922 || subcode == LSHIFTRT
6923 || subcode == ROTATE || subcode == ROTATERT)
6925 *total += rtx_cost (XEXP (x, 1), code, speed);
6926 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6931 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6933 *total += rtx_cost (XEXP (x, 1), code, speed);
6934 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6944 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6945 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6946 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6947 *total += rtx_cost (XEXP (x, 1), code, speed);
6951 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6953 if (TARGET_HARD_FLOAT
6955 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6957 *total = COSTS_N_INSNS (1);
6960 *total = COSTS_N_INSNS (20);
6963 *total = COSTS_N_INSNS (1);
6965 *total += COSTS_N_INSNS (3);
6971 if (GET_MODE_CLASS (mode) == MODE_INT)
6973 rtx op = XEXP (x, 0);
6974 enum machine_mode opmode = GET_MODE (op);
6977 *total += COSTS_N_INSNS (1);
6979 if (opmode != SImode)
6983 /* If !arm_arch4, we use one of the extendhisi2_mem
6984 or movhi_bytes patterns for HImode. For a QImode
6985 sign extension, we first zero-extend from memory
6986 and then perform a shift sequence. */
6987 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6988 *total += COSTS_N_INSNS (2);
6991 *total += COSTS_N_INSNS (1);
6993 /* We don't have the necessary insn, so we need to perform some
6994 other operation. */
6995 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6996 /* An and with constant 255. */
6997 *total += COSTS_N_INSNS (1);
6999 /* A shift sequence. Increase costs slightly to avoid
7000 combining two shifts into an extend operation. */
7001 *total += COSTS_N_INSNS (2) + 1;
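/* E.g. (illustrative): the shift sequence charged for here is a pair
   such as "mov rd, rs, lsl #24; mov rd, rd, asr #24" for a QImode
   sign extension in ARM state, and the "+ 1" again biases combine
   away from collapsing real shift pairs into an extend.  */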
7007 switch (GET_MODE (XEXP (x, 0)))
7014 *total = COSTS_N_INSNS (1);
7024 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7028 if (const_ok_for_arm (INTVAL (x))
7029 || const_ok_for_arm (~INTVAL (x)))
7030 *total = COSTS_N_INSNS (1);
7032 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7033 INTVAL (x), NULL_RTX,
7040 *total = COSTS_N_INSNS (3);
7044 *total = COSTS_N_INSNS (1);
7048 *total = COSTS_N_INSNS (1);
7049 *total += rtx_cost (XEXP (x, 0), code, speed);
7053 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7054 && (mode == SFmode || !TARGET_VFP_SINGLE))
7055 *total = COSTS_N_INSNS (1);
7057 *total = COSTS_N_INSNS (4);
7061 *total = COSTS_N_INSNS (4);
7066 /* Estimates the size cost of thumb1 instructions.
7067 For now most of the code is copied from thumb1_rtx_costs. We need more
7068 fine-grained tuning when we have more related test cases. */
7070 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7072 enum machine_mode mode = GET_MODE (x);
7085 return COSTS_N_INSNS (1);
7088 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7090 /* Thumb1 mul instruction can't operate on a constant. We must load it
7091 into a register first. */
7092 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7093 return COSTS_N_INSNS (1) + const_size;
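/* E.g. (illustrative): costing "x * 10" here gives COSTS_N_INSNS (1)
   for the muls itself plus CONST_SIZE for first materializing 10 in a
   register (one insn, via the CONST_INT case below), i.e.
   COSTS_N_INSNS (2) overall.  */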
7095 return COSTS_N_INSNS (1);
7098 return (COSTS_N_INSNS (1)
7099 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7100 + (GET_CODE (SET_DEST (x)) == MEM)));
7105 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7106 return COSTS_N_INSNS (1);
7107 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7108 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7109 return COSTS_N_INSNS (2);
7110 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7111 if (thumb_shiftable_const (INTVAL (x)))
7112 return COSTS_N_INSNS (2);
7113 return COSTS_N_INSNS (3);
7115 else if ((outer == PLUS || outer == COMPARE)
7116 && INTVAL (x) < 256 && INTVAL (x) > -256)
7118 else if ((outer == IOR || outer == XOR || outer == AND)
7119 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7120 return COSTS_N_INSNS (1);
7121 else if (outer == AND)
7124 /* This duplicates the tests in the andsi3 expander. */
7125 for (i = 9; i <= 31; i++)
7126 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7127 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7128 return COSTS_N_INSNS (2);
7130 else if (outer == ASHIFT || outer == ASHIFTRT
7131 || outer == LSHIFTRT)
7133 return COSTS_N_INSNS (2);
7139 return COSTS_N_INSNS (3);
7157 /* XXX another guess. */
7158 /* Memory costs quite a lot for the first word, but subsequent words
7159 load at the equivalent of a single insn each. */
7160 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7161 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7166 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7171 /* XXX still guessing. */
7172 switch (GET_MODE (XEXP (x, 0)))
7175 return (1 + (mode == DImode ? 4 : 0)
7176 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7179 return (4 + (mode == DImode ? 4 : 0)
7180 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7183 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7194 /* RTX costs when optimizing for size. */
7196 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7199 enum machine_mode mode = GET_MODE (x);
7202 *total = thumb1_size_rtx_costs (x, code, outer_code);
7206 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7210 /* A memory access costs 1 insn if the mode is small, or the address is
7211 a single register, otherwise it costs one insn per word. */
7212 if (REG_P (XEXP (x, 0)))
7213 *total = COSTS_N_INSNS (1);
7215 && GET_CODE (XEXP (x, 0)) == PLUS
7216 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7217 /* This will be split into two instructions.
7218 See arm.md:calculate_pic_address. */
7219 *total = COSTS_N_INSNS (2);
7221 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7228 /* Needs a libcall, so it costs about this. */
7229 *total = COSTS_N_INSNS (2);
7233 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7235 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7243 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7245 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7248 else if (mode == SImode)
7250 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7251 /* Slightly disparage register shifts, but not by much. */
7252 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7253 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7257 /* Needs a libcall. */
7258 *total = COSTS_N_INSNS (2);
7262 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7263 && (mode == SFmode || !TARGET_VFP_SINGLE))
7265 *total = COSTS_N_INSNS (1);
7271 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7272 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7274 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7275 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7276 || subcode1 == ROTATE || subcode1 == ROTATERT
7277 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7278 || subcode1 == ASHIFTRT)
7280 /* It's just the cost of the two operands. */
7285 *total = COSTS_N_INSNS (1);
7289 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7293 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7294 && (mode == SFmode || !TARGET_VFP_SINGLE))
7296 *total = COSTS_N_INSNS (1);
7300 /* A shift as a part of ADD costs nothing. */
7301 if (GET_CODE (XEXP (x, 0)) == MULT
7302 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7304 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7305 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7306 *total += rtx_cost (XEXP (x, 1), code, false);
7311 case AND: case XOR: case IOR:
7314 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7316 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7317 || subcode == LSHIFTRT || subcode == ASHIFTRT
7318 || (code == AND && subcode == NOT))
7320 /* It's just the cost of the two operands. */
7326 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7330 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7334 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7335 && (mode == SFmode || !TARGET_VFP_SINGLE))
7337 *total = COSTS_N_INSNS (1);
7343 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7352 if (cc_register (XEXP (x, 0), VOIDmode))
7355 *total = COSTS_N_INSNS (1);
7359 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7360 && (mode == SFmode || !TARGET_VFP_SINGLE))
7361 *total = COSTS_N_INSNS (1);
7363 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7368 return arm_rtx_costs_1 (x, outer_code, total, 0);
7371 if (const_ok_for_arm (INTVAL (x)))
7372 /* A multiplication by a constant requires another instruction
7373 to load the constant to a register. */
7374 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7376 else if (const_ok_for_arm (~INTVAL (x)))
7377 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7378 else if (const_ok_for_arm (-INTVAL (x)))
7380 if (outer_code == COMPARE || outer_code == PLUS
7381 || outer_code == MINUS)
7384 *total = COSTS_N_INSNS (1);
7387 *total = COSTS_N_INSNS (2);
7393 *total = COSTS_N_INSNS (2);
7397 *total = COSTS_N_INSNS (4);
7402 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7403 cost of these slightly. */
7404 *total = COSTS_N_INSNS (1) + 1;
7408 if (mode != VOIDmode)
7409 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7411 *total = COSTS_N_INSNS (4); /* Who knows? */
7416 /* RTX costs: dispatch to the size costs when optimizing for size,
otherwise to the per-core tuned speed costs. */
7418 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7422 return arm_size_rtx_costs (x, (enum rtx_code) code,
7423 (enum rtx_code) outer_code, total);
7425 return current_tune->rtx_costs (x, (enum rtx_code) code,
7426 (enum rtx_code) outer_code,
7430 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7431 supported on any "slowmul" cores, so it can be ignored. */
7434 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7435 int *total, bool speed)
7437 enum machine_mode mode = GET_MODE (x);
7441 *total = thumb1_rtx_costs (x, code, outer_code);
7448 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7451 *total = COSTS_N_INSNS (20);
7455 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7457 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7458 & (unsigned HOST_WIDE_INT) 0xffffffff);
7459 int cost, const_ok = const_ok_for_arm (i);
7460 int j, booth_unit_size;
7462 /* Tune as appropriate. */
7463 cost = const_ok ? 4 : 8;
7464 booth_unit_size = 2;
7465 for (j = 0; i && j < 32; j += booth_unit_size)
7466 {
7467 i >>= booth_unit_size;
7468 cost++;
7469 }
7471 *total = COSTS_N_INSNS (cost);
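/* Worked example (illustrative): a multiplier of 0x51 is
   const_ok_for_arm, so COST starts at 4; the loop then retires two
   bits per Booth step (0x51 -> 0x14 -> 0x5 -> 0x1 -> 0), four steps in
   all, for COSTS_N_INSNS (8) before the operand cost is added.  */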
7472 *total += rtx_cost (XEXP (x, 0), code, speed);
7476 *total = COSTS_N_INSNS (20);
7480 return arm_rtx_costs_1 (x, outer_code, total, speed);
7485 /* RTX cost for cores with a fast multiply unit (M variants). */
7488 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7489 int *total, bool speed)
7491 enum machine_mode mode = GET_MODE (x);
7495 *total = thumb1_rtx_costs (x, code, outer_code);
7499 /* ??? should thumb2 use different costs? */
7503 /* There is no point basing this on the tuning, since it is always the
7504 fast variant if it exists at all. */
7506 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7507 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7508 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7510 *total = COSTS_N_INSNS (2);
7517 *total = COSTS_N_INSNS (5);
7521 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7523 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7524 & (unsigned HOST_WIDE_INT) 0xffffffff);
7525 int cost, const_ok = const_ok_for_arm (i);
7526 int j, booth_unit_size;
7528 /* Tune as appropriate. */
7529 cost = const_ok ? 4 : 8;
7530 booth_unit_size = 8;
7531 for (j = 0; i && j < 32; j += booth_unit_size)
7532 {
7533 i >>= booth_unit_size;
7534 cost++;
7535 }
7537 *total = COSTS_N_INSNS (cost);
7543 *total = COSTS_N_INSNS (4);
7547 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7549 if (TARGET_HARD_FLOAT
7551 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7553 *total = COSTS_N_INSNS (1);
7558 /* Requires a lib call */
7559 *total = COSTS_N_INSNS (20);
7563 return arm_rtx_costs_1 (x, outer_code, total, speed);
7568 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7569 so it can be ignored. */
7572 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7573 int *total, bool speed)
7575 enum machine_mode mode = GET_MODE (x);
7579 *total = thumb1_rtx_costs (x, code, outer_code);
7586 if (GET_CODE (XEXP (x, 0)) != MULT)
7587 return arm_rtx_costs_1 (x, outer_code, total, speed);
7589 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7590 will stall until the multiplication is complete. */
7591 *total = COSTS_N_INSNS (3);
7595 /* There is no point basing this on the tuning, since it is always the
7596 fast variant if it exists at all. */
7598 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7599 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7600 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7602 *total = COSTS_N_INSNS (2);
7609 *total = COSTS_N_INSNS (5);
7613 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7615 /* If operand 1 is a constant we can more accurately
7616 calculate the cost of the multiply. The multiplier can
7617 retire 15 bits on the first cycle and a further 12 on the
7618 second. We do, of course, have to load the constant into
7619 a register first. */
7620 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7621 /* There's a general overhead of one cycle. */
7623 unsigned HOST_WIDE_INT masked_const;
7628 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7630 masked_const = i & 0xffff8000;
7631 if (masked_const != 0)
7634 masked_const = i & 0xf8000000;
7635 if (masked_const != 0)
7638 *total = COSTS_N_INSNS (cost);
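/* Worked example (illustrative, assuming the elided bookkeeping adds
   one cycle per nonzero mask): a multiplier of 0x12345 has bits at or
   above bit 15 (0x12345 & 0xffff8000 != 0) but none at or above bit
   27, so it costs one cycle over the general overhead, while a full
   constant such as 0x12345678 trips both masks and costs two.  */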
7644 *total = COSTS_N_INSNS (3);
7648 /* Requires a lib call */
7649 *total = COSTS_N_INSNS (20);
7653 return arm_rtx_costs_1 (x, outer_code, total, speed);
7658 /* RTX costs for 9e (and later) cores. */
7661 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7662 int *total, bool speed)
7664 enum machine_mode mode = GET_MODE (x);
7671 *total = COSTS_N_INSNS (3);
7675 *total = thumb1_rtx_costs (x, code, outer_code);
7683 /* There is no point basing this on the tuning, since it is always the
7684 fast variant if it exists at all. */
7686 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7687 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7688 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7690 *total = COSTS_N_INSNS (2);
7697 *total = COSTS_N_INSNS (5);
7703 *total = COSTS_N_INSNS (2);
7707 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7709 if (TARGET_HARD_FLOAT
7711 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7713 *total = COSTS_N_INSNS (1);
7718 *total = COSTS_N_INSNS (20);
7722 return arm_rtx_costs_1 (x, outer_code, total, speed);
7725 /* All address computations that can be done are free, but rtx cost returns
7726 the same for practically all of them. So we weight the different types
7727 of address here in the order (most preferred first):
7728 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7730 arm_arm_address_cost (rtx x)
7732 enum rtx_code c = GET_CODE (x);
7734 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7736 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7741 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7744 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7754 arm_thumb_address_cost (rtx x)
7756 enum rtx_code c = GET_CODE (x);
7761 && GET_CODE (XEXP (x, 0)) == REG
7762 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7769 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7771 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7774 /* Adjust cost hook for XScale. */
7776 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7778 /* Some true dependencies can have a higher cost depending
7779 on precisely how certain input operands are used. */
7780 if (REG_NOTE_KIND(link) == 0
7781 && recog_memoized (insn) >= 0
7782 && recog_memoized (dep) >= 0)
7784 int shift_opnum = get_attr_shift (insn);
7785 enum attr_type attr_type = get_attr_type (dep);
7787 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7788 operand for INSN. If we have a shifted input operand and the
7789 instruction we depend on is another ALU instruction, then we may
7790 have to account for an additional stall. */
7791 if (shift_opnum != 0
7792 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7794 rtx shifted_operand;
7797 /* Get the shifted operand. */
7798 extract_insn (insn);
7799 shifted_operand = recog_data.operand[shift_opnum];
7801 /* Iterate over all the operands in DEP. If we write an operand
7802 that overlaps with SHIFTED_OPERAND, then we have to increase the
7803 cost of this dependency. */
7805 preprocess_constraints ();
7806 for (opno = 0; opno < recog_data.n_operands; opno++)
7808 /* We can ignore strict inputs. */
7809 if (recog_data.operand_type[opno] == OP_IN)
7812 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7824 /* Adjust cost hook for Cortex A9. */
7826 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7828 switch (REG_NOTE_KIND (link))
7835 case REG_DEP_OUTPUT:
7836 if (recog_memoized (insn) >= 0
7837 && recog_memoized (dep) >= 0)
7839 if (GET_CODE (PATTERN (insn)) == SET)
7842 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7844 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7846 enum attr_type attr_type_insn = get_attr_type (insn);
7847 enum attr_type attr_type_dep = get_attr_type (dep);
7849 /* By default all dependencies of the form
7852 have an extra latency of 1 cycle because
7853 of the input and output dependency in this
7854 case. However this gets modeled as a true
7855 dependency and hence all these checks. */
7856 if (REG_P (SET_DEST (PATTERN (insn)))
7857 && REG_P (SET_DEST (PATTERN (dep)))
7858 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7859 SET_DEST (PATTERN (dep))))
7861 /* FMACS is a special case where the dependent
7862 instruction can be issued 3 cycles before
7863 the normal latency in case of an output
7864 dependency. */
7865 if ((attr_type_insn == TYPE_FMACS
7866 || attr_type_insn == TYPE_FMACD)
7867 && (attr_type_dep == TYPE_FMACS
7868 || attr_type_dep == TYPE_FMACD))
7870 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7871 *cost = insn_default_latency (dep) - 3;
7873 *cost = insn_default_latency (dep);
7878 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7879 *cost = insn_default_latency (dep) + 1;
7881 *cost = insn_default_latency (dep);
7897 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7898 It corrects the value of COST based on the relationship between
7899 INSN and DEP through the dependence LINK. It returns the new
7900 value. There is a per-core adjust_cost hook to adjust scheduler costs
7901 and the per-core hook can choose to completely override the generic
7902 adjust_cost function. Only put bits of code into arm_adjust_cost that
7903 are common across all cores. */
7905 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7909 /* When generating Thumb-1 code, we want to place flag-setting operations
7910 close to a conditional branch which depends on them, so that we can
7911 omit the comparison. */
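/* E.g. (illustrative): with "subs r3, r3, #1" feeding "bne .L4", the
   subtract already sets the condition flags, so if the two insns stay
   adjacent the explicit comparison disappears; making this dependence
   look free is what keeps the scheduler from separating them.  */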
7913 && REG_NOTE_KIND (link) == 0
7914 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7915 && recog_memoized (dep) >= 0
7916 && get_attr_conds (dep) == CONDS_SET)
7919 if (current_tune->sched_adjust_cost != NULL)
7921 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7925 /* XXX This is not strictly true for the FPA. */
7926 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7927 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7930 /* Call insns don't incur a stall, even if they follow a load. */
7931 if (REG_NOTE_KIND (link) == 0
7932 && GET_CODE (insn) == CALL_INSN)
7935 if ((i_pat = single_set (insn)) != NULL
7936 && GET_CODE (SET_SRC (i_pat)) == MEM
7937 && (d_pat = single_set (dep)) != NULL
7938 && GET_CODE (SET_DEST (d_pat)) == MEM)
7940 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7941 /* This is a load after a store, there is no conflict if the load reads
7942 from a cached area. Assume that loads from the stack, and from the
7943 constant pool are cached, and that others will miss. This is a
7946 if ((GET_CODE (src_mem) == SYMBOL_REF
7947 && CONSTANT_POOL_ADDRESS_P (src_mem))
7948 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7949 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7950 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7957 static int fp_consts_inited = 0;
7959 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7960 static const char * const strings_fp[8] =
7963 "4", "5", "0.5", "10"
7966 static REAL_VALUE_TYPE values_fp[8];
7969 init_fp_table (void)
7974 if (TARGET_VFP)
7975 fp_consts_inited = 1;
7976 else
7977 fp_consts_inited = 8;
7979 for (i = 0; i < fp_consts_inited; i++)
7981 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7986 /* Return TRUE if rtx X is a valid immediate FP constant. */
7988 arm_const_double_rtx (rtx x)
7993 if (!fp_consts_inited)
7996 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7997 if (REAL_VALUE_MINUS_ZERO (r))
8000 for (i = 0; i < fp_consts_inited; i++)
8001 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8007 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8009 neg_const_double_rtx_ok_for_fpa (rtx x)
8014 if (!fp_consts_inited)
8017 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8018 r = real_value_negate (&r);
8019 if (REAL_VALUE_MINUS_ZERO (r))
8022 for (i = 0; i < 8; i++)
8023 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8030 /* VFPv3 has a fairly wide range of representable immediates, formed from
8031 "quarter-precision" floating-point values. These can be evaluated using this
8032 formula (with ^ for exponentiation):
8034 valid = (-1)^s * (n/16) * 2^(4 - r)
8036 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8037 16 <= n <= 31 and 0 <= r <= 7.
8039 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8041 - A (most-significant) is the sign bit.
8042 - BCD are the exponent (encoded as r XOR 3).
8043 - EFGH are the mantissa (encoded as n - 16).
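/* A minimal sketch of the mapping above (standalone and illustrative
   only; the helper name is invented here, not part of this file):

     static unsigned
     vfp3_encode_quarter_prec (int s, int n, int r)
     {
       return (s << 7) | ((r ^ 3) << 4) | (n - 16);
     }

   For 1.0 (s = 0, n = 16, r = 4) this gives 0x70, matching the value
   that vfp3_const_double_index below computes.  */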
8046 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8047 fconst[sd] instruction, or -1 if X isn't suitable. */
8049 vfp3_const_double_index (rtx x)
8051 REAL_VALUE_TYPE r, m;
8053 unsigned HOST_WIDE_INT mantissa, mant_hi;
8054 unsigned HOST_WIDE_INT mask;
8055 HOST_WIDE_INT m1, m2;
8056 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8058 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8061 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8063 /* We can't represent these things, so detect them first. */
8064 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8067 /* Extract sign, exponent and mantissa. */
8068 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8069 r = real_value_abs (&r);
8070 exponent = REAL_EXP (&r);
8071 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8072 highest (sign) bit, with a fixed binary point at bit point_pos.
8073 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8074 bits for the mantissa, this may fail (low bits would be lost). */
8075 real_ldexp (&m, &r, point_pos - exponent);
8076 REAL_VALUE_TO_INT (&m1, &m2, m);
8080 /* If there are bits set in the low part of the mantissa, we can't
8081 represent this value. */
8085 /* Now make it so that mantissa contains the most-significant bits, and move
8086 the point_pos to indicate that the least-significant bits have been
8087 discarded. */
8088 point_pos -= HOST_BITS_PER_WIDE_INT;
8091 /* We can permit four significant bits of mantissa only, plus a high bit
8092 which is always 1. */
8093 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8094 if ((mantissa & mask) != 0)
8097 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8098 mantissa >>= point_pos - 5;
8100 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8101 floating-point immediate zero with Neon using an integer-zero load, but
8102 that case is handled elsewhere.) */
8106 gcc_assert (mantissa >= 16 && mantissa <= 31);
8108 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8109 normalized significands are in the range [1, 2). (Our mantissa is shifted
8110 left 4 places at this point relative to normalized IEEE754 values). GCC
8111 internally uses [0.5, 1) (see real.c), so the exponent returned from
8112 REAL_EXP must be altered. */
8113 exponent = 5 - exponent;
8115 if (exponent < 0 || exponent > 7)
8118 /* Sign, mantissa and exponent are now in the correct form to plug into the
8119 formula described in the comment above. */
8120 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8123 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8125 vfp3_const_double_rtx (rtx x)
8130 return vfp3_const_double_index (x) != -1;
8133 /* Recognize immediates which can be used in various Neon instructions. Legal
8134 immediates are described by the following table (for VMVN variants, the
8135 bitwise inverse of the constant shown is recognized. In either case, VMOV
8136 is output and the correct instruction to use for a given constant is chosen
8137 by the assembler). The constant shown is replicated across all elements of
8138 the destination vector.
8140 insn elems variant constant (binary)
8141 ---- ----- ------- -----------------
8142 vmov i32 0 00000000 00000000 00000000 abcdefgh
8143 vmov i32 1 00000000 00000000 abcdefgh 00000000
8144 vmov i32 2 00000000 abcdefgh 00000000 00000000
8145 vmov i32 3 abcdefgh 00000000 00000000 00000000
8146 vmov i16 4 00000000 abcdefgh
8147 vmov i16 5 abcdefgh 00000000
8148 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8149 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8150 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8151 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8152 vmvn i16 10 00000000 abcdefgh
8153 vmvn i16 11 abcdefgh 00000000
8154 vmov i32 12 00000000 00000000 abcdefgh 11111111
8155 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8156 vmov i32 14 00000000 abcdefgh 11111111 11111111
8157 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8158 vmov i8 16 abcdefgh
8159 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8160 eeeeeeee ffffffff gggggggg hhhhhhhh
8161 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8163 For case 18, B = !b. Representable values are exactly those accepted by
8164 vfp3_const_double_index, but are output as floating-point numbers rather
8167 Variants 0-5 (inclusive) may also be used as immediates for the second
8168 operand of VORR/VBIC instructions.
8170 The INVERSE argument causes the bitwise inverse of the given operand to be
8171 recognized instead (used for recognizing legal immediates for the VAND/VORN
8172 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8173 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8174 output, rather than the real insns vbic/vorr).
8176 INVERSE makes no difference to the recognition of float vectors.
8178 The return value is the variant of immediate as shown in the above table, or
8179 -1 if the given value doesn't match any of the listed patterns.
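/* For example (illustrative): the V4SI constant {0xab, 0xab, 0xab,
   0xab} splats to the per-element byte pattern ab 00 00 00, so it
   matches variant 0 above with *MODCONST = 0xab and *ELEMENTWIDTH =
   32; its bitwise inverse would be recognized as the VMVN variant 6
   instead.  */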
8182 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8183 rtx *modconst, int *elementwidth)
8185 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8187 for (i = 0; i < idx; i += (STRIDE)) \
8192 immtype = (CLASS); \
8193 elsize = (ELSIZE); \
8197 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8198 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8199 unsigned char bytes[16];
8200 int immtype = -1, matches;
8201 unsigned int invmask = inverse ? 0xff : 0;
8203 /* Vectors of float constants. */
8204 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8206 rtx el0 = CONST_VECTOR_ELT (op, 0);
8209 if (!vfp3_const_double_rtx (el0))
8212 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8214 for (i = 1; i < n_elts; i++)
8216 rtx elt = CONST_VECTOR_ELT (op, i);
8219 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8221 if (!REAL_VALUES_EQUAL (r0, re))
8226 *modconst = CONST_VECTOR_ELT (op, 0);
8234 /* Splat vector constant out into a byte vector. */
8235 for (i = 0; i < n_elts; i++)
8237 rtx el = CONST_VECTOR_ELT (op, i);
8238 unsigned HOST_WIDE_INT elpart;
8239 unsigned int part, parts;
8241 if (GET_CODE (el) == CONST_INT)
8243 elpart = INTVAL (el);
8246 else if (GET_CODE (el) == CONST_DOUBLE)
8248 elpart = CONST_DOUBLE_LOW (el);
8254 for (part = 0; part < parts; part++)
8257 for (byte = 0; byte < innersize; byte++)
8259 bytes[idx++] = (elpart & 0xff) ^ invmask;
8260 elpart >>= BITS_PER_UNIT;
8262 if (GET_CODE (el) == CONST_DOUBLE)
8263 elpart = CONST_DOUBLE_HIGH (el);
8268 gcc_assert (idx == GET_MODE_SIZE (mode));
8272 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8273 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8275 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8276 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8278 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8279 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8281 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8282 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8284 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8286 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8288 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8289 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8291 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8292 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8294 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8295 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8297 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8298 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8300 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8302 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8304 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8305 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8307 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8308 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8310 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8311 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8313 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8314 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8316 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8318 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8319 && bytes[i] == bytes[(i + 8) % idx]);
8327 *elementwidth = elsize;
8331 unsigned HOST_WIDE_INT imm = 0;
8333 /* Un-invert bytes of recognized vector, if necessary. */
8335 for (i = 0; i < idx; i++)
8336 bytes[i] ^= invmask;
8340 /* FIXME: Broken on 32-bit H_W_I hosts. */
8341 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8343 for (i = 0; i < 8; i++)
8344 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8345 << (i * BITS_PER_UNIT);
8347 *modconst = GEN_INT (imm);
8351 unsigned HOST_WIDE_INT imm = 0;
8353 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8354 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8356 *modconst = GEN_INT (imm);
8364 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8365 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8366 float elements), and a modified constant (whatever should be output for a
8367 VMOV) in *MODCONST. */
8370 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8371 rtx *modconst, int *elementwidth)
8375 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8381 *modconst = tmpconst;
8384 *elementwidth = tmpwidth;
8389 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8390 the immediate is valid, write a constant suitable for using as an operand
8391 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8392 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8395 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8396 rtx *modconst, int *elementwidth)
8400 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8402 if (retval < 0 || retval > 5)
8406 *modconst = tmpconst;
8409 *elementwidth = tmpwidth;
8414 /* Return a string suitable for output of Neon immediate logic operation
8415 MNEM. */
8418 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8419 int inverse, int quad)
8421 int width, is_valid;
8422 static char templ[40];
8424 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8426 gcc_assert (is_valid != 0);
8429 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8431 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8436 /* Output a sequence of pairwise operations to implement a reduction.
8437 NOTE: We do "too much work" here, because pairwise operations work on two
8438 registers-worth of operands in one go. Unfortunately I don't think we can
8439 exploit those extra calculations to do the full operation in fewer steps.
8440 Although all vector elements of the result but the first are ignored, we
8441 actually calculate the same result in each of the elements. An alternative
8442 such as initially loading a vector with zero to use as each of the second
8443 operands would use up an additional register and take an extra instruction,
8444 for no particular gain. */
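/* For instance (illustrative): summing a V4SF vector {a, b, c, d}
   with this routine takes two pairwise-add steps,
   {a, b, c, d} -> {a+b, c+d, a+b, c+d} -> {a+b+c+d, ...},
   halving I on each trip around the loop below; lane 0 of the final
   destination holds the full reduction.  */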
8447 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8448 rtx (*reduc) (rtx, rtx, rtx))
8450 enum machine_mode inner = GET_MODE_INNER (mode);
8451 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8454 for (i = parts / 2; i >= 1; i /= 2)
8456 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8457 emit_insn (reduc (dest, tmpsum, tmpsum));
8462 /* If VALS is a vector constant that can be loaded into a register
8463 using VDUP, generate instructions to do so and return an RTX to
8464 assign to the register. Otherwise return NULL_RTX. */
8467 neon_vdup_constant (rtx vals)
8469 enum machine_mode mode = GET_MODE (vals);
8470 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8471 int n_elts = GET_MODE_NUNITS (mode);
8472 bool all_same = true;
8476 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8479 for (i = 0; i < n_elts; ++i)
8481 x = XVECEXP (vals, 0, i);
8482 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8487 /* The elements are not all the same. We could handle repeating
8488 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8489 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8493 /* We can load this constant by using VDUP and a constant in a
8494 single ARM register. This will be cheaper than a vector
8497 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8498 return gen_rtx_VEC_DUPLICATE (mode, x);
8501 /* Generate code to load VALS, which is a PARALLEL containing only
8502 constants (for vec_init) or CONST_VECTOR, efficiently into a
8503 register. Returns an RTX to copy into the register, or NULL_RTX
8504 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8507 neon_make_constant (rtx vals)
8509 enum machine_mode mode = GET_MODE (vals);
8511 rtx const_vec = NULL_RTX;
8512 int n_elts = GET_MODE_NUNITS (mode);
8516 if (GET_CODE (vals) == CONST_VECTOR)
8518 else if (GET_CODE (vals) == PARALLEL)
8520 /* A CONST_VECTOR must contain only CONST_INTs and
8521 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8522 Only store valid constants in a CONST_VECTOR. */
8523 for (i = 0; i < n_elts; ++i)
8525 rtx x = XVECEXP (vals, 0, i);
8526 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8529 if (n_const == n_elts)
8530 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8535 if (const_vec != NULL
8536 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8537 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8539 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8540 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8541 pipeline cycle; creating the constant takes one or two ARM
8544 else if (const_vec != NULL_RTX)
8545 /* Load from constant pool. On Cortex-A8 this takes two cycles
8546 (for either double or quad vectors). We cannot take advantage
8547 of single-cycle VLD1 because we need a PC-relative addressing
8551 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8552 We cannot construct an initializer. */
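/* Putting the three paths together (illustrative): {1, 1, 1, 1} is a
   valid VMOV immediate and is emitted directly; {0x12345678, ...,
   0x12345678} is not, but being uniform it comes back from
   neon_vdup_constant as a VDUP of a core register; and a mixed vector
   such as {1, 2, 3, 4} falls through to the constant pool.  */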
8556 /* Initialize vector TARGET to VALS. */
8559 neon_expand_vector_init (rtx target, rtx vals)
8561 enum machine_mode mode = GET_MODE (target);
8562 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8563 int n_elts = GET_MODE_NUNITS (mode);
8564 int n_var = 0, one_var = -1;
8565 bool all_same = true;
8569 for (i = 0; i < n_elts; ++i)
8571 x = XVECEXP (vals, 0, i);
8572 if (!CONSTANT_P (x))
8573 ++n_var, one_var = i;
8575 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8581 rtx constant = neon_make_constant (vals);
8582 if (constant != NULL_RTX)
8584 emit_move_insn (target, constant);
8589 /* Splat a single non-constant element if we can. */
8590 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8592 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8593 emit_insn (gen_rtx_SET (VOIDmode, target,
8594 gen_rtx_VEC_DUPLICATE (mode, x)));
8598 /* One field is non-constant. Load constant then overwrite varying
8599 field. This is more efficient than using the stack. */
8602 rtx copy = copy_rtx (vals);
8603 rtx index = GEN_INT (one_var);
8605 /* Load constant part of vector, substitute neighboring value for
8607 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8608 neon_expand_vector_init (target, copy);
8610 /* Insert variable. */
8611 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8615 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8618 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8621 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8624 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8627 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8630 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8633 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8636 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8639 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8647 /* Construct the vector in memory one field at a time
8648 and load the whole vector. */
8649 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8650 for (i = 0; i < n_elts; i++)
8651 emit_move_insn (adjust_address_nv (mem, inner_mode,
8652 i * GET_MODE_SIZE (inner_mode)),
8653 XVECEXP (vals, 0, i));
8654 emit_move_insn (target, mem);
8657 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8658 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8659 reported source locations are bogus. */
8662 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8667 gcc_assert (GET_CODE (operand) == CONST_INT);
8669 lane = INTVAL (operand);
8671 if (lane < low || lane >= high)
8675 /* Bounds-check lanes. */
8678 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8680 bounds_check (operand, low, high, "lane out of range");
8683 /* Bounds-check constants. */
8686 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8688 bounds_check (operand, low, high, "constant out of range");
8692 neon_element_bits (enum machine_mode mode)
8695 return GET_MODE_BITSIZE (mode);
8697 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8701 /* Predicates for `match_operand' and `match_operator'. */
8703 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8705 cirrus_memory_offset (rtx op)
8707 /* Reject eliminable registers. */
8708 if (! (reload_in_progress || reload_completed)
8709 && ( reg_mentioned_p (frame_pointer_rtx, op)
8710 || reg_mentioned_p (arg_pointer_rtx, op)
8711 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8712 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8713 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8714 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8717 if (GET_CODE (op) == MEM)
8723 /* Match: (mem (reg)). */
8724 if (GET_CODE (ind) == REG)
8730 if (GET_CODE (ind) == PLUS
8731 && GET_CODE (XEXP (ind, 0)) == REG
8732 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8733 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8740 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8741 WB is true if full writeback address modes are allowed and is false
8742 if limited writeback address modes (POST_INC and PRE_DEC) are
8743 allowed. */
8746 arm_coproc_mem_operand (rtx op, bool wb)
8750 /* Reject eliminable registers. */
8751 if (! (reload_in_progress || reload_completed)
8752 && ( reg_mentioned_p (frame_pointer_rtx, op)
8753 || reg_mentioned_p (arg_pointer_rtx, op)
8754 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8755 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8756 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8757 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8760 /* Constants are converted into offsets from labels. */
8761 if (GET_CODE (op) != MEM)
8766 if (reload_completed
8767 && (GET_CODE (ind) == LABEL_REF
8768 || (GET_CODE (ind) == CONST
8769 && GET_CODE (XEXP (ind, 0)) == PLUS
8770 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8771 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8774 /* Match: (mem (reg)). */
8775 if (GET_CODE (ind) == REG)
8776 return arm_address_register_rtx_p (ind, 0);
8778 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8779 acceptable in any case (subject to verification by
8780 arm_address_register_rtx_p). We need WB to be true to accept
8781 PRE_INC and POST_DEC. */
8782 if (GET_CODE (ind) == POST_INC
8783 || GET_CODE (ind) == PRE_DEC
8785 && (GET_CODE (ind) == PRE_INC
8786 || GET_CODE (ind) == POST_DEC)))
8787 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8790 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8791 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8792 && GET_CODE (XEXP (ind, 1)) == PLUS
8793 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8794 ind = XEXP (ind, 1);
8799 if (GET_CODE (ind) == PLUS
8800 && GET_CODE (XEXP (ind, 0)) == REG
8801 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8802 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8803 && INTVAL (XEXP (ind, 1)) > -1024
8804 && INTVAL (XEXP (ind, 1)) < 1024
8805 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8811 /* Return TRUE if OP is a memory operand which we can load or store a vector
8812 to/from. TYPE is one of the following values:
8813 0 - Vector load/store (vldr)
8814 1 - Core registers (ldm)
8815 2 - Element/structure loads (vld1)
8818 neon_vector_mem_operand (rtx op, int type)
8822 /* Reject eliminable registers. */
8823 if (! (reload_in_progress || reload_completed)
8824 && ( reg_mentioned_p (frame_pointer_rtx, op)
8825 || reg_mentioned_p (arg_pointer_rtx, op)
8826 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8827 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8828 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8829 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8832 /* Constants are converted into offsets from labels. */
8833 if (GET_CODE (op) != MEM)
8838 if (reload_completed
8839 && (GET_CODE (ind) == LABEL_REF
8840 || (GET_CODE (ind) == CONST
8841 && GET_CODE (XEXP (ind, 0)) == PLUS
8842 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8843 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8846 /* Match: (mem (reg)). */
8847 if (GET_CODE (ind) == REG)
8848 return arm_address_register_rtx_p (ind, 0);
8850 /* Allow post-increment with Neon registers. */
8851 if ((type != 1 && GET_CODE (ind) == POST_INC)
8852 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8853 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8855 /* FIXME: vld1 allows register post-modify. */
8861 && GET_CODE (ind) == PLUS
8862 && GET_CODE (XEXP (ind, 0)) == REG
8863 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8864 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8865 && INTVAL (XEXP (ind, 1)) > -1024
8866 && INTVAL (XEXP (ind, 1)) < 1016
8867 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8873 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8874 type. */
8876 neon_struct_mem_operand (rtx op)
8880 /* Reject eliminable registers. */
8881 if (! (reload_in_progress || reload_completed)
8882 && ( reg_mentioned_p (frame_pointer_rtx, op)
8883 || reg_mentioned_p (arg_pointer_rtx, op)
8884 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8885 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8886 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8887 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8890 /* Constants are converted into offsets from labels. */
8891 if (GET_CODE (op) != MEM)
8896 if (reload_completed
8897 && (GET_CODE (ind) == LABEL_REF
8898 || (GET_CODE (ind) == CONST
8899 && GET_CODE (XEXP (ind, 0)) == PLUS
8900 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8901 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8904 /* Match: (mem (reg)). */
8905 if (GET_CODE (ind) == REG)
8906 return arm_address_register_rtx_p (ind, 0);
8911 /* Return true if X is a register that will be eliminated later on. */
8913 arm_eliminable_register (rtx x)
8915 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8916 || REGNO (x) == ARG_POINTER_REGNUM
8917 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8918 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8921 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8922 coprocessor registers. Otherwise return NO_REGS. */
8925 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8929 if (!TARGET_NEON_FP16)
8930 return GENERAL_REGS;
8931 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8933 return GENERAL_REGS;
8937 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8938 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8939 && neon_vector_mem_operand (x, 0))
8942 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8945 return GENERAL_REGS;
8948 /* Values which must be returned in the most-significant end of the return
8952 arm_return_in_msb (const_tree valtype)
8954 return (TARGET_AAPCS_BASED
8956 && (AGGREGATE_TYPE_P (valtype)
8957 || TREE_CODE (valtype) == COMPLEX_TYPE));
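/* Worked example (an illustration, not from the original source): on a
   big-endian AAPCS target, a 3-byte struct returned in r0 occupies the
   most-significant end of the register, bits 31..8, with any padding in
   the least-significant byte.  */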
8960 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8961 Used by the Cirrus Maverick code, which has to work around
8962 a hardware bug triggered by such instructions. */
8964 arm_memory_load_p (rtx insn)
8966 rtx body, lhs, rhs;
8968 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8971 body = PATTERN (insn);
8973 if (GET_CODE (body) != SET)
8976 lhs = XEXP (body, 0);
8977 rhs = XEXP (body, 1);
8979 lhs = REG_OR_SUBREG_RTX (lhs);
8981 /* If the destination is not a general purpose
8982 register we do not have to worry. */
8983 if (GET_CODE (lhs) != REG
8984 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8987 /* As well as loads from memory we also have to react
8988 to loads of invalid constants which will be turned
8989 into loads from the minipool. */
8990 return (GET_CODE (rhs) == MEM
8991 || GET_CODE (rhs) == SYMBOL_REF
8992 || note_invalid_constants (insn, -1, false));
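/* For instance (a sketch, not from the original source), both

       (set (reg:SI r3) (mem:SI (reg:SI r0)))
       (set (reg:SI r3) (symbol_ref:SI "x"))

   count as loads here: the second will be rewritten as a pc-relative
   load from the minipool, which can equally trigger the erratum.  */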
8995 /* Return TRUE if INSN is a Cirrus instruction. */
8997 arm_cirrus_insn_p (rtx insn)
8999 enum attr_cirrus attr;
9001 /* get_attr cannot accept USE or CLOBBER. */
9003 || GET_CODE (insn) != INSN
9004 || GET_CODE (PATTERN (insn)) == USE
9005 || GET_CODE (PATTERN (insn)) == CLOBBER)
9008 attr = get_attr_cirrus (insn);
9010 return attr != CIRRUS_NOT;
9013 /* Cirrus reorg for invalid instruction combinations. */
9015 cirrus_reorg (rtx first)
9017 enum attr_cirrus attr;
9018 rtx body = PATTERN (first);
9022 /* Any branch must be followed by 2 non-Cirrus instructions. */
9023 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9026 t = next_nonnote_insn (first);
9028 if (arm_cirrus_insn_p (t))
9031 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9035 emit_insn_after (gen_nop (), first);
9040 /* (float (blah)) is in parallel with a clobber. */
9041 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9042 body = XVECEXP (body, 0, 0);
9044 if (GET_CODE (body) == SET)
9046 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9048 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9049 be followed by a non-Cirrus insn. */
9050 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9052 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9053 emit_insn_after (gen_nop (), first);
9057 else if (arm_memory_load_p (first))
9059 unsigned int arm_regno;
9061 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9062 ldr/cfmv64hr combination where the Rd field is the same
9063 in both instructions must be split with a non-Cirrus
9070 /* Get Arm register number for ldr insn. */
9071 if (GET_CODE (lhs) == REG)
9072 arm_regno = REGNO (lhs);
9075 gcc_assert (GET_CODE (rhs) == REG);
9076 arm_regno = REGNO (rhs);
9080 first = next_nonnote_insn (first);
9082 if (! arm_cirrus_insn_p (first))
9085 body = PATTERN (first);
9087 /* (float (blah)) is in parallel with a clobber. */
9088 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9089 body = XVECEXP (body, 0, 0);
9091 if (GET_CODE (body) == FLOAT)
9092 body = XEXP (body, 0);
9094 if (get_attr_cirrus (first) == CIRRUS_MOVE
9095 && GET_CODE (XEXP (body, 1)) == REG
9096 && arm_regno == REGNO (XEXP (body, 1)))
9097 emit_insn_after (gen_nop (), first);
9103 /* get_attr cannot accept USE or CLOBBER. */
9105 || GET_CODE (first) != INSN
9106 || GET_CODE (PATTERN (first)) == USE
9107 || GET_CODE (PATTERN (first)) == CLOBBER)
9110 attr = get_attr_cirrus (first);
9112 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9113 must be followed by a non-coprocessor instruction. */
9114 if (attr == CIRRUS_COMPARE)
9118 t = next_nonnote_insn (first);
9120 if (arm_cirrus_insn_p (t))
9123 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9127 emit_insn_after (gen_nop (), first);
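/* Schematically (an illustration, not from the original source) the
   result for a coprocessor compare is

       cfcmps  ...          @ Cirrus compare
       nop                  @ up to two nops inserted here so that the
       nop                  @ compare is followed by two
       ...                  @ non-coprocessor instructions  */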
9133 /* Return TRUE if X references a SYMBOL_REF. */
9135 symbol_mentioned_p (rtx x)
9140 if (GET_CODE (x) == SYMBOL_REF)
9143 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9144 are constant offsets, not symbols. */
9145 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9148 fmt = GET_RTX_FORMAT (GET_CODE (x));
9150 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9156 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9157 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9160 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9167 /* Return TRUE if X references a LABEL_REF. */
9169 label_mentioned_p (rtx x)
9174 if (GET_CODE (x) == LABEL_REF)
9177 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9178 instruction, but they are constant offsets, not symbols. */
9179 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9182 fmt = GET_RTX_FORMAT (GET_CODE (x));
9183 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9189 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9190 if (label_mentioned_p (XVECEXP (x, i, j)))
9193 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9201 tls_mentioned_p (rtx x)
9203 switch (GET_CODE (x))
9206 return tls_mentioned_p (XEXP (x, 0));
9209 if (XINT (x, 1) == UNSPEC_TLS)
9217 /* Must not copy any rtx that uses a pc-relative address. */
9220 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9222 if (GET_CODE (*x) == UNSPEC
9223 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9229 arm_cannot_copy_insn_p (rtx insn)
9231 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9237 enum rtx_code code = GET_CODE (x);
9254 /* Return 1 if memory locations are adjacent. */
9256 adjacent_mem_locations (rtx a, rtx b)
9258 /* We don't guarantee to preserve the order of these memory refs. */
9259 if (volatile_refs_p (a) || volatile_refs_p (b))
9262 if ((GET_CODE (XEXP (a, 0)) == REG
9263 || (GET_CODE (XEXP (a, 0)) == PLUS
9264 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9265 && (GET_CODE (XEXP (b, 0)) == REG
9266 || (GET_CODE (XEXP (b, 0)) == PLUS
9267 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9269 HOST_WIDE_INT val0 = 0, val1 = 0;
9273 if (GET_CODE (XEXP (a, 0)) == PLUS)
9275 reg0 = XEXP (XEXP (a, 0), 0);
9276 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9281 if (GET_CODE (XEXP (b, 0)) == PLUS)
9283 reg1 = XEXP (XEXP (b, 0), 0);
9284 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9289 /* Don't accept any offset that will require multiple
9290 instructions to handle, since this would cause the
9291 arith_adjacentmem pattern to output an overlong sequence. */
9292 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9295 /* Don't allow an eliminable register: register elimination can make
9296 the offset too large. */
9297 if (arm_eliminable_register (reg0))
9300 val_diff = val1 - val0;
9304 /* If the target has load delay slots, then there's no benefit
9305 to using an ldm instruction unless the offset is zero and
9306 we are optimizing for size. */
9307 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9308 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9309 && (val_diff == 4 || val_diff == -4));
9312 return ((REGNO (reg0) == REGNO (reg1))
9313 && (val_diff == 4 || val_diff == -4));
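/* Example (not from the original source): with r4 not eliminable,
   [r4, #8] and [r4, #12] are adjacent (val_diff == 4), as are
   [r4, #12] and [r4, #8] (val_diff == -4); [r4] and [r4, #8] are not.  */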
9319 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9320 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9321 instruction. ADD_OFFSET is nonzero if the base address register needs
9322 to be modified with an add instruction before we can use it. */
9325 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9326 int nops, HOST_WIDE_INT add_offset)
9328 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9329 if the offset isn't small enough. The reason 2 ldrs are faster
9330 is because these ARMs are able to do more than one cache access
9331 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9332 whilst the ARM8 has a double bandwidth cache. This means that
9333 these cores can do both an instruction fetch and a data fetch in
9334 a single cycle, so the trick of calculating the address into a
9335 scratch register (one of the result regs) and then doing a load
9336 multiple actually becomes slower (and no smaller in code size).
9337 That is the transformation
9339 ldr rd1, [rbase + offset]
9340 ldr rd2, [rbase + offset + 4]
9344 add rd1, rbase, offset
9345 ldmia rd1, {rd1, rd2}
9347 produces worse code -- '3 cycles + any stalls on rd2' instead of
9348 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9349 access per cycle, the first sequence could never complete in less
9350 than 6 cycles, whereas the ldm sequence would only take 5 and
9351 would make better use of sequential accesses if not hitting the
9354 We cheat here and test 'arm_ld_sched' which we currently know to
9355 only be true for the ARM8, ARM9 and StrongARM. If this ever
9356 changes, then the test below needs to be reworked. */
9357 if (nops == 2 && arm_ld_sched && add_offset != 0)
9360 /* XScale has load-store double instructions, but they have stricter
9361 alignment requirements than load-store multiple, so we cannot
9364 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9365 the pipeline until completion.
9373 An ldr instruction takes 1-3 cycles, but does not block the
9382 Best case ldr will always win. However, the more ldr instructions
9383 we issue, the less likely we are to be able to schedule them well.
9384 Using ldr instructions also increases code size.
9386 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9387 for counts of 3 or 4 regs. */
9388 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9393 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9394 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9395 an array ORDER which describes the sequence to use when accessing the
9396 offsets that produces an ascending order. In this sequence, each
9397 offset must be larger by exactly 4 than the previous one. ORDER[0]
9398 must have been filled in with the lowest offset by the caller.
9399 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9400 we use to verify that ORDER produces an ascending order of registers.
9401 Return true if it was possible to construct such an order, false if
9405 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9409 for (i = 1; i < nops; i++)
9413 order[i] = order[i - 1];
9414 for (j = 0; j < nops; j++)
9415 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9417 /* We must find exactly one offset that is higher than the
9418 previous one by 4. */
9419 if (order[i] != order[i - 1])
9423 if (order[i] == order[i - 1])
9425 /* The register numbers must be ascending. */
9426 if (unsorted_regs != NULL
9427 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
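/* Worked example (not from the original source): for
   unsorted_offsets = {4, 0, 12, 8} the caller seeds order[0] = 1
   (offset 0) and the loop fills in order = {1, 0, 3, 2}, i.e.
   offsets 0, 4, 8, 12.  Offsets {0, 4, 4, 8} fail because two entries
   are 4 above the first; {0, 8} fails because none is.  */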
9433 /* Used to determine in a peephole whether a sequence of load
9434 instructions can be changed into a load-multiple instruction.
9435 NOPS is the number of separate load instructions we are examining. The
9436 first NOPS entries in OPERANDS are the destination registers, the
9437 next NOPS entries are memory operands. If this function is
9438 successful, *BASE is set to the common base register of the memory
9439 accesses; *LOAD_OFFSET is set to the first memory location's offset
9440 from that base register.
9441 REGS is an array filled in with the destination register numbers.
9442 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9443 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9444 the sequence of registers in REGS matches the loads from ascending memory
9445 locations, and the function verifies that the register numbers are
9446 themselves ascending. If CHECK_REGS is false, the register numbers
9447 are stored in the order they are found in the operands. */
9449 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9450 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9452 int unsorted_regs[MAX_LDM_STM_OPS];
9453 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9454 int order[MAX_LDM_STM_OPS];
9455 rtx base_reg_rtx = NULL;
9459 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9460 easily extended if required. */
9461 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9463 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9465 /* Loop over the operands and check that the memory references are
9466 suitable (i.e. immediate offsets from the same base register). At
9467 the same time, extract the target register, and the memory
9469 for (i = 0; i < nops; i++)
9474 /* Convert a subreg of a mem into the mem itself. */
9475 if (GET_CODE (operands[nops + i]) == SUBREG)
9476 operands[nops + i] = alter_subreg (operands + (nops + i));
9478 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9480 /* Don't reorder volatile memory references; it doesn't seem worth
9481 looking for the case where the order is ok anyway. */
9482 if (MEM_VOLATILE_P (operands[nops + i]))
9485 offset = const0_rtx;
9487 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9488 || (GET_CODE (reg) == SUBREG
9489 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9490 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9491 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9493 || (GET_CODE (reg) == SUBREG
9494 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9495 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9500 base_reg = REGNO (reg);
9502 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9505 else if (base_reg != (int) REGNO (reg))
9506 /* Not addressed from the same base register. */
9509 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9510 ? REGNO (operands[i])
9511 : REGNO (SUBREG_REG (operands[i])));
9513 /* If it isn't an integer register, or if it overwrites the
9514 base register but isn't the last insn in the list, then
9515 we can't do this. */
9516 if (unsorted_regs[i] < 0
9517 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9518 || unsorted_regs[i] > 14
9519 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9522 unsorted_offsets[i] = INTVAL (offset);
9523 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9527 /* Not a suitable memory address. */
9531 /* All the useful information has now been extracted from the
9532 operands into unsorted_regs and unsorted_offsets; additionally,
9533 order[0] has been set to the lowest offset in the list. Sort
9534 the offsets into order, verifying that they are adjacent, and
9535 check that the register numbers are ascending. */
9536 if (!compute_offset_order (nops, unsorted_offsets, order,
9537 check_regs ? unsorted_regs : NULL))
9541 memcpy (saved_order, order, sizeof order);
9547 for (i = 0; i < nops; i++)
9548 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9550 *load_offset = unsorted_offsets[order[0]];
9554 && !peep2_reg_dead_p (nops, base_reg_rtx))
9557 if (unsorted_offsets[order[0]] == 0)
9558 ldm_case = 1; /* ldmia */
9559 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9560 ldm_case = 2; /* ldmib */
9561 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9562 ldm_case = 3; /* ldmda */
9563 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9564 ldm_case = 4; /* ldmdb */
9565 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9566 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9571 if (!multiple_operation_profitable_p (false, nops,
9573 ? unsorted_offsets[order[0]] : 0))
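/* Example (not from the original source): for the two loads
   r4 <- [r7] and r5 <- [r7, #4] this returns ldm_case 1, and the
   peephole can emit  ldmia r7, {r4, r5}.  */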
9579 /* Used to determine in a peephole whether a sequence of store instructions can
9580 be changed into a store-multiple instruction.
9581 NOPS is the number of separate store instructions we are examining.
9582 NOPS_TOTAL is the total number of instructions recognized by the peephole
9584 The first NOPS entries in OPERANDS are the source registers, the next
9585 NOPS entries are memory operands. If this function is successful, *BASE is
9586 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9587 to the first memory location's offset from that base register. REGS is an
9588 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9589 likewise filled with the corresponding rtx's.
9590 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9591 numbers to an ascending order of stores.
9592 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9593 from ascending memory locations, and the function verifies that the register
9594 numbers are themselves ascending. If CHECK_REGS is false, the register
9595 numbers are stored in the order they are found in the operands. */
9597 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9598 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9599 HOST_WIDE_INT *load_offset, bool check_regs)
9601 int unsorted_regs[MAX_LDM_STM_OPS];
9602 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9603 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9604 int order[MAX_LDM_STM_OPS];
9606 rtx base_reg_rtx = NULL;
9609 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9610 easily extended if required. */
9611 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9613 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9615 /* Loop over the operands and check that the memory references are
9616 suitable (i.e. immediate offsets from the same base register). At
9617 the same time, extract the target register, and the memory
9619 for (i = 0; i < nops; i++)
9624 /* Convert a subreg of a mem into the mem itself. */
9625 if (GET_CODE (operands[nops + i]) == SUBREG)
9626 operands[nops + i] = alter_subreg (operands + (nops + i));
9628 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9630 /* Don't reorder volatile memory references; it doesn't seem worth
9631 looking for the case where the order is ok anyway. */
9632 if (MEM_VOLATILE_P (operands[nops + i]))
9635 offset = const0_rtx;
9637 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9638 || (GET_CODE (reg) == SUBREG
9639 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9640 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9641 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9643 || (GET_CODE (reg) == SUBREG
9644 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9645 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9648 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9649 ? operands[i] : SUBREG_REG (operands[i]));
9650 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9654 base_reg = REGNO (reg);
9656 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9659 else if (base_reg != (int) REGNO (reg))
9660 /* Not addressed from the same base register. */
9663 /* If it isn't an integer register, then we can't do this. */
9664 if (unsorted_regs[i] < 0
9665 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9666 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9667 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9668 || unsorted_regs[i] > 14)
9671 unsorted_offsets[i] = INTVAL (offset);
9672 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9676 /* Not a suitable memory address. */
9680 /* All the useful information has now been extracted from the
9681 operands into unsorted_regs and unsorted_offsets; additionally,
9682 order[0] has been set to the lowest offset in the list. Sort
9683 the offsets into order, verifying that they are adjacent, and
9684 check that the register numbers are ascending. */
9685 if (!compute_offset_order (nops, unsorted_offsets, order,
9686 check_regs ? unsorted_regs : NULL))
9690 memcpy (saved_order, order, sizeof order);
9696 for (i = 0; i < nops; i++)
9698 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9700 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9703 *load_offset = unsorted_offsets[order[0]];
9707 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9710 if (unsorted_offsets[order[0]] == 0)
9711 stm_case = 1; /* stmia */
9712 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9713 stm_case = 2; /* stmib */
9714 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9715 stm_case = 3; /* stmda */
9716 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9717 stm_case = 4; /* stmdb */
9721 if (!multiple_operation_profitable_p (false, nops, 0))
9727 /* Routines for use in generating RTL. */
9729 /* Generate a load-multiple instruction. COUNT is the number of loads in
9730 the instruction; REGS and MEMS are arrays containing the operands.
9731 BASEREG is the base register to be used in addressing the memory operands.
9732 WBACK_OFFSET is nonzero if the instruction should update the base
9736 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9737 HOST_WIDE_INT wback_offset)
9742 if (!multiple_operation_profitable_p (false, count, 0))
9748 for (i = 0; i < count; i++)
9749 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9751 if (wback_offset != 0)
9752 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9760 result = gen_rtx_PARALLEL (VOIDmode,
9761 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9762 if (wback_offset != 0)
9764 XVECEXP (result, 0, 0)
9765 = gen_rtx_SET (VOIDmode, basereg,
9766 plus_constant (basereg, wback_offset));
9771 for (j = 0; i < count; i++, j++)
9772 XVECEXP (result, 0, i)
9773 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
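/* For count == 2 with a write-back offset of 8 the PARALLEL built
   above has this shape (a sketch, not from the original source):

       (parallel
         [(set (reg rb) (plus (reg rb) (const_int 8)))
          (set (reg r4) (mem (reg rb)))
          (set (reg r5) (mem (plus (reg rb) (const_int 4))))])  */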
9778 /* Generate a store-multiple instruction. COUNT is the number of stores in
9779 the instruction; REGS and MEMS are arrays containing the operands.
9780 BASEREG is the base register to be used in addressing the memory operands.
9781 WBACK_OFFSET is nonzero if the instruction should update the base
9785 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9786 HOST_WIDE_INT wback_offset)
9791 if (GET_CODE (basereg) == PLUS)
9792 basereg = XEXP (basereg, 0);
9794 if (!multiple_operation_profitable_p (false, count, 0))
9800 for (i = 0; i < count; i++)
9801 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9803 if (wback_offset != 0)
9804 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9812 result = gen_rtx_PARALLEL (VOIDmode,
9813 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9814 if (wback_offset != 0)
9816 XVECEXP (result, 0, 0)
9817 = gen_rtx_SET (VOIDmode, basereg,
9818 plus_constant (basereg, wback_offset));
9823 for (j = 0; i < count; i++, j++)
9824 XVECEXP (result, 0, i)
9825 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9830 /* Generate either a load-multiple or a store-multiple instruction. This
9831 function can be used in situations where we can start with a single MEM
9832 rtx and adjust its address upwards.
9833 COUNT is the number of operations in the instruction, not counting a
9834 possible update of the base register. REGS is an array containing the
9836 BASEREG is the base register to be used in addressing the memory operands,
9837 which are constructed from BASEMEM.
9838 WRITE_BACK specifies whether the generated instruction should include an
9839 update of the base register.
9840 OFFSETP is used to pass an offset to and from this function; this offset
9841 is not used when constructing the address (instead BASEMEM should have an
9842 appropriate offset in its address), it is used only for setting
9843 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9846 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9847 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9849 rtx mems[MAX_LDM_STM_OPS];
9850 HOST_WIDE_INT offset = *offsetp;
9853 gcc_assert (count <= MAX_LDM_STM_OPS);
9855 if (GET_CODE (basereg) == PLUS)
9856 basereg = XEXP (basereg, 0);
9858 for (i = 0; i < count; i++)
9860 rtx addr = plus_constant (basereg, i * 4);
9861 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9869 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9870 write_back ? 4 * count : 0);
9872 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9873 write_back ? 4 * count : 0);
9877 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9878 rtx basemem, HOST_WIDE_INT *offsetp)
9880 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9885 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9886 rtx basemem, HOST_WIDE_INT *offsetp)
9888 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9892 /* Called from a peephole2 expander to turn a sequence of loads into an
9893 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9894 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9895 is true if we can reorder the registers because they are used commutatively
9897 Returns true iff we could generate a new instruction. */
9900 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9902 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9903 rtx mems[MAX_LDM_STM_OPS];
9906 HOST_WIDE_INT offset;
9907 int write_back = FALSE;
9911 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9912 &base_reg, &offset, !sort_regs);
9918 for (i = 0; i < nops - 1; i++)
9919 for (j = i + 1; j < nops; j++)
9920 if (regs[i] > regs[j])
9926 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9930 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9931 gcc_assert (ldm_case == 1 || ldm_case == 5);
9937 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
9938 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
9943 base_reg_rtx = newbase;
9947 for (i = 0; i < nops; i++)
9949 addr = plus_constant (base_reg_rtx, offset + i * 4);
9950 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9953 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
9954 write_back ? offset + i * 4 : 0));
9958 /* Called from a peephole2 expander to turn a sequence of stores into an
9959 STM instruction. OPERANDS are the operands found by the peephole matcher;
9960 NOPS indicates how many separate stores we are trying to combine.
9961 Returns true iff we could generate a new instruction. */
9964 gen_stm_seq (rtx *operands, int nops)
9967 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9968 rtx mems[MAX_LDM_STM_OPS];
9971 HOST_WIDE_INT offset;
9972 int write_back = FALSE;
9977 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
9978 mem_order, &base_reg, &offset, true);
9983 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9985 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
9988 gcc_assert (base_reg_dies);
9994 gcc_assert (base_reg_dies);
9995 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
9999 addr = plus_constant (base_reg_rtx, offset);
10001 for (i = 0; i < nops; i++)
10003 addr = plus_constant (base_reg_rtx, offset + i * 4);
10004 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10007 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10008 write_back ? offset + i * 4 : 0));
10012 /* Called from a peephole2 expander to turn a sequence of stores that are
10013 preceded by constant loads into an STM instruction. OPERANDS are the
10014 operands found by the peephole matcher; NOPS indicates how many
10015 separate stores we are trying to combine; there are 2 * NOPS
10016 instructions in the peephole.
10017 Returns true iff we could generate a new instruction. */
10020 gen_const_stm_seq (rtx *operands, int nops)
10022 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10023 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10024 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10025 rtx mems[MAX_LDM_STM_OPS];
10028 HOST_WIDE_INT offset;
10029 int write_back = FALSE;
10032 bool base_reg_dies;
10034 HARD_REG_SET allocated;
10036 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10037 mem_order, &base_reg, &offset, false);
10042 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10044 /* If the same register is used more than once, try to find a free
10046 CLEAR_HARD_REG_SET (allocated);
10047 for (i = 0; i < nops; i++)
10049 for (j = i + 1; j < nops; j++)
10050 if (regs[i] == regs[j])
10052 rtx t = peep2_find_free_register (0, nops * 2,
10053 TARGET_THUMB1 ? "l" : "r",
10054 SImode, &allocated);
10058 regs[i] = REGNO (t);
10062 /* Compute an ordering that maps the register numbers to an ascending
10065 for (i = 0; i < nops; i++)
10066 if (regs[i] < regs[reg_order[0]])
10069 for (i = 1; i < nops; i++)
10071 int this_order = reg_order[i - 1];
10072 for (j = 0; j < nops; j++)
10073 if (regs[j] > regs[reg_order[i - 1]]
10074 && (this_order == reg_order[i - 1]
10075 || regs[j] < regs[this_order]))
10077 reg_order[i] = this_order;
10080 /* Ensure that registers that must be live after the instruction end
10081 up with the correct value. */
10082 for (i = 0; i < nops; i++)
10084 int this_order = reg_order[i];
10085 if ((this_order != mem_order[i]
10086 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10087 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10091 /* Load the constants. */
10092 for (i = 0; i < nops; i++)
10094 rtx op = operands[2 * nops + mem_order[i]];
10095 sorted_regs[i] = regs[reg_order[i]];
10096 emit_move_insn (reg_rtxs[reg_order[i]], op);
10099 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10101 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10104 gcc_assert (base_reg_dies);
10110 gcc_assert (base_reg_dies);
10111 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10115 addr = plus_constant (base_reg_rtx, offset);
10117 for (i = 0; i < nops; i++)
10119 addr = plus_constant (base_reg_rtx, offset + i * 4);
10120 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10123 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10124 write_back ? offset + i * 4 : 0));
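/* Schematically (an illustration, not from the original source) the
   peephole above turns

       mov r4, #1
       mov r5, #2
       str r4, [r7]
       str r5, [r7, #4]

   into

       mov r4, #1
       mov r5, #2
       stmia r7, {r4, r5}

   renaming destination registers via peep2_find_free_register where
   the original sequence reused one register for several constants.  */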
10129 arm_gen_movmemqi (rtx *operands)
10131 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10132 HOST_WIDE_INT srcoffset, dstoffset;
10134 rtx src, dst, srcbase, dstbase;
10135 rtx part_bytes_reg = NULL;
10138 if (GET_CODE (operands[2]) != CONST_INT
10139 || GET_CODE (operands[3]) != CONST_INT
10140 || INTVAL (operands[2]) > 64
10141 || INTVAL (operands[3]) & 3)
10144 dstbase = operands[0];
10145 srcbase = operands[1];
10147 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10148 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10150 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10151 out_words_to_go = INTVAL (operands[2]) / 4;
10152 last_bytes = INTVAL (operands[2]) & 3;
10153 dstoffset = srcoffset = 0;
10155 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10156 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10158 for (i = 0; in_words_to_go >= 2; i += 4)
10160 if (in_words_to_go > 4)
10161 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10162 TRUE, srcbase, &srcoffset));
10164 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10165 src, FALSE, srcbase,
10168 if (out_words_to_go)
10170 if (out_words_to_go > 4)
10171 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10172 TRUE, dstbase, &dstoffset));
10173 else if (out_words_to_go != 1)
10174 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10175 out_words_to_go, dst,
10178 dstbase, &dstoffset));
10181 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10182 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10183 if (last_bytes != 0)
10185 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10191 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10192 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10195 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10196 if (out_words_to_go)
10200 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10201 sreg = copy_to_reg (mem);
10203 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10204 emit_move_insn (mem, sreg);
10207 gcc_assert (!in_words_to_go); /* Sanity check */
10210 if (in_words_to_go)
10212 gcc_assert (in_words_to_go > 0);
10214 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10215 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10218 gcc_assert (!last_bytes || part_bytes_reg);
10220 if (BYTES_BIG_ENDIAN && last_bytes)
10222 rtx tmp = gen_reg_rtx (SImode);
10224 /* The bytes we want are in the top end of the word. */
10225 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10226 GEN_INT (8 * (4 - last_bytes))));
10227 part_bytes_reg = tmp;
10231 mem = adjust_automodify_address (dstbase, QImode,
10232 plus_constant (dst, last_bytes - 1),
10233 dstoffset + last_bytes - 1);
10234 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10238 tmp = gen_reg_rtx (SImode);
10239 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10240 part_bytes_reg = tmp;
10247 if (last_bytes > 1)
10249 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10250 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10254 rtx tmp = gen_reg_rtx (SImode);
10255 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10256 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10257 part_bytes_reg = tmp;
10264 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10265 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
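/* Worked example (not from the original source): a 10-byte copy
   (operands[2] == 10) gives in_words_to_go == 3, out_words_to_go == 2
   and last_bytes == 2: two whole words are moved with load/store
   multiples, the third word is loaded into part_bytes_reg, and its low
   halfword (shifted down from the top first on big-endian) is stored
   with a halfword store.  */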
10272 /* Select a dominance comparison mode if possible for a test of the general
10273 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10274 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10275 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10276 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10277 In all cases OP will be either EQ or NE, but we don't need to know which
10278 here. If we are unable to support a dominance comparison we return
10279 CCmode. This will then fail to match for the RTL expressions that
10280 generate this call. */
10282 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10284 enum rtx_code cond1, cond2;
10287 /* Currently we will probably get the wrong result if the individual
10288 comparisons are not simple. This also ensures that it is safe to
10289 reverse a comparison if necessary. */
10290 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10292 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10296 /* The if_then_else variant of this tests the second condition if the
10297 first passes, but is true if the first fails. Reverse the first
10298 condition to get a true "inclusive-or" expression. */
10299 if (cond_or == DOM_CC_NX_OR_Y)
10300 cond1 = reverse_condition (cond1);
10302 /* If the comparisons are not equal, and one doesn't dominate the other,
10303 then we can't do this. */
10305 && !comparison_dominates_p (cond1, cond2)
10306 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10311 enum rtx_code temp = cond1;
10319 if (cond_or == DOM_CC_X_AND_Y)
10324 case EQ: return CC_DEQmode;
10325 case LE: return CC_DLEmode;
10326 case LEU: return CC_DLEUmode;
10327 case GE: return CC_DGEmode;
10328 case GEU: return CC_DGEUmode;
10329 default: gcc_unreachable ();
10333 if (cond_or == DOM_CC_X_AND_Y)
10345 gcc_unreachable ();
10349 if (cond_or == DOM_CC_X_AND_Y)
10361 gcc_unreachable ();
10365 if (cond_or == DOM_CC_X_AND_Y)
10366 return CC_DLTUmode;
10371 return CC_DLTUmode;
10373 return CC_DLEUmode;
10377 gcc_unreachable ();
10381 if (cond_or == DOM_CC_X_AND_Y)
10382 return CC_DGTUmode;
10387 return CC_DGTUmode;
10389 return CC_DGEUmode;
10393 gcc_unreachable ();
10396 /* The remaining cases only occur when both comparisons are the
10399 gcc_assert (cond1 == cond2);
10403 gcc_assert (cond1 == cond2);
10407 gcc_assert (cond1 == cond2);
10411 gcc_assert (cond1 == cond2);
10412 return CC_DLEUmode;
10415 gcc_assert (cond1 == cond2);
10416 return CC_DGEUmode;
10419 gcc_unreachable ();
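/* Example (not from the original source): for
   (ior (eq r0 (const_int 0)) (eq r1 (const_int 0))) both conditions
   are EQ, so the DOM_CC_X_OR_Y case yields CC_DEQmode, roughly a
   cmp followed by a cmpne whose combined result is tested with "eq".  */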
10424 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10426 /* All floating point compares return CCFP if it is an equality
10427 comparison, and CCFPE otherwise. */
10428 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10448 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10453 gcc_unreachable ();
10457 /* A compare with a shifted operand. Because of canonicalization, the
10458 comparison will have to be swapped when we emit the assembler. */
10459 if (GET_MODE (y) == SImode
10460 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10461 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10462 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10463 || GET_CODE (x) == ROTATERT))
10466 /* This operation is performed swapped, but since we only rely on the Z
10467 flag we don't need an additional mode. */
10468 if (GET_MODE (y) == SImode
10469 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10470 && GET_CODE (x) == NEG
10471 && (op == EQ || op == NE))
10474 /* This is a special case that is used by combine to allow a
10475 comparison of a shifted byte load to be split into a zero-extend
10476 followed by a comparison of the shifted integer (only valid for
10477 equalities and unsigned inequalities). */
10478 if (GET_MODE (x) == SImode
10479 && GET_CODE (x) == ASHIFT
10480 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10481 && GET_CODE (XEXP (x, 0)) == SUBREG
10482 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10483 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10484 && (op == EQ || op == NE
10485 || op == GEU || op == GTU || op == LTU || op == LEU)
10486 && GET_CODE (y) == CONST_INT)
10489 /* A construct for a conditional compare: if the false arm contains
10490 0, then both conditions must be true; otherwise either condition
10491 must be true. Not all conditions are possible, so CCmode is
10492 returned if it can't be done. */
10493 if (GET_CODE (x) == IF_THEN_ELSE
10494 && (XEXP (x, 2) == const0_rtx
10495 || XEXP (x, 2) == const1_rtx)
10496 && COMPARISON_P (XEXP (x, 0))
10497 && COMPARISON_P (XEXP (x, 1)))
10498 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10499 INTVAL (XEXP (x, 2)));
10501 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10502 if (GET_CODE (x) == AND
10503 && COMPARISON_P (XEXP (x, 0))
10504 && COMPARISON_P (XEXP (x, 1)))
10505 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10508 if (GET_CODE (x) == IOR
10509 && COMPARISON_P (XEXP (x, 0))
10510 && COMPARISON_P (XEXP (x, 1)))
10511 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10514 /* An operation (on Thumb) where we want to test for a single bit.
10515 This is done by shifting that bit up into the top bit of a
10516 scratch register; we can then branch on the sign bit. */
10518 && GET_MODE (x) == SImode
10519 && (op == EQ || op == NE)
10520 && GET_CODE (x) == ZERO_EXTRACT
10521 && XEXP (x, 1) == const1_rtx)
10524 /* For an operation that sets the condition codes as a side-effect, the
10525 V flag is not set correctly, so we can only use comparisons where
10526 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10528 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10529 if (GET_MODE (x) == SImode
10531 && (op == EQ || op == NE || op == LT || op == GE)
10532 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10533 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10534 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10535 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10536 || GET_CODE (x) == LSHIFTRT
10537 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10538 || GET_CODE (x) == ROTATERT
10539 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10540 return CC_NOOVmode;
10542 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10545 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10546 && GET_CODE (x) == PLUS
10547 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10550 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10552 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10554 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10561 /* A DImode comparison against zero can be implemented by
10562 or'ing the two halves together. */
10563 if (y == const0_rtx)
10566 /* We can do an equality test in three Thumb instructions. */
10576 /* DImode unsigned comparisons can be implemented by cmp +
10577 cmpeq without a scratch register. Not worth doing in
10588 /* DImode signed and unsigned comparisons can be implemented
10589 by cmp + sbcs with a scratch register, but that does not
10590 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10591 gcc_assert (op != EQ && op != NE);
10595 gcc_unreachable ();
10602 /* X and Y are two things to compare using CODE. Emit the compare insn and
10603 return the rtx for the CC register in the proper mode. */
10606 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10608 enum machine_mode mode;
10610 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10612 /* We might have X as a constant, Y as a register because of the predicates
10613 used for cmpdi. If so, force X to a register here. */
10614 if (dimode_comparison && !REG_P (x))
10615 x = force_reg (DImode, x);
10617 mode = SELECT_CC_MODE (code, x, y);
10618 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10620 if (dimode_comparison
10621 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10622 && mode != CC_CZmode)
10626 /* To compare two non-zero values for equality, XOR them and
10627 then compare against zero. Not used for ARM mode; there
10628 CC_CZmode is cheaper. */
10629 if (mode == CC_Zmode && y != const0_rtx)
10631 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10634 /* A scratch register is required. */
10635 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10636 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10637 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10640 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
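/* Typical use (a sketch, not from the original source):

       rtx cc = arm_gen_compare_reg (GT, op0, op1);

   after which CC is (reg CC_REGNUM) in the mode chosen by
   SELECT_CC_MODE, and a conditional branch can be emitted that tests
   (gt cc (const_int 0)).  */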
10645 /* Generate a sequence of insns that will generate the correct return
10646 address mask depending on the physical architecture that the program
10649 arm_gen_return_addr_mask (void)
10651 rtx reg = gen_reg_rtx (Pmode);
10653 emit_insn (gen_return_addr_mask (reg));
10658 arm_reload_in_hi (rtx *operands)
10660 rtx ref = operands[1];
10662 HOST_WIDE_INT offset = 0;
10664 if (GET_CODE (ref) == SUBREG)
10666 offset = SUBREG_BYTE (ref);
10667 ref = SUBREG_REG (ref);
10670 if (GET_CODE (ref) == REG)
10672 /* We have a pseudo which has been spilt onto the stack; there
10673 are two cases here: the first where there is a simple
10674 stack-slot replacement and a second where the stack-slot is
10675 out of range, or is used as a subreg. */
10676 if (reg_equiv_mem[REGNO (ref)])
10678 ref = reg_equiv_mem[REGNO (ref)];
10679 base = find_replacement (&XEXP (ref, 0));
10682 /* The slot is out of range, or was dressed up in a SUBREG. */
10683 base = reg_equiv_address[REGNO (ref)];
10686 base = find_replacement (&XEXP (ref, 0));
10688 /* Handle the case where the address is too complex to be offset by 1. */
10689 if (GET_CODE (base) == MINUS
10690 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10692 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10694 emit_set_insn (base_plus, base);
10697 else if (GET_CODE (base) == PLUS)
10699 /* The addend must be CONST_INT, or we would have dealt with it above. */
10700 HOST_WIDE_INT hi, lo;
10702 offset += INTVAL (XEXP (base, 1));
10703 base = XEXP (base, 0);
10705 /* Rework the address into a legal sequence of insns. */
10706 /* Valid range for lo is -4095 -> 4095 */
10709 : -((-offset) & 0xfff));
10711 /* Corner case, if lo is the max offset then we would be out of range
10712 once we have added the additional 1 below, so bump the msb into the
10713 pre-loading insn(s). */
10717 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10718 ^ (HOST_WIDE_INT) 0x80000000)
10719 - (HOST_WIDE_INT) 0x80000000);
10721 gcc_assert (hi + lo == offset);
10725 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10727 /* Get the base address; addsi3 knows how to handle constants
10728 that require more than one insn. */
10729 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10735 /* Operands[2] may overlap operands[0] (though it won't overlap
10736 operands[1]), that's why we asked for a DImode reg -- so we can
10737 use the bit that does not overlap. */
10738 if (REGNO (operands[2]) == REGNO (operands[0]))
10739 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10741 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10743 emit_insn (gen_zero_extendqisi2 (scratch,
10744 gen_rtx_MEM (QImode,
10745 plus_constant (base,
10747 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10748 gen_rtx_MEM (QImode,
10749 plus_constant (base,
10751 if (!BYTES_BIG_ENDIAN)
10752 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10753 gen_rtx_IOR (SImode,
10756 gen_rtx_SUBREG (SImode, operands[0], 0),
10760 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10761 gen_rtx_IOR (SImode,
10762 gen_rtx_ASHIFT (SImode, scratch,
10764 gen_rtx_SUBREG (SImode, operands[0], 0)));
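/* The resulting little-endian sequence is essentially (a sketch, not
   from the original source):

       ldrb    rS, [base]         @ low byte into the scratch
       ldrb    rD, [base, #1]     @ high byte into the destination
       orr     rD, rS, rD, lsl #8
*/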
10767 /* Handle storing a half-word to memory during reload by synthesizing as two
10768 byte stores. Take care not to clobber the input values until after we
10769 have moved them somewhere safe. This code assumes that if the DImode
10770 scratch in operands[2] overlaps either the input value or output address
10771 in some way, then that value must die in this insn (we absolutely need
10772 two scratch registers for some corner cases). */
10774 arm_reload_out_hi (rtx *operands)
10776 rtx ref = operands[0];
10777 rtx outval = operands[1];
10779 HOST_WIDE_INT offset = 0;
10781 if (GET_CODE (ref) == SUBREG)
10783 offset = SUBREG_BYTE (ref);
10784 ref = SUBREG_REG (ref);
10787 if (GET_CODE (ref) == REG)
10789 /* We have a pseudo which has been spilt onto the stack; there
10790 are two cases here: the first where there is a simple
10791 stack-slot replacement and a second where the stack-slot is
10792 out of range, or is used as a subreg. */
10793 if (reg_equiv_mem[REGNO (ref)])
10795 ref = reg_equiv_mem[REGNO (ref)];
10796 base = find_replacement (&XEXP (ref, 0));
10799 /* The slot is out of range, or was dressed up in a SUBREG. */
10800 base = reg_equiv_address[REGNO (ref)];
10803 base = find_replacement (&XEXP (ref, 0));
10805 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10807 /* Handle the case where the address is too complex to be offset by 1. */
10808 if (GET_CODE (base) == MINUS
10809 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10811 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10813 /* Be careful not to destroy OUTVAL. */
10814 if (reg_overlap_mentioned_p (base_plus, outval))
10816 /* Updating base_plus might destroy outval; see if we can
10817 swap the scratch and base_plus. */
10818 if (!reg_overlap_mentioned_p (scratch, outval))
10821 scratch = base_plus;
10826 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10828 /* Be conservative and copy OUTVAL into the scratch now;
10829 this should only be necessary if outval is a subreg
10830 of something larger than a word. */
10831 /* XXX Might this clobber base? I can't see how it can,
10832 since scratch is known to overlap with OUTVAL, and
10833 must be wider than a word. */
10834 emit_insn (gen_movhi (scratch_hi, outval));
10835 outval = scratch_hi;
10839 emit_set_insn (base_plus, base);
10842 else if (GET_CODE (base) == PLUS)
10844 /* The addend must be CONST_INT, or we would have dealt with it above. */
10845 HOST_WIDE_INT hi, lo;
10847 offset += INTVAL (XEXP (base, 1));
10848 base = XEXP (base, 0);
10850 /* Rework the address into a legal sequence of insns. */
10851 /* Valid range for lo is -4095 -> 4095 */
10854 : -((-offset) & 0xfff));
10856 /* Corner case, if lo is the max offset then we would be out of range
10857 once we have added the additional 1 below, so bump the msb into the
10858 pre-loading insn(s). */
10862 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10863 ^ (HOST_WIDE_INT) 0x80000000)
10864 - (HOST_WIDE_INT) 0x80000000);
10866 gcc_assert (hi + lo == offset);
10870 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10872 /* Be careful not to destroy OUTVAL. */
10873 if (reg_overlap_mentioned_p (base_plus, outval))
10875 /* Updating base_plus might destroy outval; see if we
10876 can swap the scratch and base_plus. */
10877 if (!reg_overlap_mentioned_p (scratch, outval))
10880 scratch = base_plus;
10885 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10887 /* Be conservative and copy outval into scratch now;
10888 this should only be necessary if outval is a
10889 subreg of something larger than a word. */
10890 /* XXX Might this clobber base? I can't see how it
10891 can, since scratch is known to overlap with
10893 emit_insn (gen_movhi (scratch_hi, outval));
10894 outval = scratch_hi;
10898 /* Get the base address; addsi3 knows how to handle constants
10899 that require more than one insn. */
10900 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10906 if (BYTES_BIG_ENDIAN)
10908 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10909 plus_constant (base, offset + 1)),
10910 gen_lowpart (QImode, outval)));
10911 emit_insn (gen_lshrsi3 (scratch,
10912 gen_rtx_SUBREG (SImode, outval, 0),
10914 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10915 gen_lowpart (QImode, scratch)));
10919 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10920 gen_lowpart (QImode, outval)));
10921 emit_insn (gen_lshrsi3 (scratch,
10922 gen_rtx_SUBREG (SImode, outval, 0),
10924 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10925 plus_constant (base, offset + 1)),
10926 gen_lowpart (QImode, scratch)));
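/* Little-endian the store therefore becomes (a sketch, not from the
   original source):

       strb    rV, [base]         @ low byte of the value
       mov     rS, rV, lsr #8
       strb    rS, [base, #1]     @ high byte from the scratch
*/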
10930 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10931 (padded to the size of a word) should be passed in a register. */
10934 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10936 if (TARGET_AAPCS_BASED)
10937 return must_pass_in_stack_var_size (mode, type);
10939 return must_pass_in_stack_var_size_or_pad (mode, type);
10943 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10944 Return true if an argument passed on the stack should be padded upwards,
10945 i.e. if the least-significant byte has useful data.
10946 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
10947 aggregate types are placed at the lowest memory address. */
10950 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10952 if (!TARGET_AAPCS_BASED)
10953 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10955 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10962 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10963 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10964 byte of the register has useful data, and return the opposite if the
10965 most significant byte does.
10966 For AAPCS, small aggregates and small complex types are always padded
10970 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10971 tree type, int first ATTRIBUTE_UNUSED)
10973 if (TARGET_AAPCS_BASED
10974 && BYTES_BIG_ENDIAN
10975 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10976 && int_size_in_bytes (type) <= 4)
10979 /* Otherwise, use default padding. */
10980 return !BYTES_BIG_ENDIAN;
10984 /* Print a symbolic form of X to the debug file, F. */
10986 arm_print_value (FILE *f, rtx x)
10988 switch (GET_CODE (x))
10991 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10995 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11003 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11005 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11006 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11014 fprintf (f, "\"%s\"", XSTR (x, 0));
11018 fprintf (f, "`%s'", XSTR (x, 0));
11022 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11026 arm_print_value (f, XEXP (x, 0));
11030 arm_print_value (f, XEXP (x, 0));
11032 arm_print_value (f, XEXP (x, 1));
11040 fprintf (f, "????");
11045 /* Routines for manipulation of the constant pool. */
11047 /* Arm instructions cannot load a large constant directly into a
11048 register; they have to come from a pc relative load. The constant
11049 must therefore be placed in the addressable range of the pc
11050 relative load. Depending on the precise pc relative load
11051 instruction the range is somewhere between 256 bytes and 4k. This
11052 means that we often have to dump a constant inside a function, and
11053 generate code to branch around it.
11055 It is important to minimize this, since the branches will slow
11056 things down and make the code larger.
11058 Normally we can hide the table after an existing unconditional
11059 branch so that there is no interruption of the flow, but in the
11060 worst case the code looks like this:
11078 We fix this by performing a scan after scheduling, which notices
11079 which instructions need to have their operands fetched from the
11080 constant table and builds the table.
11082 The algorithm starts by building a table of all the constants that
11083 need fixing up and all the natural barriers in the function (places
11084 where a constant table can be dropped without breaking the flow).
11085 For each fixup we note how far the pc-relative replacement will be
11086 able to reach and the offset of the instruction into the function.
11088 Having built the table we then group the fixes together to form
11089 tables that are as large as possible (subject to addressing
11090 constraints) and emit each table of constants after the last
11091 barrier that is within range of all the instructions in the group.
11092 If a group does not contain a barrier, then we forcibly create one
11093 by inserting a jump instruction into the flow. Once the table has
11094 been inserted, the insns are then modified to reference the
11095 relevant entry in the pool.
11097 Possible enhancements to the algorithm (not implemented) are:
11099 1) For some processors and object formats, there may be benefit in
11100 aligning the pools to the start of cache lines; this alignment
11101 would need to be taken into account when calculating addressability
11104 /* These typedefs are located at the start of this file, so that
11105 they can be used in the prototypes there. This comment is to
11106 remind readers of that fact so that the following structures
11107 can be understood more easily.
11109 typedef struct minipool_node Mnode;
11110 typedef struct minipool_fixup Mfix; */
11112 struct minipool_node
11114 /* Doubly linked chain of entries. */
11117 /* The maximum offset into the code that this entry can be placed. While
11118 pushing fixes for forward references, all entries are sorted in order
11119 of increasing max_address. */
11120 HOST_WIDE_INT max_address;
11121 /* Similarly for an entry inserted for a backwards ref. */
11122 HOST_WIDE_INT min_address;
11123 /* The number of fixes referencing this entry. This can become zero
11124 if we "unpush" an entry. In this case we ignore the entry when we
11125 come to emit the code. */
11127 /* The offset from the start of the minipool. */
11128 HOST_WIDE_INT offset;
11129 /* The value in the table. */
11131 /* The mode of value. */
11132 enum machine_mode mode;
11133 /* The size of the value. With iWMMXt enabled,
11134 sizes > 4 also imply an alignment of 8 bytes. */
11138 struct minipool_fixup
11142 HOST_WIDE_INT address;
11144 enum machine_mode mode;
11148 HOST_WIDE_INT forwards;
11149 HOST_WIDE_INT backwards;
11152 /* Fixes less than a word need padding out to a word boundary. */
11153 #define MINIPOOL_FIX_SIZE(mode) \
11154 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11156 static Mnode * minipool_vector_head;
11157 static Mnode * minipool_vector_tail;
11158 static rtx minipool_vector_label;
11159 static int minipool_pad;
11161 /* The linked list of all minipool fixes required for this function. */
11162 Mfix * minipool_fix_head;
11163 Mfix * minipool_fix_tail;
11164 /* The fix entry for the current minipool, once it has been placed. */
11165 Mfix * minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (GET_CODE (insn) == JUMP_INSN
      && JUMP_LABEL (insn) != NULL
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
11191 static HOST_WIDE_INT
11192 get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
11196 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11198 rtx body = PATTERN (insn);
11199 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11200 HOST_WIDE_INT size;
11201 HOST_WIDE_INT modesize;
11203 modesize = GET_MODE_SIZE (GET_MODE (body));
11204 size = modesize * XVECLEN (body, elt);
11208 /* Round up size of TBB table to a halfword boundary. */
11209 size = (size + 1) & ~(HOST_WIDE_INT)1;
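	  /* For instance, a TBB table with seven one-byte entries is
	     padded out to eight bytes.  */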
11212 /* No padding necessary for TBH. */
11215 /* Add two bytes for alignment on Thumb. */
11220 gcc_unreachable ();
11228 /* Move a minipool fix MP from its current location to before MAX_MP.
11229 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11230 constraints may need updating. */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11233 HOST_WIDE_INT max_address)
11235 /* The code below assumes these are different. */
11236 gcc_assert (mp != max_mp);
  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
11246 mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;
11250 /* Unlink MP from its current position. Since max_mp is non-null,
11251 mp->prev must be non-null. */
11252 mp->prev->next = mp->next;
11253 if (mp->next != NULL)
11254 mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;
11258 /* Re-insert it before MAX_MP. */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
11263 if (mp->prev != NULL)
11264 mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }
  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
11274 while (mp->prev != NULL
11275 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
11289 /* If set, max_mp is the first pool_entry that has a lower
11290 constraint than the one we are trying to add. */
11291 Mnode * max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;
  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;
11304 /* Scan the pool to see if a constant with the same value has
11305 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
11308 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11310 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11311 && fix->mode == mp->mode
11312 && (GET_CODE (fix->value) != CODE_LABEL
11313 || (CODE_LABEL_NUMBER (fix->value)
11314 == CODE_LABEL_NUMBER (mp->value)))
11315 && rtx_equal_p (fix->value, mp->value))
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;
      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
11341 /* The value is not currently in the minipool, so we need to create
11342 a new entry for it. If MAX_MP is NULL, the entry will be put on
11343 the end of the list since the placement is less constrained than
11344 any existing entry. Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
11348 mp->fix_size = fix->fix_size;
11349 mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
11352 /* Not yet required for a backwards ref. */
11353 mp->min_address = -65536;
  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
11359 mp->prev = minipool_vector_tail;
      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;
11369 minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
11381 if (mp->prev != NULL)
11382 mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }
  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
11392 while (mp->prev != NULL
11393 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

static Mnode *
11403 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11404 HOST_WIDE_INT min_address)
11406 HOST_WIDE_INT offset;
11408 /* The code below assumes these are different. */
11409 gcc_assert (mp != min_mp);
  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;
11421 /* Unlink MP from its current position. Since min_mp is non-null,
11422 mp->next must be non-null. */
11423 mp->next->prev = mp->prev;
11424 if (mp->prev != NULL)
11425 mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;
11429 /* Reinsert it after MIN_MP. */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
11433 if (mp->next != NULL)
11434 mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
11444 mp->offset = offset;
11445 if (mp->refcount > 0)
11446 offset += mp->fix_size;
11448 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
11465 /* If set, min_mp is the last pool_entry that has a lower constraint
11466 than the one we are trying to add. */
11467 Mnode *min_mp = NULL;
11468 /* This can be negative, since it is only a constraint. */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *       mp;
11472 /* If we can't reach the current pool from this insn, or if we can't
11473 insert this entry at the end of the pool without pushing other
11474 fixes out of range, then we don't try. This ensures that we
11475 can't fail later on. */
11476 if (min_address >= minipool_barrier->address
11477 || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;
11481 /* Scan the pool to see if a constant with the same value has
11482 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
11485 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11487 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11488 && fix->mode == mp->mode
11489 && (GET_CODE (fix->value) != CODE_LABEL
11490 || (CODE_LABEL_NUMBER (fix->value)
11491 == CODE_LABEL_NUMBER (mp->value)))
11492 && rtx_equal_p (fix->value, mp->value)
11493 /* Check that there is enough slack to move this entry to the
11494 end of the table (this is conservative). */
11495 && (mp->max_address
11496 > (minipool_barrier->address
11497 + minipool_vector_tail->offset
11498 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}
11504 if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
11509 if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of nodes that require
		 8-byte alignment anywhere except at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
11519 else if (mp->max_address
11520 < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte-aligned quantity before 8-byte-
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }
  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
11549 mp->fix_size = fix->fix_size;
11550 mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
11553 mp->max_address = minipool_barrier->address + 65536;
11555 mp->min_address = min_address;
  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;
      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;
11570 minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;
11578 if (mp->next != NULL)
11579 mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
11612 HOST_WIDE_INT offset = 0;
11615 minipool_barrier = barrier;
11617 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11619 mp->offset = offset;
11621 if (mp->refcount > 0)
11622 offset += mp->fix_size;
/* Output the literal table.  */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;
11634 if (ARM_DOUBLEWORD_ALIGN)
11635 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}
  if (dump_file)
    fprintf (dump_file,
11644 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11645 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11647 scan = emit_label_after (gen_label_rtx (), scan);
11648 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11649 scan = emit_label_after (minipool_vector_label, scan);
11651 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11653 if (mp->refcount > 0)
11657 fprintf (dump_file,
11658 ";; Offset %u, min %ld, max %ld ",
11659 (unsigned) mp->offset, (unsigned long) mp->min_address,
11660 (unsigned long) mp->max_address);
11661 arm_print_value (dump_file, mp->value);
11662 fputc ('\n', dump_file);
11665 switch (mp->fix_size)
11667 #ifdef HAVE_consttable_1
11669 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11673 #ifdef HAVE_consttable_2
11675 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11679 #ifdef HAVE_consttable_4
11681 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11685 #ifdef HAVE_consttable_8
11687 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11691 #ifdef HAVE_consttable_16
11693 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11698 gcc_unreachable ();
11706 minipool_vector_head = minipool_vector_tail = NULL;
11707 scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
11711 /* Return the cost of forcibly inserting a barrier after INSN. */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth would be preferable,
     but at the moment the basic-block information seems to be
     corrupted by this stage of the compilation.  */
11718 int base_cost = 50;
11719 rtx next = next_nonnote_insn (insn);
  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;
11724 switch (GET_CODE (insn))
      /* It will always be better to place the table before the label, rather
	 than after it.  */
11736 return base_cost - 10;
11739 return base_cost + 10;
11743 /* Find the best place in the insn stream in the range
11744 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
11750 HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
11753 /* The instruction after which we will insert the jump. */
  rtx selected = NULL;
  int selected_cost;
11756 /* The address at which the jump instruction will be placed. */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
11759 HOST_WIDE_INT max_count = max_address - fix->address;
11760 rtx label = gen_label_rtx ();
11762 selected_cost = arm_barrier_cost (from);
11763 selected_address = fix->address;
  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;
      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
11772 gcc_assert (GET_CODE (from) != BARRIER);
11774 /* Count the length of this insn. */
11775 count += get_attr_length (from);
11777 /* If there is a jump table, add its length. */
11778 tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);
11783 /* Jump tables aren't in a basic block, so base the cost on
11784 the dispatch insn. If we select this location, we will
11785 still put the pool after the table. */
11786 new_cost = arm_barrier_cost (from);
11788 if (count < max_count
11789 && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }
	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}
11801 new_cost = arm_barrier_cost (from);
11803 if (count < max_count
11804 && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}
      from = NEXT_INSN (from);
    }
11814 /* Make sure that we found a place to insert the jump. */
11815 gcc_assert (selected);
11817 /* Create a new JUMP_INSN that branches around a barrier. */
11818 from = emit_jump_insn_after (gen_jump (label), selected);
11819 JUMP_LABEL (from) = label;
11820 barrier = emit_barrier_after (from);
11821 emit_label_after (label, barrier);
11823 /* Create a minipool barrier entry for the new barrier. */
11824 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11825 new_fix->insn = barrier;
11826 new_fix->address = selected_address;
11827 new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11838 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
11844 if (minipool_fix_head != NULL)
11845 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
  minipool_fix_tail = fix;
}
11852 /* Record INSN, which will need fixing up to load a value from the
11853 minipool. ADDRESS is the offset of the insn since the start of the
11854 function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11859 enum machine_mode mode, rtx value)
11861 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
11867 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11868 fix->value = value;
11869 fix->forwards = get_attr_pool_range (insn);
11870 fix->backwards = get_attr_neg_pool_range (insn);
11871 fix->minipool = NULL;
11873 /* If an insn doesn't have a range defined for it, then it isn't
11874 expecting to be reworked by this code. Better to stop now than
11875 to generate duff assembly code. */
11876 gcc_assert (fix->forwards || fix->backwards);
11878 /* If an entry requires 8-byte alignment then assume all constant pools
11879 require 4 bytes of padding. Trying to do this later on a per-pool
11880 basis is awkward because existing pool entries have to be modified. */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;
  if (dump_file)
    {
      fprintf (dump_file,
11887 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11888 GET_MODE_NAME (mode),
11889 INSN_UID (insn), (unsigned long) address,
11890 -1 * (long)fix->backwards, (long)fix->forwards);
11891 arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }
11895 /* Add it to the chain of fixes. */
11898 if (minipool_fix_head != NULL)
11899 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
  minipool_fix_tail = fix;
}
11906 /* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   generate it inline.  */
int
arm_const_double_inline_cost (rtx val)
11912 rtx lowpart, highpart;
11913 enum machine_mode mode;
11915 mode = GET_MODE (val);
  if (mode == VOIDmode)
    mode = DImode;
11920 gcc_assert (GET_MODE_SIZE (mode) == 8);
11922 lowpart = gen_lowpart (SImode, val);
11923 highpart = gen_highpart_mode (SImode, mode, val);
11925 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11926 gcc_assert (GET_CODE (highpart) == CONST_INT);
11928 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11929 NULL_RTX, NULL_RTX, 0, 0)
11930 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
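/* For instance (illustrative value), a DImode constant such as
   0x0000000100000001 splits into two SImode halves that are each valid
   immediates, so the cost computed above is 1 + 1 = 2 insns.  */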
11934 /* Return true if it is worthwhile to split a 64-bit constant into two
11935 32-bit operations. This is the case if optimizing for size, or
11936 if we have load delay slots, or if one 32-bit part can be done with
11937 a single data operation. */
bool
arm_const_double_by_parts (rtx val)
11941 enum machine_mode mode = GET_MODE (val);
  if (optimize_size || arm_ld_sched)
    return true;
  if (mode == VOIDmode)
    mode = DImode;
11950 part = gen_highpart_mode (SImode, mode, val);
11952 gcc_assert (GET_CODE (part) == CONST_INT);
11954 if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;
11958 part = gen_lowpart (SImode, val);
11960 gcc_assert (GET_CODE (part) == CONST_INT);
11962 if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
11969 /* Return true if it is possible to inline both the high and low parts
11970 of a 64-bit constant into 32-bit data processing instructions. */
bool
arm_const_double_by_immediates (rtx val)
11974 enum machine_mode mode = GET_MODE (val);
  if (mode == VOIDmode)
    mode = DImode;
11980 part = gen_highpart_mode (SImode, mode, val);
11982 gcc_assert (GET_CODE (part) == CONST_INT);
  if (!const_ok_for_arm (INTVAL (part)))
    return false;
11987 part = gen_lowpart (SImode, val);
11989 gcc_assert (GET_CODE (part) == CONST_INT);
  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
11997 /* Scan INSN and note any of its operands that need fixing.
11998 If DO_PUSHES is false we do not actually push any of the fixups
11999 needed. The function returns TRUE if any fixups were needed/pushed.
12000 This is used by arm_memory_load_p() which needs to know about loads
12001 of constants that will be converted into minipool loads. */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12005 bool result = false;
12008 extract_insn (insn);
12010 if (!constrain_operands (1))
12011 fatal_insn_not_found (insn);
  if (recog_data.n_alternatives == 0)
    return false;
  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();
12020 for (opno = 0; opno < recog_data.n_operands; opno++)
12022 /* Things we need to fix can only occur in inputs. */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;
12026 /* If this alternative is a memory reference, then any mention
12027 of constants in this alternative is really to fool reload
12028 into allowing us to accept one there. We need to fix them up
12029 now so that we output the right code. */
12030 if (recog_op_alt[opno][which_alternative].memory_ok)
12032 rtx op = recog_data.operand[opno];
12034 if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	      result = true;
	    }
12041 else if (GET_CODE (op) == MEM
12042 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12043 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);
12049 /* Casting the address of something to a mode narrower
12050 than a word can cause avoid_constant_pool_reference()
12051 to return the pool reference itself. That's no good to
		     us here.  Let's just hope that we can use the
12053 constant pool value directly. */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));
12057 push_minipool_fix (insn, address,
12058 recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}

	      result = true;
	    }
	}
    }

  return result;
}
12070 /* Convert instructions to their cc-clobbering variant if possible, since
12071 that allows us to use smaller encodings. */
static void
thumb2_reorg (void)
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);
12081 /* We are freeing block_for_insn in the toplev to keep compatibility
12082 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12083 compute_bb_for_insn ();
  FOR_EACH_BB (bb)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
12090 df_simulate_initialize_backwards (bb, &live);
12091 FOR_BB_INSNS_REVERSE (bb, insn)
12093 if (NONJUMP_INSN_P (insn)
12094 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12096 rtx pat = PATTERN (insn);
12097 if (GET_CODE (pat) == SET
12098 && low_register_operand (XEXP (pat, 0), SImode)
12099 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12100 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12101 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12103 rtx dst = XEXP (pat, 0);
12104 rtx src = XEXP (pat, 1);
12105 rtx op0 = XEXP (src, 0);
12106 if (rtx_equal_p (dst, op0)
12107 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12109 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12110 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12111 rtvec vec = gen_rtvec (2, pat, clobber);
12112 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12113 INSN_CODE (insn) = -1;
12117 if (NONDEBUG_INSN_P (insn))
12118 df_simulate_one_insn_backwards (bb, insn, &live);
  CLEAR_REG_SET (&live);
}
/* GCC puts the pool in the wrong place for ARM, since we can only
12125 load addresses a limited distance around the pc. We do some
12126 special munging to move the constant pool values to the correct
12127 point in the code. */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB2)
    thumb2_reorg ();
12138 minipool_fix_head = minipool_fix_tail = NULL;
12140 /* The first insn must always be a note, or the code below won't
12141 scan it properly. */
12142 insn = get_insns ();
12143 gcc_assert (GET_CODE (insn) == NOTE);
12146 /* Scan all the insns and record the operands that will need fixing. */
12147 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12149 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12150 && (arm_cirrus_insn_p (insn)
12151 || GET_CODE (insn) == JUMP_INSN
12152 || arm_memory_load_p (insn)))
12153 cirrus_reorg (insn);
12155 if (GET_CODE (insn) == BARRIER)
12156 push_minipool_barrier (insn, address);
12157 else if (INSN_P (insn))
12161 note_invalid_constants (insn, address, true);
12162 address += get_attr_length (insn);
12164 /* If the insn is a vector jump, add the size of the table
12165 and skip the table. */
12166 if ((table = is_jump_table (insn)) != NULL)
12168 address += get_jump_table_size (table);
12174 fix = minipool_fix_head;
12176 /* Now scan the fixups and perform the required changes. */
12181 Mfix * last_added_fix;
12182 Mfix * last_barrier = NULL;
12185 /* Skip any further barriers before the next fix. */
      while (fix && GET_CODE (fix->insn) == BARRIER)
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;
12193 last_added_fix = NULL;
12195 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12197 if (GET_CODE (ftmp->insn) == BARRIER)
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;
12207 last_added_fix = ftmp; /* Keep track of the last fix added. */
12210 /* If we found a barrier, drop back to that; any fixes that we
12211 could have reached but come after the barrier will now go in
12212 the next mini-pool. */
12213 if (last_barrier != NULL)
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
12217 for (fdel = last_barrier->next;
12218 fdel && fdel != ftmp;
12221 fdel->minipool->refcount--;
12222 fdel->minipool = NULL;
12225 ftmp = last_barrier;
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
12231 new barrier in the code somewhere between the previous
12232 fix and this one, and arrange to jump around it. */
12233 HOST_WIDE_INT max_address;
12235 /* The last item on the list of fixes must be a barrier, so
12236 we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);
12240 max_address = minipool_vector_head->max_address;
12241 /* Check that there isn't another fix that is in range that
12242 we couldn't fit into this pool because the pool was
12243 already too large: we need to put the pool before such an
12244 instruction. The pool itself may come just after the
12245 fix because create_fix_barrier also allows space for a
12246 jump instruction. */
12247 if (ftmp->address < max_address)
12248 max_address = ftmp->address + 1;
12250 last_barrier = create_fix_barrier (last_added_fix, max_address);
12253 assign_minipool_offsets (last_barrier);
12257 if (GET_CODE (ftmp->insn) != BARRIER
12258 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12265 /* Scan over the fixes we have identified for this pool, fixing them
12266 up and adding the constants to the pool itself. */
12267 for (this_fix = fix; this_fix && ftmp != this_fix;
12268 this_fix = this_fix->next)
12269 if (GET_CODE (this_fix->insn) != BARRIER)
12272 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12273 minipool_vector_label),
12274 this_fix->minipool->offset);
12275 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12278 dump_minipool (last_barrier->insn);
12282 /* From now on we must synthesize any constants that we can't handle
12283 directly. This can happen if the RTL gets split during final
12284 instruction generation. */
12285 after_arm_reorg = 1;
12287 /* Free the minipool memory. */
12288 obstack_free (&minipool_obstack, minipool_startobj);
12291 /* Routines to output assembly language. */
12293 /* If the rtx is the correct value then return the string of the number.
12294 In this way we can ensure that valid double constants are generated even
12295 when cross compiling. */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();
12305 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12306 for (i = 0; i < 8; i++)
12307 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12308 return strings_fp[i];
12310 gcc_unreachable ();
12313 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12314 static const char *
12315 fp_const_from_val (REAL_VALUE_TYPE *r)
  if (!fp_consts_inited)
    init_fp_table ();
12322 for (i = 0; i < 8; i++)
12323 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12324 return strings_fp[i];
12326 gcc_unreachable ();
12329 /* Output the operands of a LDM/STM instruction to STREAM.
12330 MASK is the ARM register set mask of which only bits 0-15 are important.
12331 REG is the base register, either the frame pointer or the stack pointer,
12332 INSTR is the possibly suffixed load or store instruction.
12333 RFE is nonzero if the instruction should also copy spsr to cpsr. */
static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12337 unsigned long mask, int rfe)
{
  int i;
  bool not_first = FALSE;
12342 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12343 fputc ('\t', stream);
12344 asm_fprintf (stream, instr, reg);
12345 fputc ('{', stream);
12347 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12348 if (mask & (1 << i))
      {
	if (not_first)
	  fprintf (stream, ", ");
	asm_fprintf (stream, "%r", i);
	not_first = TRUE;
      }
  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
}
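/* For instance (illustrative arguments), a MASK of (1 << 4) | (1 << LR_REGNUM)
   with INSTR "ldmfd\t%r!, " and REG == SP_REGNUM prints:

	ldmfd	sp!, {r4, lr}  */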
12364 /* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
12366 REG and COUNT specify the register range.
12367 Extra registers may be added to avoid hardware bugs.
12369 We output FLDMD even for ARMv5 VFP implementations. Although
12370 FLDMD is technically not supported until ARMv6, it is believed
12371 that all VFP implementations support its use in this context. */
static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
	reg--;
      count++;
    }
12386 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12387 load into multiple parts if we have to handle more than 16 registers. */
  if (count > 16)
    {
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;
    }
12395 fputc ('\t', stream);
12396 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12398 for (i = reg; i < reg + count; i++)
      if (i > reg)
	fputs (", ", stream);
12402 asm_fprintf (stream, "d%d", i);
  fputs ("}\n", stream);
}
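/* For instance (illustrative arguments), vfp_output_fldmd (f, SP_REGNUM, 8, 2)
   on a pre-v6 core prints

	fldmfdd	sp!, {d8, d9, d10}

   where the extra register comes from the ARM10 VFPr1 workaround above.  */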
12409 /* Output the assembly for a store multiple. */
char *
vfp_output_fstmd (rtx * operands)
12419 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12420 p = strlen (pattern);
12422 gcc_assert (GET_CODE (operands[1]) == REG);
12424 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12425 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12427 p += sprintf (&pattern[p], ", d%d", base + i);
12429 strcpy (&pattern[p], "}");
  output_asm_insn (pattern, operands);

  return "";
}
12436 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12437 number of bytes pushed. */
static int
vfp_emit_fstmd (int base_reg, int count)
12447 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12448 register pairs are stored by a store multiple insn. We avoid this
12449 by pushing an extra pair. */
12450 if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }
  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
12465 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }
12470 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12471 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;
12476 XVECEXP (par, 0, 0)
12477 = gen_rtx_SET (VOIDmode,
12480 gen_rtx_PRE_MODIFY (Pmode,
12483 (stack_pointer_rtx,
12486 gen_rtx_UNSPEC (BLKmode,
12487 gen_rtvec (1, reg),
12488 UNSPEC_PUSH_MULT));
12490 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12491 plus_constant (stack_pointer_rtx, -(count * 8)));
12492 RTX_FRAME_RELATED_P (tmp) = 1;
12493 XVECEXP (dwarf, 0, 0) = tmp;
12495 tmp = gen_rtx_SET (VOIDmode,
12496 gen_frame_mem (DFmode, stack_pointer_rtx),
12498 RTX_FRAME_RELATED_P (tmp) = 1;
12499 XVECEXP (dwarf, 0, 1) = tmp;
12501 for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
12505 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12507 tmp = gen_rtx_SET (VOIDmode,
12508 gen_frame_mem (DFmode,
12509 plus_constant (stack_pointer_rtx,
12512 RTX_FRAME_RELATED_P (tmp) = 1;
12513 XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
12517 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
12523 /* Emit a call instruction with pattern PAT. ADDR is the address of
12524 the call target. */
void
arm_emit_call_insn (rtx pat, rtx addr)
12531 insn = emit_call_insn (pat);
12533 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12534 If the call might use such an entry, add a use of the PIC register
12535 to the instruction's CALL_INSN_FUNCTION_USAGE. */
  if (TARGET_VXWORKS_RTP
      && flag_pic
12538 && GET_CODE (addr) == SYMBOL_REF
12539 && (SYMBOL_REF_DECL (addr)
12540 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12541 : !SYMBOL_REF_LOCAL_P (addr)))
12543 require_pic_register ();
12544 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12548 /* Output a 'call' insn. */
const char *
output_call (rtx *operands)
12552 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12554 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12555 if (REGNO (operands[0]) == LR_REGNUM)
12557 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12558 output_asm_insn ("mov%?\t%0, %|lr", operands);
12561 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12563 if (TARGET_INTERWORK || arm_arch4t)
12564 output_asm_insn ("bx%?\t%0", operands);
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
12571 /* Output a 'call' insn that is a reference in memory. This is
12572 disabled for ARMv5 and we prefer a blx instead because otherwise
12573 there's a significant performance overhead. */
const char *
output_call_mem (rtx *operands)
12577 gcc_assert (!arm_arch5);
12578 if (TARGET_INTERWORK)
12580 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12581 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12582 output_asm_insn ("bx%?\t%|ip", operands);
12584 else if (regno_use_in (LR_REGNUM, operands[0]))
12586 /* LR is used in the memory address. We load the address in the
12587 first instruction. It's safe to use IP as the target of the
12588 load since the call will kill it anyway. */
12589 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12590 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12598 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to fpa registers.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
12612 int arm_reg0 = REGNO (operands[1]);
12615 gcc_assert (arm_reg0 != IP_REGNUM);
12617 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12618 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12619 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12621 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}
/* Output a move from fpa registers to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
12633 int arm_reg0 = REGNO (operands[0]);
12636 gcc_assert (arm_reg0 != IP_REGNUM);
12638 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12639 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12640 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12642 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);

  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
12653 /* We have to be careful here because the two might overlap. */
12654 int dest_start = REGNO (operands[0]);
12655 int src_start = REGNO (operands[1]);
12659 if (dest_start < src_start)
12661 for (i = 0; i < 3; i++)
12663 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12664 ops[1] = gen_rtx_REG (SImode, src_start + i);
12665 output_asm_insn ("mov%?\t%0, %1", ops);
12670 for (i = 2; i >= 0; i--)
12672 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12673 ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
12684 /* If the src is an immediate, simplify it. */
12685 if (CONST_INT_P (src))
12687 HOST_WIDE_INT val = INTVAL (src);
12688 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12689 if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
12695 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
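/* For instance (illustrative value), arm_emit_movpair (dest, GEN_INT (0x12345678))
   first sets DEST to 0x5678 and then inserts 0x1234 into bits 16-31,
   i.e. a movw/movt pair on targets that support those instructions.  */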
/* Output a move from arm registers to fpa registers.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
12705 int arm_reg0 = REGNO (operands[1]);
12708 gcc_assert (arm_reg0 != IP_REGNUM);
12710 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12711 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12712 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);

  return "";
}
/* Output a move from fpa registers to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
12723 int arm_reg0 = REGNO (operands[0]);
12726 gcc_assert (arm_reg0 != IP_REGNUM);
12728 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12729 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12730 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);

  return "";
}
12735 /* Output a move between double words.
12736 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12737 or MEM<-REG and all MEMs must be offsettable addresses. */
const char *
output_move_double (rtx *operands)
12741 enum rtx_code code0 = GET_CODE (operands[0]);
12742 enum rtx_code code1 = GET_CODE (operands[1]);
12747 unsigned int reg0 = REGNO (operands[0]);
12749 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12751 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12753 switch (GET_CODE (XEXP (operands[1], 0)))
12757 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12758 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12760 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12764 gcc_assert (TARGET_LDRD);
12765 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12770 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12772 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12777 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12779 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12783 gcc_assert (TARGET_LDRD);
12784 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
12793 otherops[0] = operands[0];
12794 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12795 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12797 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12799 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12801 /* Registers overlap so split out the increment. */
12802 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12803 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12807 /* Use a single insn if we can.
12808 FIXME: IWMMXT allows offsets larger than ldrd can
12809 handle, fix these up with a pair of ldr. */
12811 || GET_CODE (otherops[2]) != CONST_INT
12812 || (INTVAL (otherops[2]) > -256
12813 && INTVAL (otherops[2]) < 256))
12814 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12817 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12818 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12824 /* Use a single insn if we can.
12825 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12826 fix these up with a pair of ldr. */
12828 || GET_CODE (otherops[2]) != CONST_INT
12829 || (INTVAL (otherops[2]) > -256
12830 && INTVAL (otherops[2]) < 256))
12831 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12834 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12835 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12842 /* We might be able to use ldrd %0, %1 here. However the range is
12843 different to ldr/adr, and it is broken on some ARMv7-M
12844 implementations. */
	  /* Use the second register of the pair to avoid problematic
	     overlap.  */
12847 otherops[1] = operands[1];
12848 output_asm_insn ("adr%?\t%0, %1", otherops);
12849 operands[1] = otherops[0];
12851 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12853 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12856 /* ??? This needs checking for thumb2. */
12858 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12859 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12861 otherops[0] = operands[0];
12862 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12863 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12865 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12867 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12869 switch ((int) INTVAL (otherops[2]))
12872 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12877 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12882 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12886 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12887 operands[1] = otherops[0];
12889 && (GET_CODE (otherops[2]) == REG
12891 || (GET_CODE (otherops[2]) == CONST_INT
12892 && INTVAL (otherops[2]) > -256
12893 && INTVAL (otherops[2]) < 256)))
12895 if (reg_overlap_mentioned_p (operands[0],
12899 /* Swap base and index registers over to
12900 avoid a conflict. */
12902 otherops[1] = otherops[2];
12905 /* If both registers conflict, it will usually
12906 have been fixed by a splitter. */
12907 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12908 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12910 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12911 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12915 otherops[0] = operands[0];
12916 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12921 if (GET_CODE (otherops[2]) == CONST_INT)
12923 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12924 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12926 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12929 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12932 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12935 return "ldr%(d%)\t%0, [%1]";
12937 return "ldm%(ia%)\t%1, %M0";
12941 otherops[1] = adjust_address (operands[1], SImode, 4);
12942 /* Take care of overlapping base/data reg. */
12943 if (reg_mentioned_p (operands[0], operands[1]))
12945 output_asm_insn ("ldr%?\t%0, %1", otherops);
12946 output_asm_insn ("ldr%?\t%0, %1", operands);
12950 output_asm_insn ("ldr%?\t%0, %1", operands);
12951 output_asm_insn ("ldr%?\t%0, %1", otherops);
12958 /* Constraints should ensure this. */
12959 gcc_assert (code0 == MEM && code1 == REG);
12960 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12962 switch (GET_CODE (XEXP (operands[0], 0)))
12966 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12968 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12972 gcc_assert (TARGET_LDRD);
12973 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12978 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12980 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12985 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12987 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12991 gcc_assert (TARGET_LDRD);
12992 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12997 otherops[0] = operands[1];
12998 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12999 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13001 /* IWMMXT allows offsets larger than ldrd can handle,
13002 fix these up with a pair of ldr. */
13004 && GET_CODE (otherops[2]) == CONST_INT
13005 && (INTVAL(otherops[2]) <= -256
13006 || INTVAL(otherops[2]) >= 256))
13008 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13010 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13011 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13015 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13016 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13019 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13020 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13022 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13026 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13027 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13029 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13032 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13038 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13044 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13049 && (GET_CODE (otherops[2]) == REG
13051 || (GET_CODE (otherops[2]) == CONST_INT
13052 && INTVAL (otherops[2]) > -256
13053 && INTVAL (otherops[2]) < 256)))
13055 otherops[0] = operands[1];
13056 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13057 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13063 otherops[0] = adjust_address (operands[0], SImode, 4);
13064 otherops[1] = operands[1];
13065 output_asm_insn ("str%?\t%1, %0", operands);
13066 output_asm_insn ("str%?\t%H1, %0", otherops);
13073 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13074 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
const char *
output_move_quad (rtx *operands)
13079 if (REG_P (operands[0]))
13081 /* Load, or reg->reg move. */
13083 if (MEM_P (operands[1]))
13085 switch (GET_CODE (XEXP (operands[1], 0)))
13088 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13093 output_asm_insn ("adr%?\t%0, %1", operands);
13094 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13098 gcc_unreachable ();
13106 gcc_assert (REG_P (operands[1]));
13108 dest = REGNO (operands[0]);
13109 src = REGNO (operands[1]);
      /* This seems pretty dumb, but hopefully GCC won't try to do it
	 very often.  */
13114 for (i = 0; i < 4; i++)
13116 ops[0] = gen_rtx_REG (SImode, dest + i);
13117 ops[1] = gen_rtx_REG (SImode, src + i);
13118 output_asm_insn ("mov%?\t%0, %1", ops);
13121 for (i = 3; i >= 0; i--)
13123 ops[0] = gen_rtx_REG (SImode, dest + i);
13124 ops[1] = gen_rtx_REG (SImode, src + i);
13125 output_asm_insn ("mov%?\t%0, %1", ops);
13131 gcc_assert (MEM_P (operands[0]));
13132 gcc_assert (REG_P (operands[1]));
13133 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13135 switch (GET_CODE (XEXP (operands[0], 0)))
13138 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13142 gcc_unreachable ();
13149 /* Output a VFP load or store instruction. */
const char *
output_move_vfp (rtx *operands)
13154 rtx reg, mem, addr, ops[2];
13155 int load = REG_P (operands[0]);
13156 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13157 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13160 enum machine_mode mode;
13162 reg = operands[!load];
13163 mem = operands[load];
13165 mode = GET_MODE (reg);
13167 gcc_assert (REG_P (reg));
13168 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13169 gcc_assert (mode == SFmode
13173 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13174 gcc_assert (MEM_P (mem));
13176 addr = XEXP (mem, 0);
13178 switch (GET_CODE (addr))
13181 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13182 ops[0] = XEXP (addr, 0);
13187 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13188 ops[0] = XEXP (addr, 0);
13193 templ = "f%s%c%%?\t%%%s0, %%1%s";
13199 sprintf (buff, templ,
13200 load ? "ld" : "st",
13203 integer_p ? "\t%@ int" : "");
13204 output_asm_insn (buff, ops);
13209 /* Output a Neon quad-word load or store, or a load or store for
13210 larger structure modes.
13212 WARNING: The ordering of elements is weird in big-endian mode,
13213 because we use VSTM, as required by the EABI. GCC RTL defines
   element ordering based on in-memory order.  This can differ
13215 from the architectural ordering of elements within a NEON register.
13216 The intrinsics defined in arm_neon.h use the NEON register element
13217 ordering, not the GCC RTL element ordering.
   For example, the in-memory ordering of a big-endian quadword
13220 vector with 16-bit elements when stored from register pair {d0,d1}
13221 will be (lowest address first, d0[N] is NEON register element N):
13223 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13225 When necessary, quadword registers (dN, dN+1) are moved to ARM
13226 registers from rN in the order:
13228 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13230 So that STM/LDM can be used on vectors in ARM registers, and the
13231 same memory layout will result as if VSTM/VLDM were used. */
const char *
output_move_neon (rtx *operands)
13236 rtx reg, mem, addr, ops[2];
13237 int regno, load = REG_P (operands[0]);
13240 enum machine_mode mode;
13242 reg = operands[!load];
13243 mem = operands[load];
13245 mode = GET_MODE (reg);
13247 gcc_assert (REG_P (reg));
13248 regno = REGNO (reg);
13249 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13250 || NEON_REGNO_OK_FOR_QUAD (regno));
13251 gcc_assert (VALID_NEON_DREG_MODE (mode)
13252 || VALID_NEON_QREG_MODE (mode)
13253 || VALID_NEON_STRUCT_MODE (mode));
13254 gcc_assert (MEM_P (mem));
13256 addr = XEXP (mem, 0);
13258 /* Strip off const from addresses like (const (plus (...))). */
13259 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13260 addr = XEXP (addr, 0);
13262 switch (GET_CODE (addr))
13265 templ = "v%smia%%?\t%%0!, %%h1";
13266 ops[0] = XEXP (addr, 0);
13271 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13272 templ = "v%smdb%%?\t%%0!, %%h1";
13273 ops[0] = XEXP (addr, 0);
13278 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13279 gcc_unreachable ();
13284 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13287 for (i = 0; i < nregs; i++)
13289 /* We're only using DImode here because it's a convenient size. */
13290 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13291 ops[1] = adjust_address (mem, DImode, 8 * i);
13292 if (reg_overlap_mentioned_p (ops[0], mem))
13294 gcc_assert (overlap == -1);
13299 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13300 output_asm_insn (buff, ops);
13305 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13306 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13307 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13308 output_asm_insn (buff, ops);
13315 templ = "v%smia%%?\t%%m0, %%h1";
13320 sprintf (buff, templ, load ? "ld" : "st");
13321 output_asm_insn (buff, ops);
13326 /* Compute and return the length of neon_mov<mode>, where <mode> is
13327 one of VSTRUCT modes: EI, OI, CI or XI. */
int
arm_attr_length_move_neon (rtx insn)
13331 rtx reg, mem, addr;
13333 enum machine_mode mode;
13335 extract_insn_cached (insn);
13337 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13339 mode = GET_MODE (recog_data.operand[0]);
13350 gcc_unreachable ();
13354 load = REG_P (recog_data.operand[0]);
13355 reg = recog_data.operand[!load];
13356 mem = recog_data.operand[load];
13358 gcc_assert (MEM_P (mem));
13360 mode = GET_MODE (reg);
13361 addr = XEXP (mem, 0);
13363 /* Strip off const from addresses like (const (plus (...))). */
13364 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13365 addr = XEXP (addr, 0);
13367 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13369 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
13384 extract_insn_cached (insn);
13386 if (REG_P (recog_data.operand[0]))
13389 mem = recog_data.operand[0];
13391 gcc_assert (MEM_P (mem));
13393 addr = XEXP (mem, 0);
13395 if (GET_CODE (addr) == REG
13396 || (GET_CODE (addr) == PLUS
13397 && GET_CODE (XEXP (addr, 0)) == REG
	  && GET_CODE (XEXP (addr, 1)) == CONST_INT))
    return 1;

  return 0;
}
13404 /* Output an ADD r, s, #n where n may be too big for one instruction.
13405 If adding zero to one register, output nothing. */
const char *
output_add_immediate (rtx *operands)
13409 HOST_WIDE_INT n = INTVAL (operands[2]);
13411 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
13418 output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
13426 /* Output a multiple immediate operation.
13427 OPERANDS is the vector of operands referred to in the output patterns.
13428 INSTR1 is the output pattern to use for the first constant.
13429 INSTR2 is the output pattern to use for subsequent constants.
13430 IMMED_OP is the index of the constant slot in OPERANDS.
13431 N is the constant value. */
13432 static const char *
13433 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13434 int immed_op, HOST_WIDE_INT n)
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
13442 /* Quick and easy output. */
13443 operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
13449 const char * instr = instr1;
13451 /* Note that n is never zero here (which would give no output). */
13452 for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
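/* For instance (illustrative operands), N = 0x10003 with the usual add
   patterns emits one instruction per rotated 8-bit chunk:

	add	r0, r1, #3
	add	r0, r0, #65536  */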
13467 /* Return the name of a shifter operation. */
13468 static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;
13490 /* Return the appropriate ARM instruction for the operation code.
13491 The returned result should not be overwritten. OP is the rtx of the
13492 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
13497 switch (GET_CODE (op))
13503 return shift_first_arg ? "rsb" : "sub";
      return arm_shift_nmem (GET_CODE (op));
13521 gcc_unreachable ();
13525 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13526 for the operation code. The returned result should not be overwritten.
13527 OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant shift amount otherwise.  */
13530 static const char *
13531 shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);
  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }
  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      mnem = arm_shift_nmem (code);
      break;
    case MULT:
      /* We never have to worry about the amount being other than a
13569 power of 2, since this case can never be reloaded from a reg. */
13570 gcc_assert (*amountp != -1);
13571 *amountp = int_log2 (*amountp);
13572 return ARM_LSL_NAME;
    default:
      gcc_unreachable ();
    }
13578 if (*amountp != -1)
13580 /* This is not 100% correct, but follows from the desire to merge
13581 multiplication by a power of 2 with the recognizer for a
13582 shift. >=32 is not a valid shift for "lsl", so we must try and
13583 output a shift that produces the correct arithmetical result.
13584 Using lsr #32 is identical except for the fact that the carry bit
13585 is not set correctly if we set the flags; but we never use the
13586 carry bit from such an operation, so we can ignore that. */
      if (code == ROTATERT)
	/* Rotate is just modulo 32.  */
	*amountp &= 31;
13590 else if (*amountp != (*amountp & 31))
	{
	  if (code == ASHIFT)
	    mnem = "lsr";
	  *amountp = 32;
	}

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
	return NULL;
    }

  return mnem;
}
/* Obtain the shift count from POWER, which must be an exact power of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
13621 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13622 because /bin/as is horribly restrictive. The judgement about
13623 whether or not each character is 'printable' (and can be output as
13624 is) or not (and must be printed with an octal escape) must be made
13625 with reference to the *host* character set -- the situation is
13626 similar to that discussed in the comments above pp_c_char in
13627 c-pretty-print.c. */
13629 #define MAX_ASCII_LEN 51
13632 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13635 int len_so_far = 0;
13637 fputs ("\t.ascii\t\"", stream);
13639 for (i = 0; i < len; i++)
13643 if (len_so_far >= MAX_ASCII_LEN)
13645 fputs ("\"\n\t.ascii\t\"", stream);
13651 if (c == '\\' || c == '\"')
13653 putc ('\\', stream);
13661 fprintf (stream, "\\%03o", c);
13666 fputs ("\"\n", stream);
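/* Illustrative output (not from the original source): for the input
   bytes "a\"b\n" this emits

	.ascii	"a\"b\012"

   with backslash and double-quote escaped explicitly, any other
   non-printable byte printed as a three-digit octal escape, and a new
   .ascii directive started whenever MAX_ASCII_LEN columns are used.  */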
13669 /* Compute the register save mask for registers 0 through 12
13670 inclusive. This code is used by arm_compute_save_reg_mask. */
13672 static unsigned long
13673 arm_compute_save_reg0_reg12_mask (void)
13675 unsigned long func_type = arm_current_func_type ();
13676 unsigned long save_reg_mask = 0;
13679 if (IS_INTERRUPT (func_type))
13681 unsigned int max_reg;
13682 /* Interrupt functions must not corrupt any registers,
13683 even call clobbered ones. If this is a leaf function
13684 we can just examine the registers used by the RTL, but
13685 otherwise we have to assume that whatever function is
13686 called might clobber anything, and so we have to save
13687 all the call-clobbered registers as well. */
13688 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13689 /* FIQ handlers have registers r8 - r12 banked, so
13690 we only need to check r0 - r7. Normal ISRs only
13691 bank r14 and r15, so we must check up to r12.
13692 r13 is the stack pointer which is always preserved,
13693 so we do not need to consider it here. */
13698 for (reg = 0; reg <= max_reg; reg++)
13699 if (df_regs_ever_live_p (reg)
13700 || (! current_function_is_leaf && call_used_regs[reg]))
13701 save_reg_mask |= (1 << reg);
13703 /* Also save the PIC base register if necessary. */
13705 && !TARGET_SINGLE_PIC_BASE
13706 && arm_pic_register != INVALID_REGNUM
13707 && crtl->uses_pic_offset_table)
13708 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13710 else if (IS_VOLATILE (func_type))
13712 /* For noreturn functions we historically omitted register saves
13713 altogether. However, this really messes up debugging. As a
13714 compromise, save just the frame pointers. Combined with the link
13715 register saved elsewhere, this should be sufficient to get
13717 if (frame_pointer_needed)
13718 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13719 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13720 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13721 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13722 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13726 /* In the normal case we only need to save those registers
13727 which are call saved and which are used by this function. */
13728 for (reg = 0; reg <= 11; reg++)
13729 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13730 save_reg_mask |= (1 << reg);
13732 /* Handle the frame pointer as a special case. */
13733 if (frame_pointer_needed)
13734 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13736 /* If we aren't loading the PIC register,
13737 don't stack it even though it may be live. */
13739 && !TARGET_SINGLE_PIC_BASE
13740 && arm_pic_register != INVALID_REGNUM
13741 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13742 || crtl->uses_pic_offset_table))
13743 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13745 /* The prologue will copy SP into R0, so save it. */
13746 if (IS_STACKALIGN (func_type))
13747 save_reg_mask |= 1;
13750 /* Save registers so the exception handler can modify them. */
13751 if (crtl->calls_eh_return)
13757 reg = EH_RETURN_DATA_REGNO (i);
13758 if (reg == INVALID_REGNUM)
13760 save_reg_mask |= 1 << reg;
13764 return save_reg_mask;
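/* Illustrative example (not from the original source): a normal
   function that uses the call-saved registers r4 and r6, needs no
   frame pointer and makes no use of the PIC base ends up with
   save_reg_mask == (1 << 4) | (1 << 6) == 0x50 here.  */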
13768 /* Compute the number of bytes used to store the static chain register on the
13769 stack, above the stack frame. We need to know this accurately to get the
13770 alignment of the rest of the stack frame correct. */
13772 static int
13773 arm_compute_static_chain_stack_bytes (void)
13774 unsigned long func_type = arm_current_func_type ();
13775 int static_chain_stack_bytes = 0;
13777 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13778 && IS_NESTED (func_type)
13779 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13780 static_chain_stack_bytes = 4;
13782 return static_chain_stack_bytes;
13786 /* Compute a bit mask of which registers need to be
13787 saved on the stack for the current function.
13788 This is used by arm_get_frame_offsets, which may add extra registers. */
13790 static unsigned long
13791 arm_compute_save_reg_mask (void)
13793 unsigned int save_reg_mask = 0;
13794 unsigned long func_type = arm_current_func_type ();
13797 if (IS_NAKED (func_type))
13798 /* This should never really happen. */
13801 /* If we are creating a stack frame, then we must save the frame pointer,
13802 IP (which will hold the old stack pointer), LR and the PC. */
13803 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13805 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13808 | (1 << PC_REGNUM);
13810 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13812 /* Decide if we need to save the link register.
13813 Interrupt routines have their own banked link register,
13814 so they never need to save it.
13815 Otherwise if we do not use the link register we do not need to save
13816 it. If we are pushing other registers onto the stack however, we
13817 can save an instruction in the epilogue by pushing the link register
13818 now and then popping it back into the PC. This incurs extra memory
13819 accesses though, so we only do it when optimizing for size, and only
13820 if we know that we will not need a fancy return sequence. */
13821 if (df_regs_ever_live_p (LR_REGNUM)
13824 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13825 && !crtl->calls_eh_return))
13826 save_reg_mask |= 1 << LR_REGNUM;
13828 if (cfun->machine->lr_save_eliminated)
13829 save_reg_mask &= ~ (1 << LR_REGNUM);
13831 if (TARGET_REALLY_IWMMXT
13832 && ((bit_count (save_reg_mask)
13833 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13834 arm_compute_static_chain_stack_bytes ())
13837 /* The total number of registers that are going to be pushed
13838 onto the stack is odd. We need to ensure that the stack
13839 is 64-bit aligned before we start to save iWMMXt registers,
13840 and also before we start to create locals. (A local variable
13841 might be a double or long long which we will load/store using
13842 an iWMMXt instruction). Therefore we need to push another
13843 ARM register, so that the stack will be 64-bit aligned. We
13844 try to avoid using the arg registers (r0 - r3) as they might be
13845 used to pass values in a tail call. */
13846 for (reg = 4; reg <= 12; reg++)
13847 if ((save_reg_mask & (1 << reg)) == 0)
13851 save_reg_mask |= (1 << reg);
13854 cfun->machine->sibcall_blocked = 1;
13855 save_reg_mask |= (1 << 3);
13859 /* We may need to push an additional register for use in initializing the
13860 PIC base register. */
13861 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13862 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13864 reg = thumb_find_work_register (1 << 4);
13865 if (!call_used_regs[reg])
13866 save_reg_mask |= (1 << reg);
13869 return save_reg_mask;
13873 /* Compute a bit mask of which registers need to be
13874 saved on the stack for the current function. */
13875 static unsigned long
13876 thumb1_compute_save_reg_mask (void)
13878 unsigned long mask;
13882 for (reg = 0; reg < 12; reg ++)
13883 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13887 && !TARGET_SINGLE_PIC_BASE
13888 && arm_pic_register != INVALID_REGNUM
13889 && crtl->uses_pic_offset_table)
13890 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13892 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13893 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13894 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13896 /* LR will also be pushed if any lo regs are pushed. */
13897 if (mask & 0xff || thumb_force_lr_save ())
13898 mask |= (1 << LR_REGNUM);
13900 /* Make sure we have a low work register if we need one.
13901 We will need one if we are going to push a high register,
13902 but we are not currently intending to push a low register. */
13903 if ((mask & 0xff) == 0
13904 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13906 /* Use thumb_find_work_register to choose which register
13907 we will use. If the register is live then we will
13908 have to push it. Use LAST_LO_REGNUM as our fallback
13909 choice for the register to select. */
13910 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13911 /* Make sure the register returned by thumb_find_work_register is
13912 not part of the return value. */
13913 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13914 reg = LAST_LO_REGNUM;
13916 if (! call_used_regs[reg])
13920 /* The 504 below is 8 bytes less than 512 because there are two possible
13921 alignment words. We can't tell here if they will be present or not so we
13922 have to play it safe and assume that they are. */
13923 if ((CALLER_INTERWORKING_SLOT_SIZE
13924 + ROUND_UP_WORD (get_frame_size ())
13925 + crtl->outgoing_args_size) >= 504)
13927 /* This is the same as the code in thumb1_expand_prologue() which
13928 determines which register to use for stack decrement. */
13929 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13930 if (mask & (1 << reg))
13933 if (reg > LAST_LO_REGNUM)
13935 /* Make sure we have a register available for stack decrement. */
13936 mask |= 1 << LAST_LO_REGNUM;
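/* For illustration (not from the original source): with no
   interworking slot, a 600-byte frame and no outgoing args, the test
   above sees 600 >= 504, so if none of r4 - r7 is already being
   pushed, LAST_LO_REGNUM (r7) is forced into the mask to serve as the
   stack decrement scratch register.  */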
13944 /* Return the number of bytes required to save VFP registers. */
13946 arm_get_vfp_saved_size (void)
13948 unsigned int regno;
13953 /* Space for saved VFP registers. */
13954 if (TARGET_HARD_FLOAT && TARGET_VFP)
13957 for (regno = FIRST_VFP_REGNUM;
13958 regno < LAST_VFP_REGNUM;
13961 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13962 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13966 /* Work around the ARM10 VFPr1 bug. */
13967 if (count == 2 && !arm_arch6)
13969 saved += count * 8;
13978 if (count == 2 && !arm_arch6)
13980 saved += count * 8;
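/* Illustrative example (not from the original source): if only d8 and
   d9 (i.e. s16 - s19) are live, the scan above finds one run of two
   D registers, so saved += 2 * 8 == 16 on ARMv6 and later; on an
   earlier ARM10 the VFPr1 workaround pads the run to three registers,
   giving 24 bytes.  */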
13987 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13988 everything bar the final return instruction. */
13990 output_return_instruction (rtx operand, int really_return, int reverse)
13992 char conditional[10];
13995 unsigned long live_regs_mask;
13996 unsigned long func_type;
13997 arm_stack_offsets *offsets;
13999 func_type = arm_current_func_type ();
14001 if (IS_NAKED (func_type))
14004 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14006 /* If this function was declared non-returning, and we have
14007 found a tail call, then we have to trust that the called
14008 function won't return. */
14013 /* Otherwise, trap an attempted return by aborting. */
14015 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14017 assemble_external_libcall (ops[1]);
14018 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14024 gcc_assert (!cfun->calls_alloca || really_return);
14026 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14028 cfun->machine->return_used_this_function = 1;
14030 offsets = arm_get_frame_offsets ();
14031 live_regs_mask = offsets->saved_regs_mask;
14033 if (live_regs_mask)
14035 const char * return_reg;
14037 /* If we do not have any special requirements for function exit
14038 (e.g. interworking) then we can load the return address
14039 directly into the PC. Otherwise we must load it into LR. */
14041 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14042 return_reg = reg_names[PC_REGNUM];
14044 return_reg = reg_names[LR_REGNUM];
14046 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14048 /* There are three possible reasons for the IP register
14049 being saved. 1) a stack frame was created, in which case
14050 IP contains the old stack pointer, or 2) an ISR routine
14051 corrupted it, or 3) it was saved to align the stack on
14052 iWMMXt. In case 1, restore IP into SP, otherwise just
14054 if (frame_pointer_needed)
14056 live_regs_mask &= ~ (1 << IP_REGNUM);
14057 live_regs_mask |= (1 << SP_REGNUM);
14060 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14063 /* On some ARM architectures it is faster to use LDR rather than
14064 LDM to load a single register. On other architectures, the
14065 cost is the same. In 26 bit mode, or for exception handlers,
14066 we have to use LDM to load the PC so that the CPSR is also
14068 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14069 if (live_regs_mask == (1U << reg))
14072 if (reg <= LAST_ARM_REGNUM
14073 && (reg != LR_REGNUM
14075 || ! IS_INTERRUPT (func_type)))
14077 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14078 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14085 /* Generate the load multiple instruction to restore the
14086 registers. Note we can get here, even if
14087 frame_pointer_needed is true, but only if sp already
14088 points to the base of the saved core registers. */
14089 if (live_regs_mask & (1 << SP_REGNUM))
14091 unsigned HOST_WIDE_INT stack_adjust;
14093 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14094 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14096 if (stack_adjust && arm_arch5 && TARGET_ARM)
14097 if (TARGET_UNIFIED_ASM)
14098 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14100 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14103 /* If we can't use ldmib (SA110 bug),
14104 then try to pop r3 instead. */
14106 live_regs_mask |= 1 << 3;
14108 if (TARGET_UNIFIED_ASM)
14109 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14111 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14115 if (TARGET_UNIFIED_ASM)
14116 sprintf (instr, "pop%s\t{", conditional);
14118 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14120 p = instr + strlen (instr);
14122 for (reg = 0; reg <= SP_REGNUM; reg++)
14123 if (live_regs_mask & (1 << reg))
14125 int l = strlen (reg_names[reg]);
14131 memcpy (p, ", ", 2);
14135 memcpy (p, "%|", 2);
14136 memcpy (p + 2, reg_names[reg], l);
14140 if (live_regs_mask & (1 << LR_REGNUM))
14142 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14143 /* If returning from an interrupt, restore the CPSR. */
14144 if (IS_INTERRUPT (func_type))
14151 output_asm_insn (instr, & operand);
14153 /* See if we need to generate an extra instruction to
14154 perform the actual function return. */
14156 && func_type != ARM_FT_INTERWORKED
14157 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14159 /* The return has already been handled
14160 by loading the LR into the PC. */
14167 switch ((int) ARM_FUNC_TYPE (func_type))
14171 /* ??? This is wrong for unified assembly syntax. */
14172 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14175 case ARM_FT_INTERWORKED:
14176 sprintf (instr, "bx%s\t%%|lr", conditional);
14179 case ARM_FT_EXCEPTION:
14180 /* ??? This is wrong for unified assembly syntax. */
14181 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14185 /* Use bx if it's available. */
14186 if (arm_arch5 || arm_arch4t)
14187 sprintf (instr, "bx%s\t%%|lr", conditional);
14189 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14193 output_asm_insn (instr, & operand);
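/* Typical results (illustrative, not from the original source): a
   simple function on ARMv5 returns with "bx lr" (or "mov pc, lr"
   before v4t); a function that pushed {r4, lr} returns with
   "ldmfd sp!, {r4, pc}" (or "pop {r4, pc}" in unified syntax); an ISR
   returns with "subs pc, lr, #4" so that the CPSR is restored too.  */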
14199 /* Write the function name into the code section, directly preceding
14200 the function prologue.
14202 Code will be output similar to this:
14204 .ascii "arm_poke_function_name", 0
14207 .word 0xff000000 + (t1 - t0)
14208 arm_poke_function_name
14210 stmfd sp!, {fp, ip, lr, pc}
14213 When performing a stack backtrace, code can inspect the value
14214 of 'pc' stored at 'fp' + 0. If the trace function then looks
14215 at location pc - 12 and the top 8 bits are set, then we know
14216 that there is a function name embedded immediately preceding this
14217 location, whose length is given by (pc[-3] & 0x00ffffff).
14219 We assume that pc is declared as a pointer to an unsigned long.
14221 It is of no benefit to output the function name if we are assembling
14222 a leaf function. These function types will not contain a stack
14223 backtrace structure; therefore, it is not possible to determine the
14226 arm_poke_function_name (FILE *stream, const char *name)
14228 unsigned long alignlength;
14229 unsigned long length;
14232 length = strlen (name) + 1;
14233 alignlength = ROUND_UP_WORD (length);
14235 ASM_OUTPUT_ASCII (stream, name, length);
14236 ASM_OUTPUT_ALIGN (stream, 2);
14237 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14238 assemble_aligned_integer (UNITS_PER_WORD, x);
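/* Worked example (illustrative): for NAME "fn", length = strlen + 1
   = 3, which rounds up to alignlength = 4, so the emitted marker word
   is 0xff000000 + 4 = 0xff000004 -- top byte set, low 24 bits giving
   the distance back to the start of the name.  */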
14241 /* Place some comments into the assembler stream
14242 describing the current function. */
14244 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14246 unsigned long func_type;
14250 thumb1_output_function_prologue (f, frame_size);
14254 /* Sanity check. */
14255 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14257 func_type = arm_current_func_type ();
14259 switch ((int) ARM_FUNC_TYPE (func_type))
14262 case ARM_FT_NORMAL:
14264 case ARM_FT_INTERWORKED:
14265 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14268 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14271 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14273 case ARM_FT_EXCEPTION:
14274 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14278 if (IS_NAKED (func_type))
14279 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14281 if (IS_VOLATILE (func_type))
14282 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14284 if (IS_NESTED (func_type))
14285 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14286 if (IS_STACKALIGN (func_type))
14287 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14289 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14291 crtl->args.pretend_args_size, frame_size);
14293 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14294 frame_pointer_needed,
14295 cfun->machine->uses_anonymous_args);
14297 if (cfun->machine->lr_save_eliminated)
14298 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14300 if (crtl->calls_eh_return)
14301 asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
14306 arm_output_epilogue (rtx sibling)
14309 unsigned long saved_regs_mask;
14310 unsigned long func_type;
14311 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14312 frame that is $fp + 4 for a non-variadic function. */
14313 int floats_offset = 0;
14315 FILE * f = asm_out_file;
14316 unsigned int lrm_count = 0;
14317 int really_return = (sibling == NULL);
14319 arm_stack_offsets *offsets;
14321 /* If we have already generated the return instruction
14322 then it is futile to generate anything else. */
14323 if (use_return_insn (FALSE, sibling)
14324 && (cfun->machine->return_used_this_function != 0))
14327 func_type = arm_current_func_type ();
14329 if (IS_NAKED (func_type))
14330 /* Naked functions don't have epilogues. */
14333 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14337 /* A volatile function should never return. Call abort. */
14338 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14339 assemble_external_libcall (op);
14340 output_asm_insn ("bl\t%a0", &op);
14345 /* If we are throwing an exception, then we really must be doing a
14346 return, so we can't tail-call. */
14347 gcc_assert (!crtl->calls_eh_return || really_return);
14349 offsets = arm_get_frame_offsets ();
14350 saved_regs_mask = offsets->saved_regs_mask;
14353 lrm_count = bit_count (saved_regs_mask);
14355 floats_offset = offsets->saved_args;
14356 /* Compute how far away the floats will be. */
14357 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14358 if (saved_regs_mask & (1 << reg))
14359 floats_offset += 4;
14361 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14363 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14364 int vfp_offset = offsets->frame;
14366 if (TARGET_FPA_EMU2)
14368 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14369 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14371 floats_offset += 12;
14372 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14373 reg, FP_REGNUM, floats_offset - vfp_offset);
14378 start_reg = LAST_FPA_REGNUM;
14380 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14382 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14384 floats_offset += 12;
14386 /* We can't unstack more than four registers at once. */
14387 if (start_reg - reg == 3)
14389 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14390 reg, FP_REGNUM, floats_offset - vfp_offset);
14391 start_reg = reg - 1;
14396 if (reg != start_reg)
14397 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14398 reg + 1, start_reg - reg,
14399 FP_REGNUM, floats_offset - vfp_offset);
14400 start_reg = reg - 1;
14404 /* Just in case the last register checked also needs unstacking. */
14405 if (reg != start_reg)
14406 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14407 reg + 1, start_reg - reg,
14408 FP_REGNUM, floats_offset - vfp_offset);
14411 if (TARGET_HARD_FLOAT && TARGET_VFP)
14415 /* The fldmd insns do not have base+offset addressing
14416 modes, so we use IP to hold the address. */
14417 saved_size = arm_get_vfp_saved_size ();
14419 if (saved_size > 0)
14421 floats_offset += saved_size;
14422 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14423 FP_REGNUM, floats_offset - vfp_offset);
14425 start_reg = FIRST_VFP_REGNUM;
14426 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14428 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14429 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14431 if (start_reg != reg)
14432 vfp_output_fldmd (f, IP_REGNUM,
14433 (start_reg - FIRST_VFP_REGNUM) / 2,
14434 (reg - start_reg) / 2);
14435 start_reg = reg + 2;
14438 if (start_reg != reg)
14439 vfp_output_fldmd (f, IP_REGNUM,
14440 (start_reg - FIRST_VFP_REGNUM) / 2,
14441 (reg - start_reg) / 2);
14446 /* The frame pointer is guaranteed to be non-double-word aligned.
14447 This is because it is set to (old_stack_pointer - 4) and the
14448 old_stack_pointer was double word aligned. Thus the offset to
14449 the iWMMXt registers to be loaded must also be non-double-word
14450 sized, so that the resultant address *is* double-word aligned.
14451 We can ignore floats_offset since that was already included in
14452 the live_regs_mask. */
14453 lrm_count += (lrm_count % 2 ? 2 : 1);
14455 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14456 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14458 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14459 reg, FP_REGNUM, lrm_count * 4);
14464 /* saved_regs_mask should contain the IP, which at the time of stack
14465 frame generation actually contains the old stack pointer. So a
14466 quick way to unwind the stack is just to pop the IP register directly
14467 into the stack pointer. */
14468 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14469 saved_regs_mask &= ~ (1 << IP_REGNUM);
14470 saved_regs_mask |= (1 << SP_REGNUM);
14472 /* There are two registers left in saved_regs_mask - LR and PC. We
14473 only need to restore the LR register (the return address), but to
14474 save time we can load it directly into the PC, unless we need a
14475 special function exit sequence, or we are not really returning. */
14477 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14478 && !crtl->calls_eh_return)
14479 /* Delete the LR from the register mask, so that the LR saved on
14480 the stack is loaded into the PC instead. */
14481 saved_regs_mask &= ~ (1 << LR_REGNUM);
14483 saved_regs_mask &= ~ (1 << PC_REGNUM);
14485 /* We must use SP as the base register, because SP is one of the
14486 registers being restored. If an interrupt or page fault
14487 happens in the ldm instruction, the SP might or might not
14488 have been restored. That would be bad, as then SP would no
14489 longer indicate the safe area of stack, and we could get stack
14490 corruption. Using SP as the base register means that it will
14491 be reset correctly to the original value, should an interrupt
14492 occur. If the stack pointer already points at the right
14493 place, then omit the subtraction. */
14494 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14495 || cfun->calls_alloca)
14496 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14497 4 * bit_count (saved_regs_mask));
14498 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14500 if (IS_INTERRUPT (func_type))
14501 /* Interrupt handlers will have pushed the
14502 IP onto the stack, so restore it now. */
14503 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14507 /* This branch is executed for ARM mode (non-apcs frames) and
14508 Thumb-2 mode. Frame layout is essentially the same for those
14509 cases, except that in ARM mode the frame pointer points to the
14510 first saved register, while in Thumb-2 mode the frame pointer points
14511 to the last saved register.
14513 It is possible to make the frame pointer point to the last saved
14514 register in both cases, and remove some conditionals below.
14515 That would mean that the fp setup in the prologue would be just
14516 "mov fp, sp" and the sp restore in the epilogue would be just
14517 "mov sp, fp", whereas now we have to use add/sub in those cases.
14518 However, the value of doing that would be marginal, as both mov
14519 and add/sub are 32-bit in ARM mode, and it would require extra
14520 conditionals in arm_expand_prologue to distinguish the
14521 ARM-apcs-frame case (where the frame pointer is required to point
14522 at the first saved register) from ARM-non-apcs-frame. Therefore,
14523 such a change is postponed until a real need arises. */
14524 unsigned HOST_WIDE_INT amount;
14526 /* Restore stack pointer if necessary. */
14527 if (TARGET_ARM && frame_pointer_needed)
14529 operands[0] = stack_pointer_rtx;
14530 operands[1] = hard_frame_pointer_rtx;
14532 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14533 output_add_immediate (operands);
14537 if (frame_pointer_needed)
14539 /* For Thumb-2 restore sp from the frame pointer.
14540 Operand restrictions mean we have to increment FP, then copy
14542 amount = offsets->locals_base - offsets->saved_regs;
14543 operands[0] = hard_frame_pointer_rtx;
14547 unsigned long count;
14548 operands[0] = stack_pointer_rtx;
14549 amount = offsets->outgoing_args - offsets->saved_regs;
14550 /* Pop call-clobbered registers if that avoids a
14551 separate stack adjustment. */
14552 count = offsets->saved_regs - offsets->saved_args;
14555 && !crtl->calls_eh_return
14556 && bit_count (saved_regs_mask) * 4 == count
14557 && !IS_INTERRUPT (func_type)
14558 && !crtl->tail_call_emit)
14560 unsigned long mask;
14561 /* Preserve return values, of any size. */
14562 mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
14564 mask &= ~saved_regs_mask;
14566 while (bit_count (mask) * 4 > amount)
14568 while ((mask & (1 << reg)) == 0)
14570 mask &= ~(1 << reg);
14572 if (bit_count (mask) * 4 == amount)
14573 {
14574 saved_regs_mask |= mask;
14581 operands[1] = operands[0];
14582 operands[2] = GEN_INT (amount);
14583 output_add_immediate (operands);
14585 if (frame_pointer_needed)
14586 asm_fprintf (f, "\tmov\t%r, %r\n",
14587 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14590 if (TARGET_FPA_EMU2)
14592 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14593 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14594 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14599 start_reg = FIRST_FPA_REGNUM;
14601 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14603 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14605 if (reg - start_reg == 3)
14607 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14608 start_reg, SP_REGNUM);
14609 start_reg = reg + 1;
14614 if (reg != start_reg)
14615 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14616 start_reg, reg - start_reg,
14619 start_reg = reg + 1;
14623 /* Just in case the last register checked also needs unstacking. */
14624 if (reg != start_reg)
14625 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14626 start_reg, reg - start_reg, SP_REGNUM);
14629 if (TARGET_HARD_FLOAT && TARGET_VFP)
14631 int end_reg = LAST_VFP_REGNUM + 1;
14633 /* Scan the registers in reverse order. We need to match
14634 any groupings made in the prologue and generate matching
14636 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14638 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14639 && (!df_regs_ever_live_p (reg + 1)
14640 || call_used_regs[reg + 1]))
14642 if (end_reg > reg + 2)
14643 vfp_output_fldmd (f, SP_REGNUM,
14644 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14645 (end_reg - (reg + 2)) / 2);
14649 if (end_reg > reg + 2)
14650 vfp_output_fldmd (f, SP_REGNUM, 0,
14651 (end_reg - (reg + 2)) / 2);
14655 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14656 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14657 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14659 /* If we can, restore the LR into the PC. */
14660 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14661 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14662 && !IS_STACKALIGN (func_type)
14664 && crtl->args.pretend_args_size == 0
14665 && saved_regs_mask & (1 << LR_REGNUM)
14666 && !crtl->calls_eh_return)
14668 saved_regs_mask &= ~ (1 << LR_REGNUM);
14669 saved_regs_mask |= (1 << PC_REGNUM);
14670 rfe = IS_INTERRUPT (func_type);
14675 /* Load the registers off the stack. If we only have one register
14676 to load use the LDR instruction - it is faster. For Thumb-2
14677 always use pop and the assembler will pick the best instruction. */
14678 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14679 && !IS_INTERRUPT (func_type))
14681 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14683 else if (saved_regs_mask)
14685 if (saved_regs_mask & (1 << SP_REGNUM))
14686 /* Note - write back to the stack register is not enabled
14687 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14688 in the list of registers and if we add writeback the
14689 instruction becomes UNPREDICTABLE. */
14690 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14692 else if (TARGET_ARM)
14693 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14696 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14699 if (crtl->args.pretend_args_size)
14701 /* Unwind the pre-pushed regs. */
14702 operands[0] = operands[1] = stack_pointer_rtx;
14703 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14704 output_add_immediate (operands);
14708 /* We may have already restored PC directly from the stack. */
14709 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14712 /* Stack adjustment for exception handler. */
14713 if (crtl->calls_eh_return)
14714 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14715 ARM_EH_STACKADJ_REGNUM);
14717 /* Generate the return instruction. */
14718 switch ((int) ARM_FUNC_TYPE (func_type))
14722 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14725 case ARM_FT_EXCEPTION:
14726 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14729 case ARM_FT_INTERWORKED:
14730 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14734 if (IS_STACKALIGN (func_type))
14736 /* See comment in arm_expand_prologue. */
14737 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14739 if (arm_arch5 || arm_arch4t)
14740 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14742 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14750 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14751 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14753 arm_stack_offsets *offsets;
14759 /* Emit any call-via-reg trampolines that are needed for v4t support
14760 of call_reg and call_value_reg type insns. */
14761 for (regno = 0; regno < LR_REGNUM; regno++)
14763 rtx label = cfun->machine->call_via[regno];
14767 switch_to_section (function_section (current_function_decl));
14768 targetm.asm_out.internal_label (asm_out_file, "L",
14769 CODE_LABEL_NUMBER (label));
14770 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14774 /* ??? Probably not safe to set this here, since it assumes that a
14775 function will be emitted as assembly immediately after we generate
14776 RTL for it. This does not happen for inline functions. */
14777 cfun->machine->return_used_this_function = 0;
14779 else /* TARGET_32BIT */
14781 /* We need to take into account any stack-frame rounding. */
14782 offsets = arm_get_frame_offsets ();
14784 gcc_assert (!use_return_insn (FALSE, NULL)
14785 || (cfun->machine->return_used_this_function != 0)
14786 || offsets->saved_regs == offsets->outgoing_args
14787 || frame_pointer_needed);
14789 /* Reset the ARM-specific per-function variables. */
14790 after_arm_reorg = 0;
14794 /* Generate and emit an insn that we will recognize as a push_multi.
14795 Unfortunately, since this insn does not reflect very well the actual
14796 semantics of the operation, we need to annotate the insn for the benefit
14797 of DWARF2 frame unwind information. */
14799 emit_multi_reg_push (unsigned long mask)
14802 int num_dwarf_regs;
14806 int dwarf_par_index;
14809 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14810 if (mask & (1 << i))
14813 gcc_assert (num_regs && num_regs <= 16);
14815 /* We don't record the PC in the dwarf frame information. */
14816 num_dwarf_regs = num_regs;
14817 if (mask & (1 << PC_REGNUM))
14820 /* For the body of the insn we are going to generate an UNSPEC in
14821 parallel with several USEs. This allows the insn to be recognized
14822 by the push_multi pattern in the arm.md file.
14824 The body of the insn looks something like this:
14827 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14828 (const_int:SI <num>)))
14829 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14835 For the frame note, however, we try to be more explicit and actually
14836 show each register being stored into the stack frame, plus a (single)
14837 decrement of the stack pointer. We do it this way in order to be
14838 friendly to the stack unwinding code, which only wants to see a single
14839 stack decrement per instruction. The RTL we generate for the note looks
14840 something like this:
14843 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14844 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14845 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14846 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14850 FIXME: In an ideal world the PRE_MODIFY would not exist and
14851 instead we'd have a parallel expression detailing all
14852 the stores to the various memory addresses so that debug
14853 information is more up-to-date. Remember, however, while writing
14854 this, to take care of the constraints of the push instruction.
14856 Note also that this has to be taken care of for the VFP registers.
14858 For more see PR43399. */
14860 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14861 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14862 dwarf_par_index = 1;
14864 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14866 if (mask & (1 << i))
14868 reg = gen_rtx_REG (SImode, i);
14870 XVECEXP (par, 0, 0)
14871 = gen_rtx_SET (VOIDmode,
14874 gen_rtx_PRE_MODIFY (Pmode,
14877 (stack_pointer_rtx,
14880 gen_rtx_UNSPEC (BLKmode,
14881 gen_rtvec (1, reg),
14882 UNSPEC_PUSH_MULT));
14884 if (i != PC_REGNUM)
14886 tmp = gen_rtx_SET (VOIDmode,
14887 gen_frame_mem (SImode, stack_pointer_rtx),
14889 RTX_FRAME_RELATED_P (tmp) = 1;
14890 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14898 for (j = 1, i++; j < num_regs; i++)
14900 if (mask & (1 << i))
14902 reg = gen_rtx_REG (SImode, i);
14904 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14906 if (i != PC_REGNUM)
14909 = gen_rtx_SET (VOIDmode,
14912 plus_constant (stack_pointer_rtx,
14915 RTX_FRAME_RELATED_P (tmp) = 1;
14916 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14923 par = emit_insn (par);
14925 tmp = gen_rtx_SET (VOIDmode,
14927 plus_constant (stack_pointer_rtx, -4 * num_regs));
14928 RTX_FRAME_RELATED_P (tmp) = 1;
14929 XVECEXP (dwarf, 0, 0) = tmp;
14931 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
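/* Illustrative usage (not from the original source):

     emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM));

   emits a single insn assembling to "stmfd sp!, {r4, r5, lr}", with a
   REG_FRAME_RELATED_EXPR note describing one 12-byte stack decrement
   followed by the three individual stores.  */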
14936 /* Calculate the size of the return value that is passed in registers. */
14938 arm_size_return_regs (void)
14940 enum machine_mode mode;
14942 if (crtl->return_rtx != 0)
14943 mode = GET_MODE (crtl->return_rtx);
14945 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14947 return GET_MODE_SIZE (mode);
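/* For example (illustrative): a function returning long long has a
   DImode return rtx, so this returns 8 -- the value comes back in the
   register pair r0/r1.  */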
14951 emit_sfm (int base_reg, int count)
14958 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14959 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14961 reg = gen_rtx_REG (XFmode, base_reg++);
14963 XVECEXP (par, 0, 0)
14964 = gen_rtx_SET (VOIDmode,
14967 gen_rtx_PRE_MODIFY (Pmode,
14970 (stack_pointer_rtx,
14973 gen_rtx_UNSPEC (BLKmode,
14974 gen_rtvec (1, reg),
14975 UNSPEC_PUSH_MULT));
14976 tmp = gen_rtx_SET (VOIDmode,
14977 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14978 RTX_FRAME_RELATED_P (tmp) = 1;
14979 XVECEXP (dwarf, 0, 1) = tmp;
14981 for (i = 1; i < count; i++)
14983 reg = gen_rtx_REG (XFmode, base_reg++);
14984 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14986 tmp = gen_rtx_SET (VOIDmode,
14987 gen_frame_mem (XFmode,
14988 plus_constant (stack_pointer_rtx,
14991 RTX_FRAME_RELATED_P (tmp) = 1;
14992 XVECEXP (dwarf, 0, i + 1) = tmp;
14995 tmp = gen_rtx_SET (VOIDmode,
14997 plus_constant (stack_pointer_rtx, -12 * count));
14999 RTX_FRAME_RELATED_P (tmp) = 1;
15000 XVECEXP (dwarf, 0, 0) = tmp;
15002 par = emit_insn (par);
15003 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15009 /* Return true if the current function needs to save/restore LR. */
15012 thumb_force_lr_save (void)
15014 return !cfun->machine->lr_save_eliminated
15015 && (!leaf_function_p ()
15016 || thumb_far_jump_used_p ()
15017 || df_regs_ever_live_p (LR_REGNUM));
15021 /* Compute the distance from register FROM to register TO.
15022 These can be the arg pointer (26), the soft frame pointer (25),
15023 the stack pointer (13) or the hard frame pointer (11).
15024 In thumb mode r7 is used as the soft frame pointer, if needed.
15025 Typical stack layout looks like this:
15027 old stack pointer -> | |
15030 | | saved arguments for
15031 | | vararg functions
15034 hard FP & arg pointer -> | | \
15042 soft frame pointer -> | | /
15047 locals base pointer -> | | /
15052 current stack pointer -> | | /
15055 For a given function some or all of these stack components
15056 may not be needed, giving rise to the possibility of
15057 eliminating some of the registers.
15059 The values returned by this function must reflect the behavior
15060 of arm_expand_prologue() and arm_compute_save_reg_mask().
15062 The sign of the number returned reflects the direction of stack
15063 growth, so the values are positive for all eliminations except
15064 from the soft frame pointer to the hard frame pointer.
15066 SFP may point just inside the local variables block to ensure correct
15070 /* Calculate stack offsets. These are used to calculate register elimination
15071 offsets and in prologue/epilogue code. Also calculates which registers
15072 should be saved. */
15074 static arm_stack_offsets *
15075 arm_get_frame_offsets (void)
15077 struct arm_stack_offsets *offsets;
15078 unsigned long func_type;
15082 HOST_WIDE_INT frame_size;
15085 offsets = &cfun->machine->stack_offsets;
15087 /* We need to know if we are a leaf function. Unfortunately, it
15088 is possible to be called after start_sequence has been called,
15089 which causes get_insns to return the insns for the sequence,
15090 not the function, which will cause leaf_function_p to return
15091 the incorrect result.
15093 However, we only need to know about leaf functions once reload has completed, and the
15094 frame size cannot be changed after that time, so we can safely
15095 use the cached value. */
15097 if (reload_completed)
15100 /* Initially this is the size of the local variables. It will be translated
15101 into an offset once we have determined the size of preceding data. */
15102 frame_size = ROUND_UP_WORD (get_frame_size ());
15104 leaf = leaf_function_p ();
15106 /* Space for variadic functions. */
15107 offsets->saved_args = crtl->args.pretend_args_size;
15109 /* In Thumb mode this is incorrect, but never used. */
15110 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
15111 + arm_compute_static_chain_stack_bytes ();
15115 unsigned int regno;
15117 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15118 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15119 saved = core_saved;
15121 /* We know that SP will be doubleword aligned on entry, and we must
15122 preserve that condition at any subroutine call. We also require the
15123 soft frame pointer to be doubleword aligned. */
15125 if (TARGET_REALLY_IWMMXT)
15127 /* Check for the call-saved iWMMXt registers. */
15128 for (regno = FIRST_IWMMXT_REGNUM;
15129 regno <= LAST_IWMMXT_REGNUM;
15131 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15135 func_type = arm_current_func_type ();
15136 if (! IS_VOLATILE (func_type))
15138 /* Space for saved FPA registers. */
15139 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15140 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15143 /* Space for saved VFP registers. */
15144 if (TARGET_HARD_FLOAT && TARGET_VFP)
15145 saved += arm_get_vfp_saved_size ();
15148 else /* TARGET_THUMB1 */
15150 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15151 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15152 saved = core_saved;
15153 if (TARGET_BACKTRACE)
15157 /* Saved registers include the stack frame. */
15158 offsets->saved_regs = offsets->saved_args + saved
15159 + arm_compute_static_chain_stack_bytes ();
15160 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15161 /* A leaf function does not need any stack alignment if it has nothing
15163 if (leaf && frame_size == 0)
15165 offsets->outgoing_args = offsets->soft_frame;
15166 offsets->locals_base = offsets->soft_frame;
15170 /* Ensure SFP has the correct alignment. */
15171 if (ARM_DOUBLEWORD_ALIGN
15172 && (offsets->soft_frame & 7))
15174 offsets->soft_frame += 4;
15175 /* Try to align stack by pushing an extra reg. Don't bother doing this
15176 when there is a stack frame as the alignment will be rolled into
15177 the normal stack adjustment. */
15178 if (frame_size + crtl->outgoing_args_size == 0)
15182 /* If it is safe to use r3, then do so. This sometimes
15183 generates better code on Thumb-2 by avoiding the need to
15184 use 32-bit push/pop instructions. */
15185 if (!crtl->tail_call_emit
15186 && arm_size_return_regs () <= 12
15187 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15192 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15194 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15203 offsets->saved_regs += 4;
15204 offsets->saved_regs_mask |= (1 << reg);
15209 offsets->locals_base = offsets->soft_frame + frame_size;
15210 offsets->outgoing_args = (offsets->locals_base
15211 + crtl->outgoing_args_size);
15213 if (ARM_DOUBLEWORD_ALIGN)
15215 /* Ensure SP remains doubleword aligned. */
15216 if (offsets->outgoing_args & 7)
15217 offsets->outgoing_args += 4;
15218 gcc_assert (!(offsets->outgoing_args & 7));
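/* Worked example (illustrative, not from the original source): a
   function with no pretend args that pushes {r4, r5, lr} (12 bytes),
   has a 12-byte local frame and no outgoing args gets saved_args = 0,
   saved_regs = 12, soft_frame = 12 rounded up to 16, locals_base = 28
   and outgoing_args = 28 rounded up to 32, keeping SP doubleword
   aligned.  */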
15225 /* Calculate the relative offsets for the different stack pointers. Positive
15226 offsets are in the direction of stack growth. */
15229 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15231 arm_stack_offsets *offsets;
15233 offsets = arm_get_frame_offsets ();
15235 /* OK, now we have enough information to compute the distances.
15236 There must be an entry in these switch tables for each pair
15237 of registers in ELIMINABLE_REGS, even if some of the entries
15238 seem to be redundant or useless. */
15241 case ARG_POINTER_REGNUM:
15244 case THUMB_HARD_FRAME_POINTER_REGNUM:
15247 case FRAME_POINTER_REGNUM:
15248 /* This is the reverse of the soft frame pointer
15249 to hard frame pointer elimination below. */
15250 return offsets->soft_frame - offsets->saved_args;
15252 case ARM_HARD_FRAME_POINTER_REGNUM:
15253 /* This is only non-zero in the case where the static chain register
15254 is stored above the frame. */
15255 return offsets->frame - offsets->saved_args - 4;
15257 case STACK_POINTER_REGNUM:
15258 /* If nothing has been pushed on the stack at all
15259 then this will return -4. This *is* correct! */
15260 return offsets->outgoing_args - (offsets->saved_args + 4);
15263 gcc_unreachable ();
15265 gcc_unreachable ();
15267 case FRAME_POINTER_REGNUM:
15270 case THUMB_HARD_FRAME_POINTER_REGNUM:
15273 case ARM_HARD_FRAME_POINTER_REGNUM:
15274 /* The hard frame pointer points to the top entry in the
15275 stack frame. The soft frame pointer to the bottom entry
15276 in the stack frame. If there is no stack frame at all,
15277 then they are identical. */
15279 return offsets->frame - offsets->soft_frame;
15281 case STACK_POINTER_REGNUM:
15282 return offsets->outgoing_args - offsets->soft_frame;
15285 gcc_unreachable ();
15287 gcc_unreachable ();
15290 /* You cannot eliminate from the stack pointer.
15291 In theory you could eliminate from the hard frame
15292 pointer to the stack pointer, but this will never
15293 happen, since if a stack frame is not needed the
15294 hard frame pointer will never be used. */
15295 gcc_unreachable ();
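/* Worked example (illustrative, not from the original source): if
   saved_args = 0 and outgoing_args = 32, eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM returns
   32 - (0 + 4) = 28; with nothing pushed at all it returns
   0 - (0 + 4) = -4, which, as the comment above notes, is correct.  */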
15299 /* Given FROM and TO register numbers, say whether this elimination is
15300 allowed. Frame pointer elimination is automatically handled.
15302 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15303 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15304 pointer, we must eliminate FRAME_POINTER_REGNUM into
15305 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15306 ARG_POINTER_REGNUM. */
15309 arm_can_eliminate (const int from, const int to)
15311 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15312 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15313 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15314 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15318 /* Emit RTL to save coprocessor registers on function entry. Returns the
15319 number of bytes pushed. */
15322 arm_save_coproc_regs (void)
15324 int saved_size = 0;
15326 unsigned start_reg;
15329 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15330 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15332 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15333 insn = gen_rtx_MEM (V2SImode, insn);
15334 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15335 RTX_FRAME_RELATED_P (insn) = 1;
15339 /* Save any floating point call-saved registers used by this
15341 if (TARGET_FPA_EMU2)
15343 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15344 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15346 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15347 insn = gen_rtx_MEM (XFmode, insn);
15348 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15349 RTX_FRAME_RELATED_P (insn) = 1;
15355 start_reg = LAST_FPA_REGNUM;
15357 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15359 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15361 if (start_reg - reg == 3)
15363 insn = emit_sfm (reg, 4);
15364 RTX_FRAME_RELATED_P (insn) = 1;
15366 start_reg = reg - 1;
15371 if (start_reg != reg)
15373 insn = emit_sfm (reg + 1, start_reg - reg);
15374 RTX_FRAME_RELATED_P (insn) = 1;
15375 saved_size += (start_reg - reg) * 12;
15377 start_reg = reg - 1;
15381 if (start_reg != reg)
15383 insn = emit_sfm (reg + 1, start_reg - reg);
15384 saved_size += (start_reg - reg) * 12;
15385 RTX_FRAME_RELATED_P (insn) = 1;
15388 if (TARGET_HARD_FLOAT && TARGET_VFP)
15390 start_reg = FIRST_VFP_REGNUM;
15392 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15394 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15395 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15397 if (start_reg != reg)
15398 saved_size += vfp_emit_fstmd (start_reg,
15399 (reg - start_reg) / 2);
15400 start_reg = reg + 2;
15403 if (start_reg != reg)
15404 saved_size += vfp_emit_fstmd (start_reg,
15405 (reg - start_reg) / 2);
15411 /* Set the Thumb frame pointer from the stack pointer. */
15414 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15416 HOST_WIDE_INT amount;
15419 amount = offsets->outgoing_args - offsets->locals_base;
15421 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15422 stack_pointer_rtx, GEN_INT (amount)));
15425 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15426 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15427 expects the first two operands to be the same. */
15430 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15432 hard_frame_pointer_rtx));
15436 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15437 hard_frame_pointer_rtx,
15438 stack_pointer_rtx));
15440 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15441 plus_constant (stack_pointer_rtx, amount));
15442 RTX_FRAME_RELATED_P (dwarf) = 1;
15443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15446 RTX_FRAME_RELATED_P (insn) = 1;
15449 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15452 arm_expand_prologue (void)
15457 unsigned long live_regs_mask;
15458 unsigned long func_type;
15460 int saved_pretend_args = 0;
15461 int saved_regs = 0;
15462 unsigned HOST_WIDE_INT args_to_push;
15463 arm_stack_offsets *offsets;
15465 func_type = arm_current_func_type ();
15467 /* Naked functions don't have prologues. */
15468 if (IS_NAKED (func_type))
15471 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15472 args_to_push = crtl->args.pretend_args_size;
15474 /* Compute which registers we will have to save onto the stack. */
15475 offsets = arm_get_frame_offsets ();
15476 live_regs_mask = offsets->saved_regs_mask;
15478 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15480 if (IS_STACKALIGN (func_type))
15485 /* Handle a word-aligned stack pointer. We generate the following:
15490 <save and restore r0 in normal prologue/epilogue>
15494 The unwinder doesn't need to know about the stack realignment.
15495 Just tell it we saved SP in r0. */
15496 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15498 r0 = gen_rtx_REG (SImode, 0);
15499 r1 = gen_rtx_REG (SImode, 1);
15500 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15501 compiler won't choke. */
15502 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15503 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15504 insn = gen_movsi (r0, stack_pointer_rtx);
15505 RTX_FRAME_RELATED_P (insn) = 1;
15506 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15508 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15509 emit_insn (gen_movsi (stack_pointer_rtx, r1));
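/* The insns emitted above assemble to something like (illustrative):

	mov	r0, sp
	bic	r1, r0, #7
	mov	sp, r1

   leaving the original SP in r0 for the unwinder while SP itself is
   rounded down to a doubleword boundary.  */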
15512 /* For APCS frames, if the IP register is clobbered when
15513 creating the frame, save that register in a special
15515 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15517 if (IS_INTERRUPT (func_type))
15519 /* Interrupt functions must not corrupt any registers.
15520 Creating a frame pointer, however, corrupts the IP
15521 register, so we must push it first. */
15522 insn = emit_multi_reg_push (1 << IP_REGNUM);
15524 /* Do not set RTX_FRAME_RELATED_P on this insn.
15525 The dwarf stack unwinding code only wants to see one
15526 stack decrement per function, and this is not it. If
15527 this instruction is labeled as being part of the frame
15528 creation sequence then dwarf2out_frame_debug_expr will
15529 die when it encounters the assignment of IP to FP
15530 later on, since the use of SP here establishes SP as
15531 the CFA register and not IP.
15533 Anyway this instruction is not really part of the stack
15534 frame creation although it is part of the prologue. */
15536 else if (IS_NESTED (func_type))
15538 /* The static chain register is the same as the IP register
15539 used as a scratch register during stack frame creation.
15540 To get around this, we need to find somewhere to store IP
15541 whilst the frame is being created. We try the following
15544 1. The last argument register.
15545 2. A slot on the stack above the frame. (This only
15546 works if the function is not a varargs function).
15547 3. Register r3, after pushing the argument registers
15550 Note - we only need to tell the dwarf2 backend about the SP
15551 adjustment in the second variant; the static chain register
15552 doesn't need to be unwound, as it doesn't contain a value
15553 inherited from the caller. */
15555 if (df_regs_ever_live_p (3) == false)
15556 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15557 else if (args_to_push == 0)
15561 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
15564 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15565 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15568 /* Just tell the dwarf backend that we adjusted SP. */
15569 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15570 plus_constant (stack_pointer_rtx,
15572 RTX_FRAME_RELATED_P (insn) = 1;
15573 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15577 /* Store the args on the stack. */
15578 if (cfun->machine->uses_anonymous_args)
15579 insn = emit_multi_reg_push
15580 ((0xf0 >> (args_to_push / 4)) & 0xf);
15583 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15584 GEN_INT (- args_to_push)));
15586 RTX_FRAME_RELATED_P (insn) = 1;
15588 saved_pretend_args = 1;
15589 fp_offset = args_to_push;
15592 /* Now reuse r3 to preserve IP. */
15593 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15597 insn = emit_set_insn (ip_rtx,
15598 plus_constant (stack_pointer_rtx, fp_offset));
15599 RTX_FRAME_RELATED_P (insn) = 1;
15604 /* Push the argument registers, or reserve space for them. */
15605 if (cfun->machine->uses_anonymous_args)
15606 insn = emit_multi_reg_push
15607 ((0xf0 >> (args_to_push / 4)) & 0xf);
15610 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15611 GEN_INT (- args_to_push)));
15612 RTX_FRAME_RELATED_P (insn) = 1;
15615 /* If this is an interrupt service routine, and the link register
15616 is going to be pushed, and we're not generating extra
15617 push of IP (needed when a frame is needed and the frame layout is APCS),
15618 subtracting four from LR now will mean that the function return
15619 can be done with a single instruction. */
15620 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15621 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15622 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15625 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15627 emit_set_insn (lr, plus_constant (lr, -4));
15630 if (live_regs_mask)
15632 saved_regs += bit_count (live_regs_mask) * 4;
15633 if (optimize_size && !frame_pointer_needed
15634 && saved_regs == offsets->saved_regs - offsets->saved_args)
15636 /* If no coprocessor registers are being pushed and we don't have
15637 to worry about a frame pointer then push extra registers to
15638 create the stack frame. This is done in a way that does not
15639 alter the frame layout, so it is independent of the epilogue. */
15643 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15645 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15646 if (frame && n * 4 >= frame)
15649 live_regs_mask |= (1 << n) - 1;
15650 saved_regs += frame;
15653 insn = emit_multi_reg_push (live_regs_mask);
15654 RTX_FRAME_RELATED_P (insn) = 1;
15657 if (! IS_VOLATILE (func_type))
15658 saved_regs += arm_save_coproc_regs ();
15660 if (frame_pointer_needed && TARGET_ARM)
15662 /* Create the new frame pointer. */
15663 if (TARGET_APCS_FRAME)
15665 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15666 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15667 RTX_FRAME_RELATED_P (insn) = 1;
15669 if (IS_NESTED (func_type))
15671 /* Recover the static chain register. */
15672 if (!df_regs_ever_live_p (3)
15673 || saved_pretend_args)
15674 insn = gen_rtx_REG (SImode, 3);
15675 else /* if (crtl->args.pretend_args_size == 0) */
15677 insn = plus_constant (hard_frame_pointer_rtx, 4);
15678 insn = gen_frame_mem (SImode, insn);
15680 emit_set_insn (ip_rtx, insn);
15681 /* Add a USE to stop propagate_one_insn() from barfing. */
15682 emit_insn (gen_prologue_use (ip_rtx));
15687 insn = GEN_INT (saved_regs - 4);
15688 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15689 stack_pointer_rtx, insn));
15690 RTX_FRAME_RELATED_P (insn) = 1;
15694 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15696 /* This add can produce multiple insns for a large constant, so we
15697 need to get tricky. */
15698 rtx last = get_last_insn ();
15700 amount = GEN_INT (offsets->saved_args + saved_regs
15701 - offsets->outgoing_args);
15703 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15707 last = last ? NEXT_INSN (last) : get_insns ();
15708 RTX_FRAME_RELATED_P (last) = 1;
15710 while (last != insn);
15712 /* If the frame pointer is needed, emit a special barrier that
15713 will prevent the scheduler from moving stores to the frame
15714 before the stack adjustment. */
15715 if (frame_pointer_needed)
15716 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15717 hard_frame_pointer_rtx));
15721 if (frame_pointer_needed && TARGET_THUMB2)
15722 thumb_set_frame_pointer (offsets);
15724 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15726 unsigned long mask;
15728 mask = live_regs_mask;
15729 mask &= THUMB2_WORK_REGS;
15730 if (!IS_NESTED (func_type))
15731 mask |= (1 << IP_REGNUM);
15732 arm_load_pic_register (mask);
15735 /* If we are profiling, make sure no instructions are scheduled before
15736 the call to mcount. Similarly if the user has requested no
15737 scheduling in the prolog. Similarly if we want non-call exceptions
15738 using the EABI unwinder, to prevent faulting instructions from being
15739 swapped with a stack adjustment. */
15740 if (crtl->profile || !TARGET_SCHED_PROLOG
15741 || (arm_except_unwind_info () == UI_TARGET
15742 && cfun->can_throw_non_call_exceptions))
15743 emit_insn (gen_blockage ());
15745 /* If the link register is being kept alive, with the return address in it,
15746 then make sure that it does not get reused by the ce2 pass. */
15747 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15748 cfun->machine->lr_save_eliminated = 1;
15751 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15752 static void
15753 arm_print_condition (FILE *stream)
15755 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15757 /* Branch conversion is not implemented for Thumb-2. */
15760 output_operand_lossage ("predicated Thumb instruction");
15763 if (current_insn_predicate != NULL)
15765 output_operand_lossage
15766 ("predicated instruction in conditional sequence");
15770 fputs (arm_condition_codes[arm_current_cc], stream);
15772 else if (current_insn_predicate)
15774 enum arm_cond_code code;
15778 output_operand_lossage ("predicated Thumb instruction");
15782 code = get_arm_condition_code (current_insn_predicate);
15783 fputs (arm_condition_codes[code], stream);
15788 /* If CODE is 'd', then the X is a condition operand and the instruction
15789 should only be executed if the condition is true.
15790 if CODE is 'D', then the X is a condition operand and the instruction
15791 should only be executed if the condition is false: however, if the mode
15792 of the comparison is CCFPEmode, then always execute the instruction -- we
15793 do this because in these circumstances !GE does not necessarily imply LT;
15794 in these cases the instruction pattern will take care to make sure that
15795 an instruction containing %d will follow, thereby undoing the effects of
15796 doing this instruction unconditionally.
15797 If CODE is 'N' then X is a floating point operand that must be negated
15799 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15800 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15801 static void
15802 arm_print_operand (FILE *stream, rtx x, int code)
15807 fputs (ASM_COMMENT_START, stream);
15811 fputs (user_label_prefix, stream);
15815 fputs (REGISTER_PREFIX, stream);
15819 arm_print_condition (stream);
15823 /* Nothing in unified syntax, otherwise the current condition code. */
15824 if (!TARGET_UNIFIED_ASM)
15825 arm_print_condition (stream);
15829 /* The current condition code in unified syntax, otherwise nothing. */
15830 if (TARGET_UNIFIED_ASM)
15831 arm_print_condition (stream);
15835 /* The current condition code for a condition code setting instruction.
15836 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15837 if (TARGET_UNIFIED_ASM)
15839 fputc ('s', stream);
15840 arm_print_condition (stream);
15844 arm_print_condition (stream);
15845 fputc ('s', stream);
15850 /* If the instruction is conditionally executed then print
15851 the current condition code, otherwise print 's'. */
15852 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15853 if (current_insn_predicate)
15854 arm_print_condition (stream);
15856 fputc ('s', stream);
15859 /* %# is a "break" sequence. It doesn't output anything, but is used to
15860 separate e.g. operand numbers from following text, if that text consists
15861 of further digits which we don't want to be part of the operand
15862 number. */
15869 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15870 r = real_value_negate (&r);
15871 fprintf (stream, "%s", fp_const_from_val (&r));
15875 /* An integer or symbol address without a preceding # sign. */
15877 switch (GET_CODE (x))
15879 case CONST_INT:
15880 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15881 break;
15883 case SYMBOL_REF:
15884 output_addr_const (stream, x);
15885 break;
15887 default:
15888 gcc_unreachable ();
15893 if (GET_CODE (x) == CONST_INT)
15896 val = ARM_SIGN_EXTEND (~INTVAL (x));
15897 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15901 putc ('~', stream);
15902 output_addr_const (stream, x);
15907 /* The low 16 bits of an immediate constant. */
15908 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
15912 fprintf (stream, "%s", arithmetic_instr (x, 1));
15915 /* Truncate Cirrus shift counts. */
15917 if (GET_CODE (x) == CONST_INT)
15919 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15920 return;
15922 arm_print_operand (stream, x, 0);
15926 fprintf (stream, "%s", arithmetic_instr (x, 0));
15934 if (!shift_operator (x, SImode))
15936 output_operand_lossage ("invalid shift operand");
15940 shift = shift_op (x, &val);
15942 if (shift)
15944 fprintf (stream, ", %s ", shift);
15945 if (val == -1)
15946 arm_print_operand (stream, XEXP (x, 1), 0);
15947 else
15948 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15953 /* An explanation of the 'Q', 'R' and 'H' register operands:
15955 In a pair of registers containing a DI or DF value the 'Q'
15956 operand returns the register number of the register containing
15957 the least significant part of the value. The 'R' operand returns
15958 the register number of the register containing the most
15959 significant part of the value.
15961 The 'H' operand returns the higher of the two register numbers.
15962 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15963 same as the 'Q' operand, since the most significant part of the
15964 value is held in the lower-numbered register. The reverse is true
15965 on systems where WORDS_BIG_ENDIAN is false.
15967 The purpose of these operands is to distinguish between cases
15968 where the endian-ness of the values is important (for example
15969 when they are added together), and cases where the endian-ness
15970 is irrelevant, but the order of register operations is important.
15971 For example when loading a value from memory into a register
15972 pair, the endian-ness does not matter. Provided that the value
15973 from the lower memory address is put into the lower numbered
15974 register, and the value from the higher address is put into the
15975 higher numbered register, the load will work regardless of whether
15976 the value being loaded is big-wordian or little-wordian. The
15977 order of the two register loads can matter however, if the address
15978 of the memory location is actually held in one of the registers
15979 being overwritten by the load.
15981 The 'Q' and 'R' constraints are also available for 64-bit
15982 constants. */
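/* Worked example (illustrative): for a DImode value held in {r0, r1},
   little-endian: %Q -> r0 (least significant word), %R -> r1, %H -> r1;
   with WORDS_BIG_ENDIAN: %Q -> r1, %R -> r0, %H -> r1 (still the higher
   register number, now holding the least significant word).  */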
15984 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15986 rtx part = gen_lowpart (SImode, x);
15987 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15991 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15993 output_operand_lossage ("invalid operand for code '%c'", code);
15997 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16001 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16003 enum machine_mode mode = GET_MODE (x);
16006 if (mode == VOIDmode)
16007 mode = DImode;
16008 part = gen_highpart_mode (SImode, mode, x);
16009 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16013 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16015 output_operand_lossage ("invalid operand for code '%c'", code);
16019 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16023 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16025 output_operand_lossage ("invalid operand for code '%c'", code);
16029 asm_fprintf (stream, "%r", REGNO (x) + 1);
16033 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16035 output_operand_lossage ("invalid operand for code '%c'", code);
16039 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16043 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16045 output_operand_lossage ("invalid operand for code '%c'", code);
16049 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16053 asm_fprintf (stream, "%r",
16054 GET_CODE (XEXP (x, 0)) == REG
16055 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16059 asm_fprintf (stream, "{%r-%r}",
16060 REGNO (x),
16061 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16064 /* Like 'M', but writing doubleword vector registers, for use by Neon
16065 insns. */
16068 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16069 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16070 if (numregs == 1)
16071 asm_fprintf (stream, "{d%d}", regno);
16072 else
16073 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16078 /* CONST_TRUE_RTX means always -- that's the default. */
16079 if (x == const_true_rtx)
16080 return;
16082 if (!COMPARISON_P (x))
16084 output_operand_lossage ("invalid operand for code '%c'", code);
16088 fputs (arm_condition_codes[get_arm_condition_code (x)],
16093 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16094 want to do that. */
16095 if (x == const_true_rtx)
16097 output_operand_lossage ("instruction never executed");
16100 if (!COMPARISON_P (x))
16102 output_operand_lossage ("invalid operand for code '%c'", code);
16106 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16107 (get_arm_condition_code (x))],
16111 /* Cirrus registers can be accessed in a variety of ways:
16112 single floating point (f)
16113 double floating point (d)
16114 32bit integer (fx)
16115 64bit integer (dx). */
16116 case 'W': /* Cirrus register in F mode. */
16117 case 'X': /* Cirrus register in D mode. */
16118 case 'Y': /* Cirrus register in FX mode. */
16119 case 'Z': /* Cirrus register in DX mode. */
16120 gcc_assert (GET_CODE (x) == REG
16121 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16123 fprintf (stream, "mv%s%s",
16124 code == 'W' ? "f"
16125 : code == 'X' ? "d"
16126 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16130 /* Print cirrus register in the mode specified by the register's mode. */
16133 int mode = GET_MODE (x);
16135 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16137 output_operand_lossage ("invalid operand for code '%c'", code);
16141 fprintf (stream, "mv%s%s",
16142 mode == DFmode ? "d"
16143 : mode == SImode ? "fx"
16144 : mode == DImode ? "dx"
16145 : "f", reg_names[REGNO (x)] + 2);
16151 if (GET_CODE (x) != REG
16152 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16153 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16154 /* Bad value for wCG register number. */
16156 output_operand_lossage ("invalid operand for code '%c'", code);
16161 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16164 /* Print an iWMMXt control register name. */
16166 if (GET_CODE (x) != CONST_INT
16167 || INTVAL (x) < 0
16168 || INTVAL (x) >= 16)
16169 /* Bad value for wC register number. */
16171 output_operand_lossage ("invalid operand for code '%c'", code);
16177 static const char * wc_reg_names [16] =
16179 "wCID", "wCon", "wCSSF", "wCASF",
16180 "wC4", "wC5", "wC6", "wC7",
16181 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16182 "wC12", "wC13", "wC14", "wC15"
16185 fprintf (stream, wc_reg_names [INTVAL (x)]);
16189 /* Print the high single-precision register of a VFP double-precision
16193 int mode = GET_MODE (x);
16196 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16198 output_operand_lossage ("invalid operand for code '%c'", code);
16203 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16205 output_operand_lossage ("invalid operand for code '%c'", code);
16209 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16213 /* Print a VFP/Neon double precision or quad precision register name. */
16217 int mode = GET_MODE (x);
16218 int is_quad = (code == 'q');
16221 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16223 output_operand_lossage ("invalid operand for code '%c'", code);
16227 if (GET_CODE (x) != REG
16228 || !IS_VFP_REGNUM (REGNO (x)))
16230 output_operand_lossage ("invalid operand for code '%c'", code);
16235 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16236 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16238 output_operand_lossage ("invalid operand for code '%c'", code);
16242 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16243 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16247 /* These two codes print the low/high doubleword register of a Neon quad
16248 register, respectively. For pair-structure types, can also print
16249 low/high quadword registers. */
16253 int mode = GET_MODE (x);
16256 if ((GET_MODE_SIZE (mode) != 16
16257 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16259 output_operand_lossage ("invalid operand for code '%c'", code);
16264 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16266 output_operand_lossage ("invalid operand for code '%c'", code);
16270 if (GET_MODE_SIZE (mode) == 16)
16271 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16272 + (code == 'f' ? 1 : 0));
16274 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16275 + (code == 'f' ? 1 : 0));
16279 /* Print a VFPv3 floating-point constant, represented as an integer
16283 int index = vfp3_const_double_index (x);
16284 gcc_assert (index != -1);
16285 fprintf (stream, "%d", index);
16289 /* Print bits representing opcode features for Neon.
16291 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16292 and polynomials as unsigned.
16294 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16296 Bit 2 is 1 for rounding functions, 0 otherwise. */
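/* Worked example (illustrative): for bits = 5 (binary 101), 'T' prints
   's' (signed, "uspf"[1]) and 'O' prints "r" (bit 2 set, rounding);
   bits = 3 makes 'T' print 'f' (float) and bits = 2 makes it print
   'p' (polynomial).  */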
16298 /* Identify the type as 's', 'u', 'p' or 'f'. */
16301 HOST_WIDE_INT bits = INTVAL (x);
16302 fputc ("uspf"[bits & 3], stream);
16306 /* Likewise, but signed and unsigned integers are both 'i'. */
16309 HOST_WIDE_INT bits = INTVAL (x);
16310 fputc ("iipf"[bits & 3], stream);
16314 /* As for 'T', but emit 'u' instead of 'p'. */
16317 HOST_WIDE_INT bits = INTVAL (x);
16318 fputc ("usuf"[bits & 3], stream);
16322 /* Bit 2: rounding (vs none). */
16325 HOST_WIDE_INT bits = INTVAL (x);
16326 fputs ((bits & 4) != 0 ? "r" : "", stream);
16330 /* Memory operand for vld1/vst1 instruction. */
16334 bool postinc = FALSE;
16335 unsigned align, modesize, align_bits;
16337 gcc_assert (GET_CODE (x) == MEM);
16338 addr = XEXP (x, 0);
16339 if (GET_CODE (addr) == POST_INC)
16341 postinc = 1;
16342 addr = XEXP (addr, 0);
16344 asm_fprintf (stream, "[%r", REGNO (addr));
16346 /* We know the alignment of this access, so we can emit a hint in the
16347 instruction (for some alignments) as an aid to the memory subsystem
16348 of the CPU. */
16349 align = MEM_ALIGN (x) >> 3;
16350 modesize = GET_MODE_SIZE (GET_MODE (x));
16352 /* Only certain alignment specifiers are supported by the hardware. */
16353 if (modesize == 16 && (align % 32) == 0)
16354 align_bits = 256;
16355 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16356 align_bits = 128;
16357 else if ((align % 8) == 0)
16358 align_bits = 64;
16359 else
16360 align_bits = 0;
16362 if (align_bits != 0)
16363 asm_fprintf (stream, ":%d", align_bits);
16365 asm_fprintf (stream, "]");
16367 if (postinc)
16368 fputs ("!", stream);
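/* Worked example (illustrative): a 16-byte vld1/vst1 access through r0
   whose MEM is known to be 32-byte aligned prints "[r0:256]", and with a
   post-increment address "[r0:256]!"; an access with no usable alignment
   guarantee prints just "[r0]".  */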
16376 gcc_assert (GET_CODE (x) == MEM);
16377 addr = XEXP (x, 0);
16378 gcc_assert (GET_CODE (addr) == REG);
16379 asm_fprintf (stream, "[%r]", REGNO (addr));
16383 /* Translate an S register number into a D register number and element index. */
16386 int mode = GET_MODE (x);
16389 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16391 output_operand_lossage ("invalid operand for code '%c'", code);
16396 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16398 output_operand_lossage ("invalid operand for code '%c'", code);
16402 regno = regno - FIRST_VFP_REGNUM;
16403 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16407 /* Register specifier for vld1.16/vst1.16. Translate the S register
16408 number into a D register number and element index. */
16411 int mode = GET_MODE (x);
16414 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16416 output_operand_lossage ("invalid operand for code '%c'", code);
16421 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16423 output_operand_lossage ("invalid operand for code '%c'", code);
16427 regno = regno - FIRST_VFP_REGNUM;
16428 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16435 output_operand_lossage ("missing operand");
16439 switch (GET_CODE (x))
16442 asm_fprintf (stream, "%r", REGNO (x));
16446 output_memory_reference_mode = GET_MODE (x);
16447 output_address (XEXP (x, 0));
16454 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16455 sizeof (fpstr), 0, 1);
16456 fprintf (stream, "#%s", fpstr);
16459 fprintf (stream, "#%s", fp_immediate_constant (x));
16463 gcc_assert (GET_CODE (x) != NEG);
16464 fputc ('#', stream);
16465 if (GET_CODE (x) == HIGH)
16467 fputs (":lower16:", stream);
16468 x = XEXP (x, 0);
16471 output_addr_const (stream, x);
16477 /* Target hook for printing a memory address. */
16478 static void
16479 arm_print_operand_address (FILE *stream, rtx x)
16481 if (TARGET_32BIT)
16483 int is_minus = GET_CODE (x) == MINUS;
16485 if (GET_CODE (x) == REG)
16486 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16487 else if (GET_CODE (x) == PLUS || is_minus)
16489 rtx base = XEXP (x, 0);
16490 rtx index = XEXP (x, 1);
16491 HOST_WIDE_INT offset = 0;
16492 if (GET_CODE (base) != REG
16493 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16495 /* Ensure that BASE is a register. */
16496 /* (one of them must be). */
16497 /* Also ensure the SP is not used as an index register. */
16502 switch (GET_CODE (index))
16505 offset = INTVAL (index);
16508 asm_fprintf (stream, "[%r, #%wd]",
16509 REGNO (base), offset);
16513 asm_fprintf (stream, "[%r, %s%r]",
16514 REGNO (base), is_minus ? "-" : "",
16515 REGNO (index));
16524 asm_fprintf (stream, "[%r, %s%r",
16525 REGNO (base), is_minus ? "-" : "",
16526 REGNO (XEXP (index, 0)));
16527 arm_print_operand (stream, index, 'S');
16528 fputs ("]", stream);
16533 gcc_unreachable ();
16536 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16537 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16539 extern enum machine_mode output_memory_reference_mode;
16541 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16543 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16544 asm_fprintf (stream, "[%r, #%s%d]!",
16545 REGNO (XEXP (x, 0)),
16546 GET_CODE (x) == PRE_DEC ? "-" : "",
16547 GET_MODE_SIZE (output_memory_reference_mode));
16549 asm_fprintf (stream, "[%r], #%s%d",
16550 REGNO (XEXP (x, 0)),
16551 GET_CODE (x) == POST_DEC ? "-" : "",
16552 GET_MODE_SIZE (output_memory_reference_mode));
16554 else if (GET_CODE (x) == PRE_MODIFY)
16556 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16557 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16558 asm_fprintf (stream, "#%wd]!",
16559 INTVAL (XEXP (XEXP (x, 1), 1)));
16561 asm_fprintf (stream, "%r]!",
16562 REGNO (XEXP (XEXP (x, 1), 1)));
16564 else if (GET_CODE (x) == POST_MODIFY)
16566 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16567 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16568 asm_fprintf (stream, "#%wd",
16569 INTVAL (XEXP (XEXP (x, 1), 1)));
16571 asm_fprintf (stream, "%r",
16572 REGNO (XEXP (XEXP (x, 1), 1)));
16574 else output_addr_const (stream, x);
16576 else /* TARGET_THUMB1 */
16578 if (GET_CODE (x) == REG)
16579 asm_fprintf (stream, "[%r]", REGNO (x));
16580 else if (GET_CODE (x) == POST_INC)
16581 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16582 else if (GET_CODE (x) == PLUS)
16584 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16585 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16586 asm_fprintf (stream, "[%r, #%wd]",
16587 REGNO (XEXP (x, 0)),
16588 INTVAL (XEXP (x, 1)));
16590 asm_fprintf (stream, "[%r, %r]",
16591 REGNO (XEXP (x, 0)),
16592 REGNO (XEXP (x, 1)));
16595 output_addr_const (stream, x);
16599 /* Target hook for indicating whether a punctuation character for
16600 TARGET_PRINT_OPERAND is valid. */
16601 static bool
16602 arm_print_operand_punct_valid_p (unsigned char code)
16604 return (code == '@' || code == '|' || code == '.'
16605 || code == '(' || code == ')' || code == '#'
16606 || (TARGET_32BIT && (code == '?'))
16607 || (TARGET_THUMB2 && (code == '!'))
16608 || (TARGET_THUMB && (code == '_')));
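/* Illustrative summary (not in the original source) of the punctuation
   codes accepted above, as handled in arm_print_operand: '@' emits the
   assembler comment marker, '_' the user label prefix, '|' the register
   prefix, '?' the current condition, '(' / ')' the condition in divided
   or unified syntax respectively, '.' the 's' flag plus condition, '!'
   the Thumb-2 IT-block condition, and '#' is a break separating an
   operand number from following digits.  */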
16611 /* Target hook for assembling integer objects. The ARM version needs to
16612 handle word-sized values specially. */
16613 static bool
16614 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16616 enum machine_mode mode;
16618 if (size == UNITS_PER_WORD && aligned_p)
16620 fputs ("\t.word\t", asm_out_file);
16621 output_addr_const (asm_out_file, x);
16623 /* Mark symbols as position independent. We only do this in the
16624 .text segment, not in the .data segment. */
16625 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16626 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16628 /* See legitimize_pic_address for an explanation of the
16629 TARGET_VXWORKS_RTP check. */
16630 if (TARGET_VXWORKS_RTP
16631 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16632 fputs ("(GOT)", asm_out_file);
16634 fputs ("(GOTOFF)", asm_out_file);
16636 fputc ('\n', asm_out_file);
16640 mode = GET_MODE (x);
16642 if (arm_vector_mode_supported_p (mode))
16646 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16648 units = CONST_VECTOR_NUNITS (x);
16649 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16651 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16652 for (i = 0; i < units; i++)
16654 rtx elt = CONST_VECTOR_ELT (x, i);
16655 assemble_integer
16656 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16658 else
16659 for (i = 0; i < units; i++)
16661 rtx elt = CONST_VECTOR_ELT (x, i);
16662 REAL_VALUE_TYPE rval;
16664 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16666 assemble_real
16667 (rval, GET_MODE_INNER (mode),
16668 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16674 return default_assemble_integer (x, size, aligned_p);
16677 static void
16678 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16682 if (!TARGET_AAPCS_BASED)
16684 (is_ctor ?
16685 default_named_section_asm_out_constructor
16686 : default_named_section_asm_out_destructor) (symbol, priority);
16690 /* Put these in the .init_array section, using a special relocation. */
16691 if (priority != DEFAULT_INIT_PRIORITY)
16694 sprintf (buf, "%s.%.5u",
16695 is_ctor ? ".init_array" : ".fini_array",
16696 priority);
16697 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16704 switch_to_section (s);
16705 assemble_align (POINTER_SIZE);
16706 fputs ("\t.word\t", asm_out_file);
16707 output_addr_const (asm_out_file, symbol);
16708 fputs ("(target1)\n", asm_out_file);
16711 /* Add a function to the list of static constructors. */
16714 arm_elf_asm_constructor (rtx symbol, int priority)
16716 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16719 /* Add a function to the list of static destructors. */
16722 arm_elf_asm_destructor (rtx symbol, int priority)
16724 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16727 /* A finite state machine takes care of noticing whether or not instructions
16728 can be conditionally executed, and thus decrease execution time and code
16729 size by deleting branch instructions. The fsm is controlled by
16730 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16732 /* The state of the fsm controlling condition codes are:
16733 0: normal, do nothing special
16734 1: make ASM_OUTPUT_OPCODE not output this instruction
16735 2: make ASM_OUTPUT_OPCODE not output this instruction
16736 3: make instructions conditional
16737 4: make instructions conditional
16739 State transitions (state->state by whom under condition):
16740 0 -> 1 final_prescan_insn if the `target' is a label
16741 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16742 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16743 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16744 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16745 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16746 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16747 (the target insn is arm_target_insn).
16749 If the jump clobbers the conditions then we use states 2 and 4.
16751 A similar thing can be done with conditional return insns.
16753 XXX In case the `target' is an unconditional branch, this conditionalising
16754 of the instructions always reduces code size, but not always execution
16755 time. But then, I want to reduce the code size to somewhere near what
16756 /bin/cc produces. */
16758 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16759 instructions. When a COND_EXEC instruction is seen the subsequent
16760 instructions are scanned so that multiple conditional instructions can be
16761 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16762 specify the length and true/false mask for the IT block. These will be
16763 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
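/* Illustrative example (not in the original source) of what the FSM
   achieves on ARM:

       cmp   r0, #0              cmp   r0, #0
       beq   .L1         ==>     movne r1, #1
       mov   r1, #1              addne r2, r2, #1
       add   r2, r2, #1
     .L1:                      .L1:

   The conditional branch is deleted and the skipped instructions are
   executed under the inverse condition instead.  */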
16765 /* Returns the index of the ARM condition code string in
16766 `arm_condition_codes'. COMPARISON should be an rtx like
16767 `(eq (...) (...))'. */
16768 static enum arm_cond_code
16769 get_arm_condition_code (rtx comparison)
16771 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16772 enum arm_cond_code code;
16773 enum rtx_code comp_code = GET_CODE (comparison);
16775 if (GET_MODE_CLASS (mode) != MODE_CC)
16776 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16777 XEXP (comparison, 1));
16781 case CC_DNEmode: code = ARM_NE; goto dominance;
16782 case CC_DEQmode: code = ARM_EQ; goto dominance;
16783 case CC_DGEmode: code = ARM_GE; goto dominance;
16784 case CC_DGTmode: code = ARM_GT; goto dominance;
16785 case CC_DLEmode: code = ARM_LE; goto dominance;
16786 case CC_DLTmode: code = ARM_LT; goto dominance;
16787 case CC_DGEUmode: code = ARM_CS; goto dominance;
16788 case CC_DGTUmode: code = ARM_HI; goto dominance;
16789 case CC_DLEUmode: code = ARM_LS; goto dominance;
16790 case CC_DLTUmode: code = ARM_CC;
16792 dominance:
16793 gcc_assert (comp_code == EQ || comp_code == NE);
16795 if (comp_code == EQ)
16796 return ARM_INVERSE_CONDITION_CODE (code);
16797 return code;
16799 case CC_NOOVmode:
16800 switch (comp_code)
16802 case NE: return ARM_NE;
16803 case EQ: return ARM_EQ;
16804 case GE: return ARM_PL;
16805 case LT: return ARM_MI;
16806 default: gcc_unreachable ();
16809 case CC_Zmode:
16810 switch (comp_code)
16812 case NE: return ARM_NE;
16813 case EQ: return ARM_EQ;
16814 default: gcc_unreachable ();
16817 case CC_Nmode:
16818 switch (comp_code)
16820 case NE: return ARM_MI;
16821 case EQ: return ARM_PL;
16822 default: gcc_unreachable ();
16825 case CCFPEmode:
16826 case CCFPmode:
16827 /* These encodings assume that AC=1 in the FPA system control
16828 byte. This allows us to handle all cases except UNEQ and
16829 LTGT. */
16830 switch (comp_code)
16832 case GE: return ARM_GE;
16833 case GT: return ARM_GT;
16834 case LE: return ARM_LS;
16835 case LT: return ARM_MI;
16836 case NE: return ARM_NE;
16837 case EQ: return ARM_EQ;
16838 case ORDERED: return ARM_VC;
16839 case UNORDERED: return ARM_VS;
16840 case UNLT: return ARM_LT;
16841 case UNLE: return ARM_LE;
16842 case UNGT: return ARM_HI;
16843 case UNGE: return ARM_PL;
16844 /* UNEQ and LTGT do not have a representation. */
16845 case UNEQ: /* Fall through. */
16846 case LTGT: /* Fall through. */
16847 default: gcc_unreachable ();
16850 case CC_SWPmode:
16851 switch (comp_code)
16853 case NE: return ARM_NE;
16854 case EQ: return ARM_EQ;
16855 case GE: return ARM_LE;
16856 case GT: return ARM_LT;
16857 case LE: return ARM_GE;
16858 case LT: return ARM_GT;
16859 case GEU: return ARM_LS;
16860 case GTU: return ARM_CC;
16861 case LEU: return ARM_CS;
16862 case LTU: return ARM_HI;
16863 default: gcc_unreachable ();
16866 case CC_Cmode:
16867 switch (comp_code)
16869 case LTU: return ARM_CS;
16870 case GEU: return ARM_CC;
16871 default: gcc_unreachable ();
16874 case CC_CZmode:
16875 switch (comp_code)
16877 case NE: return ARM_NE;
16878 case EQ: return ARM_EQ;
16879 case GEU: return ARM_CS;
16880 case GTU: return ARM_HI;
16881 case LEU: return ARM_LS;
16882 case LTU: return ARM_CC;
16883 default: gcc_unreachable ();
16886 case CC_NCVmode:
16887 switch (comp_code)
16889 case GE: return ARM_GE;
16890 case LT: return ARM_LT;
16891 case GEU: return ARM_CS;
16892 case LTU: return ARM_CC;
16893 default: gcc_unreachable ();
16896 case CCmode:
16897 switch (comp_code)
16899 case NE: return ARM_NE;
16900 case EQ: return ARM_EQ;
16901 case GE: return ARM_GE;
16902 case GT: return ARM_GT;
16903 case LE: return ARM_LE;
16904 case LT: return ARM_LT;
16905 case GEU: return ARM_CS;
16906 case GTU: return ARM_HI;
16907 case LEU: return ARM_LS;
16908 case LTU: return ARM_CC;
16909 default: gcc_unreachable ();
16912 default: gcc_unreachable ();
16916 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
16919 thumb2_final_prescan_insn (rtx insn)
16921 rtx first_insn = insn;
16922 rtx body = PATTERN (insn);
16924 enum arm_cond_code code;
16928 /* Remove the previous insn from the count of insns to be output. */
16929 if (arm_condexec_count)
16930 arm_condexec_count--;
16932 /* Nothing to do if we are already inside a conditional block. */
16933 if (arm_condexec_count)
16934 return;
16936 if (GET_CODE (body) != COND_EXEC)
16937 return;
16939 /* Conditional jumps are implemented directly. */
16940 if (GET_CODE (insn) == JUMP_INSN)
16941 return;
16943 predicate = COND_EXEC_TEST (body);
16944 arm_current_cc = get_arm_condition_code (predicate);
16946 n = get_attr_ce_count (insn);
16947 arm_condexec_count = 1;
16948 arm_condexec_mask = (1 << n) - 1;
16949 arm_condexec_masklen = n;
16950 /* See if subsequent instructions can be combined into the same block. */
16951 while (1)
16953 insn = next_nonnote_insn (insn);
16955 /* Jumping into the middle of an IT block is illegal, so a label or
16956 barrier terminates the block. */
16957 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16958 break;
16960 body = PATTERN (insn);
16961 /* USE and CLOBBER aren't really insns, so just skip them. */
16962 if (GET_CODE (body) == USE
16963 || GET_CODE (body) == CLOBBER)
16964 continue;
16966 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16967 if (GET_CODE (body) != COND_EXEC)
16968 break;
16969 /* Allow up to 4 conditionally executed instructions in a block. */
16970 n = get_attr_ce_count (insn);
16971 if (arm_condexec_masklen + n > 4)
16972 break;
16974 predicate = COND_EXEC_TEST (body);
16975 code = get_arm_condition_code (predicate);
16976 mask = (1 << n) - 1;
16977 if (arm_current_cc == code)
16978 arm_condexec_mask |= (mask << arm_condexec_masklen);
16979 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
16980 break;
16982 arm_condexec_count++;
16983 arm_condexec_masklen += n;
16985 /* A jump must be the last instruction in a conditional block. */
16986 if (GET_CODE (insn) == JUMP_INSN)
16987 break;
16989 /* Restore recog_data (getting the attributes of other insns can
16990 destroy this array, but final.c assumes that it remains intact
16991 across this call). */
16992 extract_constrain_insn_cached (first_insn);
16996 arm_final_prescan_insn (rtx insn)
16998 /* BODY will hold the body of INSN. */
16999 rtx body = PATTERN (insn);
17001 /* This will be 1 if trying to repeat the trick, and things need to be
17002 reversed if it appears to fail. */
17003 int reverse = 0;
17005 /* If we start with a return insn, we only succeed if we find another one. */
17006 int seeking_return = 0;
17008 /* START_INSN will hold the insn from where we start looking. This is the
17009 first insn after the following code_label if REVERSE is true. */
17010 rtx start_insn = insn;
17012 /* If in state 4, check if the target branch is reached, in order to
17013 change back to state 0. */
17014 if (arm_ccfsm_state == 4)
17016 if (insn == arm_target_insn)
17018 arm_target_insn = NULL;
17019 arm_ccfsm_state = 0;
17021 return;
17024 /* If in state 3, it is possible to repeat the trick, if this insn is an
17025 unconditional branch to a label, and immediately following this branch
17026 is the previous target label which is only used once, and the label this
17027 branch jumps to is not too far off. */
17028 if (arm_ccfsm_state == 3)
17030 if (simplejump_p (insn))
17032 start_insn = next_nonnote_insn (start_insn);
17033 if (GET_CODE (start_insn) == BARRIER)
17035 /* XXX Isn't this always a barrier? */
17036 start_insn = next_nonnote_insn (start_insn);
17038 if (GET_CODE (start_insn) == CODE_LABEL
17039 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17040 && LABEL_NUSES (start_insn) == 1)
17041 reverse = TRUE;
17042 else
17043 return;
17045 else if (GET_CODE (body) == RETURN)
17047 start_insn = next_nonnote_insn (start_insn);
17048 if (GET_CODE (start_insn) == BARRIER)
17049 start_insn = next_nonnote_insn (start_insn);
17050 if (GET_CODE (start_insn) == CODE_LABEL
17051 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17052 && LABEL_NUSES (start_insn) == 1)
17054 reverse = TRUE;
17055 seeking_return = 1;
17064 gcc_assert (!arm_ccfsm_state || reverse);
17065 if (GET_CODE (insn) != JUMP_INSN)
17068 /* This jump might be paralleled with a clobber of the condition codes;
17069 the jump should always come first. */
17070 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17071 body = XVECEXP (body, 0, 0);
17074 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17075 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17078 int fail = FALSE, succeed = FALSE;
17079 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17080 int then_not_else = TRUE;
17081 rtx this_insn = start_insn, label = 0;
17083 /* Register the insn jumped to. */
17086 if (!seeking_return)
17087 label = XEXP (SET_SRC (body), 0);
17089 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17090 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17091 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17093 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17094 then_not_else = FALSE;
17096 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17097 seeking_return = 1;
17098 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17100 seeking_return = 1;
17101 then_not_else = FALSE;
17104 gcc_unreachable ();
17106 /* See how many insns this branch skips, and what kind of insns. If all
17107 insns are okay, and the label or unconditional branch to the same
17108 label is not too far away, succeed. */
17109 for (insns_skipped = 0;
17110 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17114 this_insn = next_nonnote_insn (this_insn);
17118 switch (GET_CODE (this_insn))
17121 /* Succeed if it is the target label, otherwise fail since
17122 control falls in from somewhere else. */
17123 if (this_insn == label)
17125 arm_ccfsm_state = 1;
17126 succeed = TRUE;
17128 else
17129 fail = TRUE;
17130 break;
17132 case BARRIER:
17133 /* Succeed if the following insn is the target label.
17135 If return insns are used then the last insn in a function
17136 will be a barrier. */
17137 this_insn = next_nonnote_insn (this_insn);
17138 if (this_insn && this_insn == label)
17140 arm_ccfsm_state = 1;
17141 succeed = TRUE;
17143 else
17144 fail = TRUE;
17145 break;
17147 case CALL_INSN:
17148 /* The AAPCS says that conditional calls should not be
17149 used since they make interworking inefficient (the
17150 linker can't transform BL<cond> into BLX). That's
17151 only a problem if the machine has BLX. */
17152 if (arm_arch5)
17154 fail = TRUE;
17155 break;
17158 /* Succeed if the following insn is the target label, or
17159 if the following two insns are a barrier and the
17160 target label. */
17161 this_insn = next_nonnote_insn (this_insn);
17162 if (this_insn && GET_CODE (this_insn) == BARRIER)
17163 this_insn = next_nonnote_insn (this_insn);
17165 if (this_insn && this_insn == label
17166 && insns_skipped < max_insns_skipped)
17168 arm_ccfsm_state = 1;
17169 succeed = TRUE;
17171 else
17172 fail = TRUE;
17173 break;
17175 case JUMP_INSN:
17176 /* If this is an unconditional branch to the same label, succeed.
17177 If it is to another label, do nothing. If it is conditional,
17178 fail. */
17179 /* XXX Probably, the tests for SET and the PC are
17182 scanbody = PATTERN (this_insn);
17183 if (GET_CODE (scanbody) == SET
17184 && GET_CODE (SET_DEST (scanbody)) == PC)
17186 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17187 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17189 arm_ccfsm_state = 2;
17192 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17195 /* Fail if a conditional return is undesirable (e.g. on a
17196 StrongARM), but still allow this if optimizing for size. */
17197 else if (GET_CODE (scanbody) == RETURN
17198 && !use_return_insn (TRUE, NULL)
17201 else if (GET_CODE (scanbody) == RETURN
17204 arm_ccfsm_state = 2;
17207 else if (GET_CODE (scanbody) == PARALLEL)
17209 switch (get_attr_conds (this_insn))
17219 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17224 /* Instructions using or affecting the condition codes make it
17225 fail. */
17226 scanbody = PATTERN (this_insn);
17227 if (!(GET_CODE (scanbody) == SET
17228 || GET_CODE (scanbody) == PARALLEL)
17229 || get_attr_conds (this_insn) != CONDS_NOCOND)
17230 fail = TRUE;
17232 /* A conditional cirrus instruction must be followed by
17233 a non Cirrus instruction. However, since we
17234 conditionalize instructions in this function and by
17235 the time we get here we can't add instructions
17236 (nops), because shorten_branches() has already been
17237 called, we will disable conditionalizing Cirrus
17238 instructions to be safe. */
17239 if (GET_CODE (scanbody) != USE
17240 && GET_CODE (scanbody) != CLOBBER
17241 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17242 fail = TRUE;
17249 if (succeed)
17251 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17252 arm_target_label = CODE_LABEL_NUMBER (label);
17255 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17257 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17259 this_insn = next_nonnote_insn (this_insn);
17260 gcc_assert (!this_insn
17261 || (GET_CODE (this_insn) != BARRIER
17262 && GET_CODE (this_insn) != CODE_LABEL));
17264 if (!this_insn)
17266 /* Oh, dear! We ran off the end... give up. */
17267 extract_constrain_insn_cached (insn);
17268 arm_ccfsm_state = 0;
17269 arm_target_insn = NULL;
17270 return;
17272 arm_target_insn = this_insn;
17275 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17276 what it was. */
17277 if (!reverse)
17278 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17280 if (reverse || then_not_else)
17281 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17284 /* Restore recog_data (getting the attributes of other insns can
17285 destroy this array, but final.c assumes that it remains intact
17286 across this call). */
17287 extract_constrain_insn_cached (insn);
17291 /* Output IT instructions. */
17293 thumb2_asm_output_opcode (FILE * stream)
17298 if (arm_condexec_mask)
17300 for (n = 0; n < arm_condexec_masklen; n++)
17301 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17302 buff[n] = 0;
17303 asm_fprintf (stream, "i%s\t%s\n\t", buff,
17304 arm_condition_codes[arm_current_cc]);
17305 arm_condexec_mask = 0;
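/* Worked example (illustrative): three insns predicated EQ, NE, EQ give
   arm_condexec_mask = 0b101 and arm_condexec_masklen = 3, so buff is
   "tet" and the emitted prefix is "itet eq" (then, else, then).  */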
17309 /* Returns true if REGNO is a valid register
17310 for holding a quantity of type MODE. */
17311 int
17312 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17314 if (GET_MODE_CLASS (mode) == MODE_CC)
17315 return (regno == CC_REGNUM
17316 || (TARGET_HARD_FLOAT && TARGET_VFP
17317 && regno == VFPCC_REGNUM));
17319 if (TARGET_THUMB1)
17320 /* For the Thumb we only allow values bigger than SImode in
17321 registers 0 - 6, so that there is always a second low
17322 register available to hold the upper part of the value.
17323 We probably ought to ensure that the register is the
17324 start of an even numbered register pair. */
17325 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17327 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17328 && IS_CIRRUS_REGNUM (regno))
17329 /* We have outlawed SI values in Cirrus registers because they
17330 reside in the lower 32 bits, but SF values reside in the
17331 upper 32 bits. This causes gcc all sorts of grief. We can't
17332 even split the registers into pairs because Cirrus SI values
17333 get sign extended to 64bits-- aldyh. */
17334 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17336 if (TARGET_HARD_FLOAT && TARGET_VFP
17337 && IS_VFP_REGNUM (regno))
17339 if (mode == SFmode || mode == SImode)
17340 return VFP_REGNO_OK_FOR_SINGLE (regno);
17342 if (mode == DFmode)
17343 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17345 /* VFP registers can hold HFmode values, but there is no point in
17346 putting them there unless we have hardware conversion insns. */
17347 if (mode == HFmode)
17348 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17350 if (TARGET_NEON)
17351 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17352 || (VALID_NEON_QREG_MODE (mode)
17353 && NEON_REGNO_OK_FOR_QUAD (regno))
17354 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17355 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17356 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17357 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17358 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17360 return FALSE;
17363 if (TARGET_REALLY_IWMMXT)
17365 if (IS_IWMMXT_GR_REGNUM (regno))
17366 return mode == SImode;
17368 if (IS_IWMMXT_REGNUM (regno))
17369 return VALID_IWMMXT_REG_MODE (mode);
17372 /* We allow almost any value to be stored in the general registers.
17373 Restrict doubleword quantities to even register pairs so that we can
17374 use ldrd. Do not allow very large Neon structure opaque modes in
17375 general registers; they would use too many. */
17376 if (regno <= LAST_ARM_REGNUM)
17377 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17378 && ARM_NUM_REGS (mode) <= 4;
17380 if (regno == FRAME_POINTER_REGNUM
17381 || regno == ARG_POINTER_REGNUM)
17382 /* We only allow integers in the fake hard registers. */
17383 return GET_MODE_CLASS (mode) == MODE_INT;
17385 /* The only registers left are the FPA registers
17386 which we only allow to hold FP values. */
17387 return (TARGET_HARD_FLOAT && TARGET_FPA
17388 && GET_MODE_CLASS (mode) == MODE_FLOAT
17389 && regno >= FIRST_FPA_REGNUM
17390 && regno <= LAST_FPA_REGNUM);
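/* Examples (illustrative): with TARGET_LDRD set, DImode is rejected in
   the odd-numbered r1 but allowed in r2, keeping doublewords in even/odd
   pairs for ldrd/strd; a CImode Neon structure (12 words) is never placed
   in core registers since ARM_NUM_REGS exceeds the limit of 4.  */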
17393 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17394 not used in arm mode. */
17396 enum reg_class
17397 arm_regno_class (int regno)
17399 if (TARGET_THUMB1)
17401 if (regno == STACK_POINTER_REGNUM)
17402 return STACK_REG;
17403 if (regno == CC_REGNUM)
17404 return CC_REG;
17405 if (regno < 8)
17406 return LO_REGS;
17407 return HI_REGS;
17410 if (TARGET_THUMB2 && regno < 8)
17411 return LO_REGS;
17413 if ( regno <= LAST_ARM_REGNUM
17414 || regno == FRAME_POINTER_REGNUM
17415 || regno == ARG_POINTER_REGNUM)
17416 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17418 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17419 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17421 if (IS_CIRRUS_REGNUM (regno))
17422 return CIRRUS_REGS;
17424 if (IS_VFP_REGNUM (regno))
17426 if (regno <= D7_VFP_REGNUM)
17427 return VFP_D0_D7_REGS;
17428 else if (regno <= LAST_LO_VFP_REGNUM)
17429 return VFP_LO_REGS;
17431 return VFP_HI_REGS;
17434 if (IS_IWMMXT_REGNUM (regno))
17435 return IWMMXT_REGS;
17437 if (IS_IWMMXT_GR_REGNUM (regno))
17438 return IWMMXT_GR_REGS;
17440 return FPA_REGS;
17443 /* Handle a special case when computing the offset
17444 of an argument from the frame pointer. */
17445 int
17446 arm_debugger_arg_offset (int value, rtx addr)
17450 /* We are only interested if dbxout_parms() failed to compute the offset. */
17451 if (value != 0)
17452 return 0;
17454 /* We can only cope with the case where the address is held in a register. */
17455 if (GET_CODE (addr) != REG)
17456 return 0;
17458 /* If we are using the frame pointer to point at the argument, then
17459 an offset of 0 is correct. */
17460 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17461 return 0;
17463 /* If we are using the stack pointer to point at the
17464 argument, then an offset of 0 is correct. */
17465 /* ??? Check this is consistent with thumb2 frame layout. */
17466 if ((TARGET_THUMB || !frame_pointer_needed)
17467 && REGNO (addr) == SP_REGNUM)
17468 return 0;
17470 /* Oh dear. The argument is pointed to by a register rather
17471 than being held in a register, or being stored at a known
17472 offset from the frame pointer. Since GDB only understands
17473 those two kinds of argument we must translate the address
17474 held in the register into an offset from the frame pointer.
17475 We do this by searching through the insns for the function
17476 looking to see where this register gets its value. If the
17477 register is initialized from the frame pointer plus an offset
17478 then we are in luck and we can continue, otherwise we give up.
17480 This code is exercised by producing debugging information
17481 for a function with arguments like this:
17483 double func (double a, double b, int c, double d) {return d;}
17485 Without this code the stab for parameter 'd' will be set to
17486 an offset of 0 from the frame pointer, rather than 8. */
17488 /* The if() statement says:
17490 If the insn is a normal instruction
17491 and if the insn is setting the value in a register
17492 and if the register being set is the register holding the address of the argument
17493 and if the address is computed by an addition
17494 that involves adding to a register
17495 which is the frame pointer
17500 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17502 if ( GET_CODE (insn) == INSN
17503 && GET_CODE (PATTERN (insn)) == SET
17504 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17505 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17506 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17507 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17508 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17511 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17513 break;
17517 if (value == 0)
17520 warning (0, "unable to compute real location of stacked parameter");
17521 value = 8; /* XXX magic hack */
17524 return value;
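/* Illustrative note: the loop above looks for an insn of the shape
     (set (reg Rn) (plus (reg hard-fp) (const_int 8)))
   i.e. the argument's address being computed from the hard frame pointer,
   and returns the constant (8 here) as the argument's offset.  */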
17527 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17530 if ((MASK) & insn_flags) \
17531 add_builtin_function ((NAME), (TYPE), (CODE), \
17532 BUILT_IN_MD, NULL, NULL_TREE); \
17536 struct builtin_description
17538 const unsigned int mask;
17539 const enum insn_code icode;
17540 const char * const name;
17541 const enum arm_builtins code;
17542 const enum rtx_code comparison;
17543 const unsigned int flag;
17546 static const struct builtin_description bdesc_2arg[] =
17548 #define IWMMXT_BUILTIN(code, string, builtin) \
17549 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17550 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17552 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17553 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17554 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17555 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17556 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17557 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17558 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17559 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17560 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17561 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17562 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17563 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17564 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17565 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17566 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17567 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17568 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17569 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17570 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17571 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17572 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17573 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17574 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17575 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17576 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17577 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17578 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17579 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17580 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17581 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17582 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17583 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17584 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17585 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17586 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17587 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17588 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17589 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17590 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17591 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17592 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17593 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17594 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17595 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17596 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17597 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17598 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17599 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17600 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17601 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17602 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17603 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17604 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17605 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17606 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17607 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17608 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17609 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17611 #define IWMMXT_BUILTIN2(code, builtin) \
17612 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17614 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17615 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17616 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17617 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17618 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17619 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17620 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17621 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17622 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17623 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17624 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17625 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17626 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17627 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17628 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17629 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17630 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17631 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17632 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17633 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17634 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17635 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17636 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17637 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17638 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17639 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17640 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17641 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17642 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17643 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17644 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17645 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
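/* Illustrative use of one of the two-operand builtins declared above
   (assumes an iWMMXt-enabled target; the v8qi type matches the mapping
   set up in arm_init_iwmmxt_builtins below):

     typedef signed char v8qi __attribute__ ((vector_size (8)));

     v8qi
     add_bytes (v8qi a, v8qi b)
     {
       return __builtin_arm_waddb (a, b);   // expands to the waddb insn
     }
*/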
17648 static const struct builtin_description bdesc_1arg[] =
17650 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17651 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17652 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17653 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17654 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17655 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17656 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17657 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17658 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17659 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17660 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17661 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17662 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17663 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17664 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17665 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17666 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17667 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17670 /* Set up all the iWMMXt builtins. This is
17671 not called if TARGET_IWMMXT is zero. */
17673 static void
17674 arm_init_iwmmxt_builtins (void)
17676 const struct builtin_description * d;
17678 tree endlink = void_list_node;
17680 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17681 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17682 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17685 = build_function_type (integer_type_node,
17686 tree_cons (NULL_TREE, integer_type_node, endlink));
17687 tree v8qi_ftype_v8qi_v8qi_int
17688 = build_function_type (V8QI_type_node,
17689 tree_cons (NULL_TREE, V8QI_type_node,
17690 tree_cons (NULL_TREE, V8QI_type_node,
17691 tree_cons (NULL_TREE,
17694 tree v4hi_ftype_v4hi_int
17695 = build_function_type (V4HI_type_node,
17696 tree_cons (NULL_TREE, V4HI_type_node,
17697 tree_cons (NULL_TREE, integer_type_node,
17699 tree v2si_ftype_v2si_int
17700 = build_function_type (V2SI_type_node,
17701 tree_cons (NULL_TREE, V2SI_type_node,
17702 tree_cons (NULL_TREE, integer_type_node,
17704 tree v2si_ftype_di_di
17705 = build_function_type (V2SI_type_node,
17706 tree_cons (NULL_TREE, long_long_integer_type_node,
17707 tree_cons (NULL_TREE, long_long_integer_type_node,
17709 tree di_ftype_di_int
17710 = build_function_type (long_long_integer_type_node,
17711 tree_cons (NULL_TREE, long_long_integer_type_node,
17712 tree_cons (NULL_TREE, integer_type_node,
17714 tree di_ftype_di_int_int
17715 = build_function_type (long_long_integer_type_node,
17716 tree_cons (NULL_TREE, long_long_integer_type_node,
17717 tree_cons (NULL_TREE, integer_type_node,
17718 tree_cons (NULL_TREE,
17721 tree int_ftype_v8qi
17722 = build_function_type (integer_type_node,
17723 tree_cons (NULL_TREE, V8QI_type_node,
17725 tree int_ftype_v4hi
17726 = build_function_type (integer_type_node,
17727 tree_cons (NULL_TREE, V4HI_type_node,
17729 tree int_ftype_v2si
17730 = build_function_type (integer_type_node,
17731 tree_cons (NULL_TREE, V2SI_type_node,
17733 tree int_ftype_v8qi_int
17734 = build_function_type (integer_type_node,
17735 tree_cons (NULL_TREE, V8QI_type_node,
17736 tree_cons (NULL_TREE, integer_type_node,
17738 tree int_ftype_v4hi_int
17739 = build_function_type (integer_type_node,
17740 tree_cons (NULL_TREE, V4HI_type_node,
17741 tree_cons (NULL_TREE, integer_type_node,
17743 tree int_ftype_v2si_int
17744 = build_function_type (integer_type_node,
17745 tree_cons (NULL_TREE, V2SI_type_node,
17746 tree_cons (NULL_TREE, integer_type_node,
17748 tree v8qi_ftype_v8qi_int_int
17749 = build_function_type (V8QI_type_node,
17750 tree_cons (NULL_TREE, V8QI_type_node,
17751 tree_cons (NULL_TREE, integer_type_node,
17752 tree_cons (NULL_TREE,
17755 tree v4hi_ftype_v4hi_int_int
17756 = build_function_type (V4HI_type_node,
17757 tree_cons (NULL_TREE, V4HI_type_node,
17758 tree_cons (NULL_TREE, integer_type_node,
17759 tree_cons (NULL_TREE,
17762 tree v2si_ftype_v2si_int_int
17763 = build_function_type (V2SI_type_node,
17764 tree_cons (NULL_TREE, V2SI_type_node,
17765 tree_cons (NULL_TREE, integer_type_node,
17766 tree_cons (NULL_TREE,
17769 /* Miscellaneous. */
17770 tree v8qi_ftype_v4hi_v4hi
17771 = build_function_type (V8QI_type_node,
17772 tree_cons (NULL_TREE, V4HI_type_node,
17773 tree_cons (NULL_TREE, V4HI_type_node,
17775 tree v4hi_ftype_v2si_v2si
17776 = build_function_type (V4HI_type_node,
17777 tree_cons (NULL_TREE, V2SI_type_node,
17778 tree_cons (NULL_TREE, V2SI_type_node,
17780 tree v2si_ftype_v4hi_v4hi
17781 = build_function_type (V2SI_type_node,
17782 tree_cons (NULL_TREE, V4HI_type_node,
17783 tree_cons (NULL_TREE, V4HI_type_node,
17785 tree v2si_ftype_v8qi_v8qi
17786 = build_function_type (V2SI_type_node,
17787 tree_cons (NULL_TREE, V8QI_type_node,
17788 tree_cons (NULL_TREE, V8QI_type_node,
17790 tree v4hi_ftype_v4hi_di
17791 = build_function_type (V4HI_type_node,
17792 tree_cons (NULL_TREE, V4HI_type_node,
17793 tree_cons (NULL_TREE,
17794 long_long_integer_type_node,
17796 tree v2si_ftype_v2si_di
17797 = build_function_type (V2SI_type_node,
17798 tree_cons (NULL_TREE, V2SI_type_node,
17799 tree_cons (NULL_TREE,
17800 long_long_integer_type_node,
17802 tree void_ftype_int_int
17803 = build_function_type (void_type_node,
17804 tree_cons (NULL_TREE, integer_type_node,
17805 tree_cons (NULL_TREE, integer_type_node,
17807 tree di_ftype_void
17808 = build_function_type (long_long_unsigned_type_node, endlink);
17809 tree di_ftype_v8qi
17810 = build_function_type (long_long_integer_type_node,
17811 tree_cons (NULL_TREE, V8QI_type_node,
17813 tree di_ftype_v4hi
17814 = build_function_type (long_long_integer_type_node,
17815 tree_cons (NULL_TREE, V4HI_type_node,
17817 tree di_ftype_v2si
17818 = build_function_type (long_long_integer_type_node,
17819 tree_cons (NULL_TREE, V2SI_type_node,
17821 tree v2si_ftype_v4hi
17822 = build_function_type (V2SI_type_node,
17823 tree_cons (NULL_TREE, V4HI_type_node,
17825 tree v4hi_ftype_v8qi
17826 = build_function_type (V4HI_type_node,
17827 tree_cons (NULL_TREE, V8QI_type_node,
17830 tree di_ftype_di_v4hi_v4hi
17831 = build_function_type (long_long_unsigned_type_node,
17832 tree_cons (NULL_TREE,
17833 long_long_unsigned_type_node,
17834 tree_cons (NULL_TREE, V4HI_type_node,
17835 tree_cons (NULL_TREE,
17839 tree di_ftype_v4hi_v4hi
17840 = build_function_type (long_long_unsigned_type_node,
17841 tree_cons (NULL_TREE, V4HI_type_node,
17842 tree_cons (NULL_TREE, V4HI_type_node,

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
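
  /* As a concrete example of the loop above: an entry in bdesc_2arg whose
     insn takes V8QImode operands (a byte-wise operation) is registered
     with the type v8qi_ftype_v8qi_v8qi built earlier.  */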

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);

static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       "__builtin_thread_pointer", NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}

enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

#define T_MAX 13

typedef enum {
  NEON_BINOP, NEON_TERNOP, NEON_UNOP, NEON_GETLANE, NEON_SETLANE,
  NEON_CREATE, NEON_DUP, NEON_DUPLANE, NEON_COMBINE, NEON_SPLIT,
  NEON_LANEMUL, NEON_LANEMULL, NEON_LANEMULH, NEON_LANEMAC,
  NEON_SCALARMUL, NEON_SCALARMULL, NEON_SCALARMULH, NEON_SCALARMAC,
  NEON_CONVERT, NEON_FIXCONV, NEON_SELECT, NEON_RESULTPAIR, NEON_REINTERP,
  NEON_VTBL, NEON_VTBX,
  NEON_LOAD1, NEON_LOAD1LANE, NEON_LOADSTRUCT, NEON_LOADSTRUCTLANE,
  NEON_STORE1, NEON_STORE1LANE, NEON_STORESTRUCT, NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP, NEON_SHIFTINSERT, NEON_SHIFTIMM, NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;

#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
		| UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
		| UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
		| UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
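
/* As an illustration, the entry "VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)" in
   the table below expands to the initializer
     "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
     { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
       CODE_FOR_neon_vaddlv2si }, 3, 0
   i.e. a single table entry describing all three variants of the
   instruction along with their insn codes.  */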

/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not distinguished, however; they are
   all mapped onto the same mode for a given element size.)  The modes listed
   per instruction should be the same as those defined for that instruction's
   pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */

static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];

  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);

  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);

  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      int j;

      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }

  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
	    case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT:
	    case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
	    case NEON_STORE1LANE:
	    case NEON_STORESTRUCT:
	    case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP:
	    case NEON_BINOP:
	    case NEON_LOGICBINOP:
	    case NEON_SHIFTINSERT:
	    case NEON_TERNOP:
	    case NEON_GETLANE:
	    case NEON_SETLANE:
	    case NEON_CREATE:
	    case NEON_DUP:
	    case NEON_DUPLANE:
	    case NEON_SHIFTIMM:
	    case NEON_SHIFTACC:
	    case NEON_COMBINE:
	    case NEON_SPLIT:
	    case NEON_CONVERT:
	    case NEON_FIXCONV:
	    case NEON_LANEMUL:
	    case NEON_LANEMULL:
	    case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL:
	    case NEON_SCALARMULL:
	    case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT:
	    case NEON_VTBL:
	    case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type () function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
			  case VOIDmode: eltype = void_type_node; break;
			  /* Scalars.  */
			  case QImode: eltype = neon_intQI_type_node; break;
			  case HImode: eltype = neon_intHI_type_node; break;
			  case SImode: eltype = neon_intSI_type_node; break;
			  case SFmode: eltype = neon_float_type_node; break;
			  case DImode: eltype = neon_intDI_type_node; break;
			  case TImode: eltype = intTI_type_node; break;
			  case EImode: eltype = intEI_type_node; break;
			  case OImode: eltype = intOI_type_node; break;
			  case CImode: eltype = intCI_type_node; break;
			  case XImode: eltype = intXI_type_node; break;
			  /* 64-bit vectors.  */
			  case V8QImode: eltype = V8QI_type_node; break;
			  case V4HImode: eltype = V4HI_type_node; break;
			  case V2SImode: eltype = V2SI_type_node; break;
			  case V2SFmode: eltype = V2SF_type_node; break;
			  /* 128-bit vectors.  */
			  case V16QImode: eltype = V16QI_type_node; break;
			  case V8HImode: eltype = V8HI_type_node; break;
			  case V4SImode: eltype = V4SI_type_node; break;
			  case V4SFmode: eltype = V4SF_type_node; break;
			  case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}
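
/* The loop above thus registers one builtin per variant: the "vaddl" entry,
   for instance, yields __builtin_neon_vaddlv8qi, __builtin_neon_vaddlv4hi
   and __builtin_neon_vaddlv2si, with consecutive function codes starting at
   that entry's base_fcode.  */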

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}

/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
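
/* For example, a conversion such as "(double) h" on an __fp16 value h is
   expanded by this hook as "(double) (float) h", and likewise in the
   narrowing direction; conversions involving float itself are left alone.  */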

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}

/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}

static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key, *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
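
/* For instance, given the function code of __builtin_neon_vaddlv4hi, the
   bsearch lands on the "vaddl" entry and idx selects its second variant
   (v4hi is the second type bit set there), yielding
   CODE_FOR_neon_vaddlv4hi.  */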

typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5

/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  op[argc] = expand_normal (arg[argc]);
	  mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		  (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		  (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;
      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;
      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;
      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;
      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;
      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;
      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;
      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;
      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;
      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;
      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}

/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}

/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
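
/* For example, reinterpreting a V8QI value as V4HI emits a single move of
   the underlying 64-bit value; no lane reordering takes place.  */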

/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
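
/* This helper backs the NEON_RESULTPAIR expansions (vtrn, vzip and vuzp in
   the table above): both result vectors are computed into fresh registers
   and then stored to DESTADDR and DESTADDR + GET_MODE_SIZE (mode).  */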

/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */

void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
	{
	  int good = TRUE;

	  for (j = 0; good && j < count; j++)
	    if (i != j && (copied & (1 << j)) == 0
		&& reg_overlap_mentioned_p (src[j], dest[i]))
	      good = FALSE;

	  if (good && ((copied & (1 << i)) == 0))
	    {
	      operands[opctr++] = dest[i];
	      operands[opctr++] = src[i];
	      copied |= 1 << i;
	    }
	}
    }

  gcc_assert (opctr == count * 2);
}
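
/* For example, with dest = {d0, d1} and src = {d1, d2}, the scan above
   emits d0 := d1 before d1 := d2, deferring any copy whose destination is
   still needed as a later source.  Note that it cannot make progress on a
   cyclic swap, so callers must not pass one.  */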

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);

    /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
19580 /* Return the number (counting from 0) of
19581 the least significant set bit in MASK. */
19584 number_of_first_bit_set (unsigned mask)
19589 (mask & (1 << bit)) == 0;
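/* For example, number_of_first_bit_set (0x28) is 3: 0x28 is binary
   101000, and bit 3 is its lowest set bit.  A minimal standalone
   sketch of the same scan (hypothetical helper in plain ISO C;
   behavior is undefined for a zero mask, which callers must rule out):

     static int
     first_bit_set (unsigned mask)
     {
       int bit = 0;
       while ((mask & (1u << bit)) == 0)
         bit++;
       return bit;
     }
*/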
19596 /* Emit code to push or pop registers to or from the stack. F is the
19597 assembly file. MASK is the registers to push or pop. PUSH is
19598 nonzero if we should push, and zero if we should pop. For debugging
19599 output, if pushing, adjust CFA_OFFSET by the amount of space added
19600 to the stack. REAL_REGS should have the same number of bits set as
19601 MASK, and will be used instead (in the same order) to describe which
19602 registers were saved - this is used to mark the save slots when we
19603 push high registers after moving them to low registers. */
19605 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19606 unsigned long real_regs)
19609 int lo_mask = mask & 0xFF;
19610 int pushed_words = 0;
19614 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19616 /* Special case.  Do not generate a POP PC statement here, do it in thumb_exit instead.  */
19618 thumb_exit (f, -1);
19622 if (push && arm_except_unwind_info () == UI_TARGET)
19624 fprintf (f, "\t.save\t{");
19625 for (regno = 0; regno < 15; regno++)
19627 if (real_regs & (1 << regno))
19629 if (real_regs & ((1 << regno) -1))
19631 asm_fprintf (f, "%r", regno);
19634 fprintf (f, "}\n");
19637 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19639 /* Look at the low registers first. */
19640 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19644 asm_fprintf (f, "%r", regno);
19646 if ((lo_mask & ~1) != 0)
19653 if (push && (mask & (1 << LR_REGNUM)))
19655 /* Catch pushing the LR. */
19659 asm_fprintf (f, "%r", LR_REGNUM);
19663 else if (!push && (mask & (1 << PC_REGNUM)))
19665 /* Catch popping the PC. */
19666 if (TARGET_INTERWORK || TARGET_BACKTRACE
19667 || crtl->calls_eh_return)
19669 /* The PC is never popped directly; instead
19670 it is popped into r3 and then BX is used. */
19671 fprintf (f, "}\n");
19673 thumb_exit (f, -1);
19682 asm_fprintf (f, "%r", PC_REGNUM);
19686 fprintf (f, "}\n");
19688 if (push && pushed_words && dwarf2out_do_frame ())
19690 char *l = dwarf2out_cfi_label (false);
19691 int pushed_mask = real_regs;
19693 *cfa_offset += pushed_words * 4;
19694 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19697 pushed_mask = real_regs;
19698 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19700 if (pushed_mask & 1)
19701 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
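/* As a rough illustration (an assumed emission, not taken from any
   particular build): with MASK == 0x0090 (r4 and r7) and PUSH nonzero,
   the code above prints

	push	{r4, r7}

   preceded, when EABI unwind tables are wanted, by a matching
   ".save {r4, r7}" directive derived from REAL_REGS.  */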
19706 /* Generate code to return from a thumb function.
19707 If 'reg_containing_return_addr' is -1, then the return address is
19708 actually on the stack, at the stack pointer. */
19710 thumb_exit (FILE *f, int reg_containing_return_addr)
19712 unsigned regs_available_for_popping;
19713 unsigned regs_to_pop;
19715 unsigned available;
19719 int restore_a4 = FALSE;
19721 /* Compute the registers we need to pop. */
19725 if (reg_containing_return_addr == -1)
19727 regs_to_pop |= 1 << LR_REGNUM;
19731 if (TARGET_BACKTRACE)
19733 /* Restore the (ARM) frame pointer and stack pointer. */
19734 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19738 /* If there is nothing to pop then just emit the BX instruction and return.  */
19740 if (pops_needed == 0)
19742 if (crtl->calls_eh_return)
19743 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19745 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19748 /* Otherwise if we are not supporting interworking and we have not created
19749 a backtrace structure and the function was not entered in ARM mode then
19750 just pop the return address straight into the PC. */
19751 else if (!TARGET_INTERWORK
19752 && !TARGET_BACKTRACE
19753 && !is_called_in_ARM_mode (current_function_decl)
19754 && !crtl->calls_eh_return)
19756 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19760 /* Find out how many of the (return) argument registers we can corrupt. */
19761 regs_available_for_popping = 0;
19763 /* If returning via __builtin_eh_return, the bottom three registers
19764 all contain information needed for the return. */
19765 if (crtl->calls_eh_return)
19769 /* Deduce the registers used from the function's
19770 return value where possible.  This is more reliable than examining
19771 df_regs_ever_live_p () because that will be set if the register is
19772 ever used in the function, not just if the register is used
19773 to hold a return value. */
19775 if (crtl->return_rtx != 0)
19776 mode = GET_MODE (crtl->return_rtx);
19778 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19780 size = GET_MODE_SIZE (mode);
19784 /* In a void function we can use any argument register.
19785 In a function that returns a structure on the stack
19786 we can use the second and third argument registers. */
19787 if (mode == VOIDmode)
19788 regs_available_for_popping =
19789 (1 << ARG_REGISTER (1))
19790 | (1 << ARG_REGISTER (2))
19791 | (1 << ARG_REGISTER (3));
19793 regs_available_for_popping =
19794 (1 << ARG_REGISTER (2))
19795 | (1 << ARG_REGISTER (3));
19797 else if (size <= 4)
19798 regs_available_for_popping =
19799 (1 << ARG_REGISTER (2))
19800 | (1 << ARG_REGISTER (3));
19801 else if (size <= 8)
19802 regs_available_for_popping =
19803 (1 << ARG_REGISTER (3));
19806 /* Match registers to be popped with registers into which we pop them. */
19807 for (available = regs_available_for_popping,
19808 required = regs_to_pop;
19809 required != 0 && available != 0;
19810 available &= ~(available & - available),
19811 required &= ~(required & - required))
19814 /* If we have any popping registers left over, remove them. */
19816 regs_available_for_popping &= ~available;
19818 /* Otherwise if we need another popping register we can use
19819 the fourth argument register. */
19820 else if (pops_needed)
19822 /* If we have not found any free argument registers and
19823 reg a4 contains the return address, we must move it. */
19824 if (regs_available_for_popping == 0
19825 && reg_containing_return_addr == LAST_ARG_REGNUM)
19827 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19828 reg_containing_return_addr = LR_REGNUM;
19830 else if (size > 12)
19832 /* Register a4 is being used to hold part of the return value,
19833 but we have dire need of a free, low register. */
19836 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19839 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19841 /* The fourth argument register is available. */
19842 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19848 /* Pop as many registers as we can. */
19849 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19850 regs_available_for_popping);
19852 /* Process the registers we popped. */
19853 if (reg_containing_return_addr == -1)
19855 /* The return address was popped into the lowest numbered register. */
19856 regs_to_pop &= ~(1 << LR_REGNUM);
19858 reg_containing_return_addr =
19859 number_of_first_bit_set (regs_available_for_popping);
19861 /* Remove this register from the mask of available registers, so that
19862 the return address will not be corrupted by further pops. */
19863 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19866 /* If we popped other registers then handle them here. */
19867 if (regs_available_for_popping)
19871 /* Work out which register currently contains the frame pointer. */
19872 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19874 /* Move it into the correct place. */
19875 asm_fprintf (f, "\tmov\t%r, %r\n",
19876 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19878 /* (Temporarily) remove it from the mask of popped registers. */
19879 regs_available_for_popping &= ~(1 << frame_pointer);
19880 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19882 if (regs_available_for_popping)
19886 /* We popped the stack pointer as well,
19887 find the register that contains it. */
19888 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19890 /* Move it into the stack register. */
19891 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19893 /* At this point we have popped all necessary registers, so
19894 do not worry about restoring regs_available_for_popping
19895 to its correct value:
19897 assert (pops_needed == 0)
19898 assert (regs_available_for_popping == (1 << frame_pointer))
19899 assert (regs_to_pop == (1 << STACK_POINTER)) */
19903 /* Since we have just moved the popped value into the frame
19904 pointer, the popping register is available for reuse, and
19905 we know that we still have the stack pointer left to pop. */
19906 regs_available_for_popping |= (1 << frame_pointer);
19910 /* If we still have registers left on the stack, but we no longer have
19911 any registers into which we can pop them, then we must move the return
19912 address into the link register and make available the register that held it.  */
19914 if (regs_available_for_popping == 0 && pops_needed > 0)
19916 regs_available_for_popping |= 1 << reg_containing_return_addr;
19918 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19919 reg_containing_return_addr);
19921 reg_containing_return_addr = LR_REGNUM;
19924 /* If we have registers left on the stack then pop some more.
19925 We know that at most we will want to pop FP and SP. */
19926 if (pops_needed > 0)
19931 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19932 regs_available_for_popping);
19934 /* We have popped either FP or SP.
19935 Move whichever one it is into the correct register. */
19936 popped_into = number_of_first_bit_set (regs_available_for_popping);
19937 move_to = number_of_first_bit_set (regs_to_pop);
19939 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19941 regs_to_pop &= ~(1 << move_to);
19946 /* If we still have not popped everything then we must have only
19947 had one register available to us and we are now popping the SP. */
19948 if (pops_needed > 0)
19952 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19953 regs_available_for_popping);
19955 popped_into = number_of_first_bit_set (regs_available_for_popping);
19957 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19959 assert (regs_to_pop == (1 << STACK_POINTER))
19960 assert (pops_needed == 1)
19964 /* If necessary restore the a4 register. */
19967 if (reg_containing_return_addr != LR_REGNUM)
19969 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19970 reg_containing_return_addr = LR_REGNUM;
19973 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19976 if (crtl->calls_eh_return)
19977 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19979 /* Return to caller. */
19980 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
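/* For instance (a sketch under assumed conditions): in a void function
   compiled for interworking, a return address left on the stack is
   popped into the lowest free argument register and returned through,
   e.g.

	pop	{r0}
	bx	r0

   which is safe because r0-r2 carry no return value there.  */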
19983 /* Scan INSN just before assembler is output for it.
19984 For Thumb-1, we track the status of the condition codes; this
19985 information is used in the cbranchsi4_insn pattern. */
19987 thumb1_final_prescan_insn (rtx insn)
19989 if (flag_print_asm_name)
19990 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19991 INSN_ADDRESSES (INSN_UID (insn)));
19992 /* Don't overwrite the previous setter when we get to a cbranch. */
19993 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19995 enum attr_conds conds;
19997 if (cfun->machine->thumb1_cc_insn)
19999 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20000 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20003 conds = get_attr_conds (insn);
20004 if (conds == CONDS_SET)
20006 rtx set = single_set (insn);
20007 cfun->machine->thumb1_cc_insn = insn;
20008 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20009 cfun->machine->thumb1_cc_op1 = const0_rtx;
20010 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20011 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20013 rtx src1 = XEXP (SET_SRC (set), 1);
20014 if (src1 == const0_rtx)
20015 cfun->machine->thumb1_cc_mode = CCmode;
20018 else if (conds != CONDS_NOCOND)
20019 cfun->machine->thumb1_cc_insn = NULL_RTX;
20024 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20026 unsigned HOST_WIDE_INT mask = 0xff;
20029 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20030 if (val == 0) /* XXX */
20033 for (i = 0; i < 25; i++)
20034 if ((val & (mask << i)) == val)
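/* E.g. 0x00ff0000 == 0xff << 16 is accepted by the test above at
   i == 16, while 0x00ff00ff needs more than eight contiguous bits and
   falls all the way through the loop.  */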
20040 /* Returns nonzero if the current function contains,
20041 or might contain a far jump. */
20043 thumb_far_jump_used_p (void)
20047 /* This test is only important for leaf functions. */
20048 /* assert (!leaf_function_p ()); */
20050 /* If we have already decided that far jumps may be used,
20051 do not bother checking again, and always return true even if
20052 it turns out that they are not being used. Once we have made
20053 the decision that far jumps are present (and that hence the link
20054 register will be pushed onto the stack) we cannot go back on it. */
20055 if (cfun->machine->far_jump_used)
20058 /* If this function is not being called from the prologue/epilogue
20059 generation code then it must be being called from the
20060 INITIAL_ELIMINATION_OFFSET macro. */
20061 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20063 /* In this case we know that we are being asked about the elimination
20064 of the arg pointer register. If that register is not being used,
20065 then there are no arguments on the stack, and we do not have to
20066 worry that a far jump might force the prologue to push the link
20067 register, changing the stack offsets. In this case we can just
20068 return false, since the presence of far jumps in the function will
20069 not affect stack offsets.
20071 If the arg pointer is live (or if it was live, but has now been
20072 eliminated and so set to dead) then we do have to test to see if
20073 the function might contain a far jump. This test can lead to some
20074 false positives, since before reload is completed, the length of
20075 branch instructions is not known, so gcc defaults to returning their
20076 longest length, which in turn sets the far jump attribute to true.
20078 A false positive will not result in bad code being generated, but it
20079 will result in a needless push and pop of the link register. We
20080 hope that this does not occur too often.
20082 If we need doubleword stack alignment this could affect the other
20083 elimination offsets so we can't risk getting it wrong. */
20084 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20085 cfun->machine->arg_pointer_live = 1;
20086 else if (!cfun->machine->arg_pointer_live)
20090 /* Check to see if the function contains a branch
20091 insn with the far jump attribute set. */
20092 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20094 if (GET_CODE (insn) == JUMP_INSN
20095 /* Ignore tablejump patterns. */
20096 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20097 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20098 && get_attr_far_jump (insn) == FAR_JUMP_YES
20101 /* Record the fact that we have decided that
20102 the function does use far jumps. */
20103 cfun->machine->far_jump_used = 1;
20111 /* Return nonzero if FUNC must be entered in ARM mode. */
20113 is_called_in_ARM_mode (tree func)
20115 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20117 /* Ignore the problem about functions whose address is taken. */
20118 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20122 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20128 /* Given the stack offsets and register mask in OFFSETS, decide how
20129 many additional registers to push instead of subtracting a constant
20130 from SP. For epilogues the principle is the same except we use pop.
20131 FOR_PROLOGUE indicates which we're generating. */
20133 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20135 HOST_WIDE_INT amount;
20136 unsigned long live_regs_mask = offsets->saved_regs_mask;
20137 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction.  */
20139 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20140 /* Then count how many other high registers will need to be pushed. */
20141 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20142 int n_free, reg_base;
20144 if (!for_prologue && frame_pointer_needed)
20145 amount = offsets->locals_base - offsets->saved_regs;
20147 amount = offsets->outgoing_args - offsets->saved_regs;
20149 /* If the stack frame size is 512 exactly, we can save one load
20150 instruction, which should make this a win even when optimizing for speed.  */
20152 if (!optimize_size && amount != 512)
20155 /* Can't do this if there are high registers to push. */
20156 if (high_regs_pushed != 0)
20159 /* Shouldn't do it in the prologue if no registers would normally
20160 be pushed at all. In the epilogue, also allow it if we'll have
20161 a pop insn for the PC. */
20164 || TARGET_BACKTRACE
20165 || (live_regs_mask & 1 << LR_REGNUM) == 0
20166 || TARGET_INTERWORK
20167 || crtl->args.pretend_args_size != 0))
20170 /* Don't do this if thumb_expand_prologue wants to emit instructions
20171 between the push and the stack frame allocation. */
20173 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20174 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20181 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20182 live_regs_mask >>= reg_base;
20185 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20186 && (for_prologue || call_used_regs[reg_base + n_free]))
20188 live_regs_mask >>= 1;
20194 gcc_assert (amount / 4 * 4 == amount);
20196 if (amount >= 512 && (amount - n_free * 4) < 512)
20197 return (amount - 508) / 4;
20198 if (amount <= n_free * 4)
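/* Worked example (numbers assumed): for amount == 516 and n_free == 2,
   516 - 2 * 4 == 508 < 512, so (516 - 508) / 4 == 2 extra registers
   are pushed and the remaining 508-byte adjustment fits the single
   Thumb-1 "sub sp, #imm" limit of 508.  */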
20203 /* The bits which aren't usefully expanded as rtl. */
20205 thumb_unexpanded_epilogue (void)
20207 arm_stack_offsets *offsets;
20209 unsigned long live_regs_mask = 0;
20210 int high_regs_pushed = 0;
20212 int had_to_push_lr;
20215 if (cfun->machine->return_used_this_function != 0)
20218 if (IS_NAKED (arm_current_func_type ()))
20221 offsets = arm_get_frame_offsets ();
20222 live_regs_mask = offsets->saved_regs_mask;
20223 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20225 /* Deduce the registers used from the function's return value where possible.
20226 This is more reliable than examining df_regs_ever_live_p () because that
20227 will be set if the register is ever used in the function, not just if
20228 the register is used to hold a return value. */
20229 size = arm_size_return_regs ();
20231 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20234 unsigned long extra_mask = (1 << extra_pop) - 1;
20235 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20238 /* The prolog may have pushed some high registers to use as
20239 work registers. e.g. the testsuite file:
20240 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20241 compiles to produce:
20242 push {r4, r5, r6, r7, lr}
20246 as part of the prolog. We have to undo that pushing here. */
20248 if (high_regs_pushed)
20250 unsigned long mask = live_regs_mask & 0xff;
20253 /* The available low registers depend on the size of the value we are returning.  */
20261 /* Oh dear! We have no low registers into which we can pop
20264 ("no low registers available for popping high registers");
20266 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20267 if (live_regs_mask & (1 << next_hi_reg))
20270 while (high_regs_pushed)
20272 /* Find lo register(s) into which the high register(s) can be popped.  */
20274 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20276 if (mask & (1 << regno))
20277 high_regs_pushed--;
20278 if (high_regs_pushed == 0)
20282 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20284 /* Pop the values into the low register(s). */
20285 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20287 /* Move the value(s) into the high registers. */
20288 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20290 if (mask & (1 << regno))
20292 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20295 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20296 if (live_regs_mask & (1 << next_hi_reg))
20301 live_regs_mask &= ~0x0f00;
20304 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20305 live_regs_mask &= 0xff;
20307 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20309 /* Pop the return address into the PC. */
20310 if (had_to_push_lr)
20311 live_regs_mask |= 1 << PC_REGNUM;
20313 /* Either no argument registers were pushed or a backtrace
20314 structure was created which includes an adjusted stack
20315 pointer, so just pop everything. */
20316 if (live_regs_mask)
20317 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20320 /* We have either just popped the return address into the
20321 PC or it was kept in LR for the entire function.
20322 Note that thumb_pushpop has already called thumb_exit if the
20323 PC was in the list. */
20324 if (!had_to_push_lr)
20325 thumb_exit (asm_out_file, LR_REGNUM);
20329 /* Pop everything but the return address. */
20330 if (live_regs_mask)
20331 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20334 if (had_to_push_lr)
20338 /* We have no free low regs, so save one. */
20339 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20343 /* Get the return address into a temporary register. */
20344 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20345 1 << LAST_ARG_REGNUM);
20349 /* Move the return address to lr. */
20350 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20352 /* Restore the low register. */
20353 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20358 regno = LAST_ARG_REGNUM;
20363 /* Remove the argument registers that were pushed onto the stack. */
20364 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20365 SP_REGNUM, SP_REGNUM,
20366 crtl->args.pretend_args_size);
20368 thumb_exit (asm_out_file, regno);
20374 /* Functions to save and restore machine-specific function data. */
20375 static struct machine_function *
20376 arm_init_machine_status (void)
20378 struct machine_function *machine;
20379 machine = ggc_alloc_cleared_machine_function ();
20381 #if ARM_FT_UNKNOWN != 0
20382 machine->func_type = ARM_FT_UNKNOWN;
20387 /* Return an RTX indicating where the return address to the
20388 calling function can be found. */
20390 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20395 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20398 /* Do anything needed before RTL is emitted for each function. */
20400 arm_init_expanders (void)
20402 /* Arrange to initialize and mark the machine per-function status. */
20403 init_machine_status = arm_init_machine_status;
20405 /* This is to stop the combine pass optimizing away the alignment
20406 adjustment of va_arg. */
20407 /* ??? It is claimed that this should not be necessary. */
20409 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20413 /* Like arm_compute_initial_elimination_offset.  Simpler because there
20414 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20415 to point at the base of the local variables after static stack
20416 space for a function has been allocated. */
20419 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20421 arm_stack_offsets *offsets;
20423 offsets = arm_get_frame_offsets ();
20427 case ARG_POINTER_REGNUM:
20430 case STACK_POINTER_REGNUM:
20431 return offsets->outgoing_args - offsets->saved_args;
20433 case FRAME_POINTER_REGNUM:
20434 return offsets->soft_frame - offsets->saved_args;
20436 case ARM_HARD_FRAME_POINTER_REGNUM:
20437 return offsets->saved_regs - offsets->saved_args;
20439 case THUMB_HARD_FRAME_POINTER_REGNUM:
20440 return offsets->locals_base - offsets->saved_args;
20443 gcc_unreachable ();
20447 case FRAME_POINTER_REGNUM:
20450 case STACK_POINTER_REGNUM:
20451 return offsets->outgoing_args - offsets->soft_frame;
20453 case ARM_HARD_FRAME_POINTER_REGNUM:
20454 return offsets->saved_regs - offsets->soft_frame;
20456 case THUMB_HARD_FRAME_POINTER_REGNUM:
20457 return offsets->locals_base - offsets->soft_frame;
20460 gcc_unreachable ();
20465 gcc_unreachable ();
20469 /* Generate the rest of a function's prologue. */
20471 thumb1_expand_prologue (void)
20475 HOST_WIDE_INT amount;
20476 arm_stack_offsets *offsets;
20477 unsigned long func_type;
20479 unsigned long live_regs_mask;
20481 func_type = arm_current_func_type ();
20483 /* Naked functions don't have prologues. */
20484 if (IS_NAKED (func_type))
20487 if (IS_INTERRUPT (func_type))
20489 error ("interrupt Service Routines cannot be coded in Thumb mode");
20493 offsets = arm_get_frame_offsets ();
20494 live_regs_mask = offsets->saved_regs_mask;
20495 /* Load the pic register before setting the frame pointer,
20496 so we can use r7 as a temporary work register. */
20497 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20498 arm_load_pic_register (live_regs_mask);
20500 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20501 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20502 stack_pointer_rtx);
20504 amount = offsets->outgoing_args - offsets->saved_regs;
20505 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20510 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20511 GEN_INT (- amount)));
20512 RTX_FRAME_RELATED_P (insn) = 1;
20518 /* The stack decrement is too big for an immediate value in a single
20519 insn. In theory we could issue multiple subtracts, but after
20520 three of them it becomes more space efficient to place the full
20521 value in the constant pool and load into a register. (Also the
20522 ARM debugger really likes to see only one stack decrement per
20523 function). So instead we look for a scratch register into which
20524 we can load the decrement, and then we subtract this from the
20525 stack pointer. Unfortunately on the thumb the only available
20526 scratch registers are the argument registers, and we cannot use
20527 these as they may hold arguments to the function. Instead we
20528 attempt to locate a call preserved register which is used by this
20529 function. If we can find one, then we know that it will have
20530 been pushed at the start of the prologue and so we can corrupt it now.  */
20532 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20533 if (live_regs_mask & (1 << regno))
20536 gcc_assert(regno <= LAST_LO_REGNUM);
20538 reg = gen_rtx_REG (SImode, regno);
20540 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20542 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20543 stack_pointer_rtx, reg));
20544 RTX_FRAME_RELATED_P (insn) = 1;
20545 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20546 plus_constant (stack_pointer_rtx,
20548 RTX_FRAME_RELATED_P (dwarf) = 1;
20549 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
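/* A possible emission for amount == 1024 when r4 is live (assumed,
   for illustration; ".LCx" is a placeholder constant pool label):

	ldr	r4, .LCx	@ .LCx: .word -1024
	add	sp, sp, r4

   Corrupting r4 here is safe: the prologue pushed it and the epilogue
   will reload it.  */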
20553 if (frame_pointer_needed)
20554 thumb_set_frame_pointer (offsets);
20556 /* If we are profiling, make sure no instructions are scheduled before
20557 the call to mcount. Similarly if the user has requested no
20558 scheduling in the prolog. Similarly if we want non-call exceptions
20559 using the EABI unwinder, to prevent faulting instructions from being
20560 swapped with a stack adjustment. */
20561 if (crtl->profile || !TARGET_SCHED_PROLOG
20562 || (arm_except_unwind_info () == UI_TARGET
20563 && cfun->can_throw_non_call_exceptions))
20564 emit_insn (gen_blockage ());
20566 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20567 if (live_regs_mask & 0xff)
20568 cfun->machine->lr_save_eliminated = 0;
20573 thumb1_expand_epilogue (void)
20575 HOST_WIDE_INT amount;
20576 arm_stack_offsets *offsets;
20579 /* Naked functions don't have epilogues.  */
20580 if (IS_NAKED (arm_current_func_type ()))
20583 offsets = arm_get_frame_offsets ();
20584 amount = offsets->outgoing_args - offsets->saved_regs;
20586 if (frame_pointer_needed)
20588 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20589 amount = offsets->locals_base - offsets->saved_regs;
20591 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20593 gcc_assert (amount >= 0);
20597 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20598 GEN_INT (amount)));
20601 /* r3 is always free in the epilogue. */
20602 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20604 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20605 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20609 /* Emit a USE (stack_pointer_rtx), so that
20610 the stack adjustment will not be deleted. */
20611 emit_insn (gen_prologue_use (stack_pointer_rtx));
20613 if (crtl->profile || !TARGET_SCHED_PROLOG)
20614 emit_insn (gen_blockage ());
20616 /* Emit a clobber for each register that will be restored in the epilogue,
20617 so that flow2 will get register lifetimes correct. */
20618 for (regno = 0; regno < 13; regno++)
20619 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20620 emit_clobber (gen_rtx_REG (SImode, regno));
20622 if (! df_regs_ever_live_p (LR_REGNUM))
20623 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20627 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20629 arm_stack_offsets *offsets;
20630 unsigned long live_regs_mask = 0;
20631 unsigned long l_mask;
20632 unsigned high_regs_pushed = 0;
20633 int cfa_offset = 0;
20636 if (IS_NAKED (arm_current_func_type ()))
20639 if (is_called_in_ARM_mode (current_function_decl))
20643 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20644 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20646 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20648 /* Generate code sequence to switch us into Thumb mode. */
20649 /* The .code 32 directive has already been emitted by
20650 ASM_DECLARE_FUNCTION_NAME. */
20651 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20652 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20654 /* Generate a label, so that the debugger will notice the
20655 change in instruction sets. This label is also used by
20656 the assembler to bypass the ARM code when this function
20657 is called from a Thumb encoded function elsewhere in the
20658 same file. Hence the definition of STUB_NAME here must
20659 agree with the definition in gas/config/tc-arm.c. */
20661 #define STUB_NAME ".real_start_of"
20663 fprintf (f, "\t.code\t16\n");
20665 if (arm_dllexport_name_p (name))
20666 name = arm_strip_name_encoding (name);
20668 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20669 fprintf (f, "\t.thumb_func\n");
20670 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
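/* Taken together, the code above emits roughly the following for a
   function "foo" (name assumed; the stub label is STUB_NAME written
   immediately before the assembler name):

	.code	32
   foo:
	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<foo>
	.thumb_func
   .real_start_of<foo>:

   The ORR sets the low bit of the address so the BX switches the core
   into Thumb state.  */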
20673 if (crtl->args.pretend_args_size)
20675 /* Output unwind directive for the stack adjustment. */
20676 if (arm_except_unwind_info () == UI_TARGET)
20677 fprintf (f, "\t.pad #%d\n",
20678 crtl->args.pretend_args_size);
20680 if (cfun->machine->uses_anonymous_args)
20684 fprintf (f, "\tpush\t{");
20686 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20688 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20689 regno <= LAST_ARG_REGNUM;
20691 asm_fprintf (f, "%r%s", regno,
20692 regno == LAST_ARG_REGNUM ? "" : ", ");
20694 fprintf (f, "}\n");
20697 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20698 SP_REGNUM, SP_REGNUM,
20699 crtl->args.pretend_args_size);
20701 /* We don't need to record the stores for unwinding (would it
20702 help the debugger any if we did?), but record the change in
20703 the stack pointer. */
20704 if (dwarf2out_do_frame ())
20706 char *l = dwarf2out_cfi_label (false);
20708 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20709 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20713 /* Get the registers we are going to push. */
20714 offsets = arm_get_frame_offsets ();
20715 live_regs_mask = offsets->saved_regs_mask;
20716 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20717 l_mask = live_regs_mask & 0x40ff;
20718 /* Then count how many other high registers will need to be pushed. */
20719 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20721 if (TARGET_BACKTRACE)
20724 unsigned work_register;
20726 /* We have been asked to create a stack backtrace structure.
20727 The code looks like this:
20731 0 sub SP, #16 Reserve space for 4 registers.
20732 2 push {R7} Push low registers.
20733 4 add R7, SP, #20 Get the stack pointer before the push.
20734 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20735 8 mov R7, PC Get hold of the start of this code plus 12.
20736 10 str R7, [SP, #16] Store it.
20737 12 mov R7, FP Get hold of the current frame pointer.
20738 14 str R7, [SP, #4] Store it.
20739 16 mov R7, LR Get hold of the current return address.
20740 18 str R7, [SP, #12] Store it.
20741 20 add R7, SP, #16 Point at the start of the backtrace structure.
20742 22 mov FP, R7 Put this value into the frame pointer. */
20744 work_register = thumb_find_work_register (live_regs_mask);
20746 if (arm_except_unwind_info () == UI_TARGET)
20747 asm_fprintf (f, "\t.pad #16\n");
20750 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20751 SP_REGNUM, SP_REGNUM);
20753 if (dwarf2out_do_frame ())
20755 char *l = dwarf2out_cfi_label (false);
20757 cfa_offset = cfa_offset + 16;
20758 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20763 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20764 offset = bit_count (l_mask) * UNITS_PER_WORD;
20769 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20770 offset + 16 + crtl->args.pretend_args_size);
20772 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20775 /* Make sure that the instruction fetching the PC is in the right place
20776 to calculate "start of backtrace creation code + 12". */
20779 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20780 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20782 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20783 ARM_HARD_FRAME_POINTER_REGNUM);
20784 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20789 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20790 ARM_HARD_FRAME_POINTER_REGNUM);
20791 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20793 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20794 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20798 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20799 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20801 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20803 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20804 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20806 /* Optimization: If we are not pushing any low registers but we are going
20807 to push some high registers then delay our first push. This will just
20808 be a push of LR and we can combine it with the push of the first high register.  */
20810 else if ((l_mask & 0xff) != 0
20811 || (high_regs_pushed == 0 && l_mask))
20813 unsigned long mask = l_mask;
20814 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20815 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20818 if (high_regs_pushed)
20820 unsigned pushable_regs;
20821 unsigned next_hi_reg;
20823 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20824 if (live_regs_mask & (1 << next_hi_reg))
20827 pushable_regs = l_mask & 0xff;
20829 if (pushable_regs == 0)
20830 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20832 while (high_regs_pushed > 0)
20834 unsigned long real_regs_mask = 0;
20836 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20838 if (pushable_regs & (1 << regno))
20840 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20842 high_regs_pushed --;
20843 real_regs_mask |= (1 << next_hi_reg);
20845 if (high_regs_pushed)
20847 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20849 if (live_regs_mask & (1 << next_hi_reg))
20854 pushable_regs &= ~((1 << regno) - 1);
20860 /* If we had to find a work register and we have not yet
20861 saved the LR then add it to the list of regs to push. */
20862 if (l_mask == (1 << LR_REGNUM))
20864 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20866 real_regs_mask | (1 << LR_REGNUM));
20870 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20875 /* Handle the case of a double word load into a low register from
20876 a computed memory address. The computed address may involve a
20877 register which is overwritten by the load. */
20879 thumb_load_double_from_address (rtx *operands)
20887 gcc_assert (GET_CODE (operands[0]) == REG);
20888 gcc_assert (GET_CODE (operands[1]) == MEM);
20890 /* Get the memory address. */
20891 addr = XEXP (operands[1], 0);
20893 /* Work out how the memory address is computed. */
20894 switch (GET_CODE (addr))
20897 operands[2] = adjust_address (operands[1], SImode, 4);
20899 if (REGNO (operands[0]) == REGNO (addr))
20901 output_asm_insn ("ldr\t%H0, %2", operands);
20902 output_asm_insn ("ldr\t%0, %1", operands);
20906 output_asm_insn ("ldr\t%0, %1", operands);
20907 output_asm_insn ("ldr\t%H0, %2", operands);
20912 /* Compute <address> + 4 for the high order load. */
20913 operands[2] = adjust_address (operands[1], SImode, 4);
20915 output_asm_insn ("ldr\t%0, %1", operands);
20916 output_asm_insn ("ldr\t%H0, %2", operands);
20920 arg1 = XEXP (addr, 0);
20921 arg2 = XEXP (addr, 1);
20923 if (CONSTANT_P (arg1))
20924 base = arg2, offset = arg1;
20926 base = arg1, offset = arg2;
20928 gcc_assert (GET_CODE (base) == REG);
20930 /* Catch the case of <address> = <reg> + <reg> */
20931 if (GET_CODE (offset) == REG)
20933 int reg_offset = REGNO (offset);
20934 int reg_base = REGNO (base);
20935 int reg_dest = REGNO (operands[0]);
20937 /* Add the base and offset registers together into the
20938 higher destination register. */
20939 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20940 reg_dest + 1, reg_base, reg_offset);
20942 /* Load the lower destination register from the address in
20943 the higher destination register. */
20944 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20945 reg_dest, reg_dest + 1);
20947 /* Load the higher destination register from its own address plus 4.  */
20949 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20950 reg_dest + 1, reg_dest + 1);
20954 /* Compute <address> + 4 for the high order load. */
20955 operands[2] = adjust_address (operands[1], SImode, 4);
20957 /* If the computed address is held in the low order register
20958 then load the high order register first, otherwise always
20959 load the low order register first. */
20960 if (REGNO (operands[0]) == REGNO (base))
20962 output_asm_insn ("ldr\t%H0, %2", operands);
20963 output_asm_insn ("ldr\t%0, %1", operands);
20967 output_asm_insn ("ldr\t%0, %1", operands);
20968 output_asm_insn ("ldr\t%H0, %2", operands);
20974 /* With no registers to worry about we can just load the value directly.  */
20976 operands[2] = adjust_address (operands[1], SImode, 4);
20978 output_asm_insn ("ldr\t%H0, %2", operands);
20979 output_asm_insn ("ldr\t%0, %1", operands);
20983 gcc_unreachable ();
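/* Example of the REG + REG case above (register numbers assumed): for
   operands[0] == r0 and address r0 + r2 the overlap-safe sequence is

	add	r1, r0, r2
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   computing the address into the high half of the destination first,
   so the base is not clobbered before both words are loaded.  */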
20990 thumb_output_move_mem_multiple (int n, rtx *operands)
20997 if (REGNO (operands[4]) > REGNO (operands[5]))
21000 operands[4] = operands[5];
21003 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21004 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21008 if (REGNO (operands[4]) > REGNO (operands[5]))
21011 operands[4] = operands[5];
21014 if (REGNO (operands[5]) > REGNO (operands[6]))
21017 operands[5] = operands[6];
21020 if (REGNO (operands[4]) > REGNO (operands[5]))
21023 operands[4] = operands[5];
21027 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21028 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21032 gcc_unreachable ();
21038 /* Output a call-via instruction for thumb state. */
21040 thumb_call_via_reg (rtx reg)
21042 int regno = REGNO (reg);
21045 gcc_assert (regno < LR_REGNUM);
21047 /* If we are in the normal text section we can use a single instance
21048 per compilation unit. If we are doing function sections, then we need
21049 an entry per section, since we can't rely on reachability. */
21050 if (in_section == text_section)
21052 thumb_call_reg_needed = 1;
21054 if (thumb_call_via_label[regno] == NULL)
21055 thumb_call_via_label[regno] = gen_label_rtx ();
21056 labelp = thumb_call_via_label + regno;
21060 if (cfun->machine->call_via[regno] == NULL)
21061 cfun->machine->call_via[regno] = gen_label_rtx ();
21062 labelp = cfun->machine->call_via + regno;
21065 output_asm_insn ("bl\t%a0", labelp);
21069 /* Routines for generating rtl. */
21071 thumb_expand_movmemqi (rtx *operands)
21073 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21074 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21075 HOST_WIDE_INT len = INTVAL (operands[2]);
21076 HOST_WIDE_INT offset = 0;
21080 emit_insn (gen_movmem12b (out, in, out, in));
21086 emit_insn (gen_movmem8b (out, in, out, in));
21092 rtx reg = gen_reg_rtx (SImode);
21093 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21094 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21101 rtx reg = gen_reg_rtx (HImode);
21102 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21103 plus_constant (in, offset))));
21104 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21112 rtx reg = gen_reg_rtx (QImode);
21113 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21114 plus_constant (in, offset))));
21115 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21121 thumb_reload_out_hi (rtx *operands)
21123 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21126 /* Handle reading a half-word from memory during reload. */
21128 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21130 gcc_unreachable ();
21133 /* Return the length of a function name prefix
21134 that starts with the character 'c'. */
21136 arm_get_strip_length (int c)
21140 ARM_NAME_ENCODING_LENGTHS
21145 /* Return a pointer to a function's name with any
21146 and all prefix encodings stripped from it. */
21148 arm_strip_name_encoding (const char *name)
21152 while ((skip = arm_get_strip_length (* name)))
21158 /* If there is a '*' anywhere in the name's prefix, then
21159 emit the stripped name verbatim, otherwise prepend an
21160 underscore if leading underscores are being used. */
21162 arm_asm_output_labelref (FILE *stream, const char *name)
21167 while ((skip = arm_get_strip_length (* name)))
21169 verbatim |= (*name == '*');
21174 fputs (name, stream);
21176 asm_fprintf (stream, "%U%s", name);
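/* For instance: a NAME of "*foo" has its '*' consumed by the strip
   loop, VERBATIM becomes true, and "foo" is written out untouched;
   a plain "foo" instead goes through %U and so may gain the target's
   user-label underscore prefix.  */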
21180 arm_file_start (void)
21184 if (TARGET_UNIFIED_ASM)
21185 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21189 const char *fpu_name;
21190 if (arm_selected_arch)
21191 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21193 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21195 if (TARGET_SOFT_FLOAT)
21198 fpu_name = "softvfp";
21200 fpu_name = "softfpa";
21204 fpu_name = arm_fpu_desc->name;
21205 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21207 if (TARGET_HARD_FLOAT)
21208 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21209 if (TARGET_HARD_FLOAT_ABI)
21210 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21213 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21215 /* Some of these attributes only apply when the corresponding features
21216 are used. However we don't have any easy way of figuring this out.
21217 Conservatively record the setting that would have been used. */
21219 /* Tag_ABI_FP_rounding. */
21220 if (flag_rounding_math)
21221 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21222 if (!flag_unsafe_math_optimizations)
21224 /* Tag_ABI_FP_denormal.  */
21225 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21226 /* Tag_ABI_FP_exceptions. */
21227 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21229 /* Tag_ABI_FP_user_exceptions. */
21230 if (flag_signaling_nans)
21231 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21232 /* Tag_ABI_FP_number_model. */
21233 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21234 flag_finite_math_only ? 1 : 3);
21236 /* Tag_ABI_align8_needed. */
21237 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21238 /* Tag_ABI_align8_preserved. */
21239 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21240 /* Tag_ABI_enum_size. */
21241 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21242 flag_short_enums ? 1 : 2);
21244 /* Tag_ABI_optimization_goals. */
21247 else if (optimize >= 2)
21253 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21255 /* Tag_ABI_FP_16bit_format. */
21256 if (arm_fp16_format)
21257 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21258 (int)arm_fp16_format);
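/* For an ordinary -O2, IEEE-conformant, -fno-short-enums compile the
   block above prints something like (assumed, abridged):

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute 21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute 23, 3	@ Tag_ABI_FP_number_model
	.eabi_attribute 24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute 26, 2	@ Tag_ABI_enum_size
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals
*/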
21260 if (arm_lang_output_object_attributes_hook)
21261 arm_lang_output_object_attributes_hook();
21263 default_file_start();
21267 arm_file_end (void)
21271 if (NEED_INDICATE_EXEC_STACK)
21272 /* Add .note.GNU-stack. */
21273 file_end_indicate_exec_stack ();
21275 if (! thumb_call_reg_needed)
21278 switch_to_section (text_section);
21279 asm_fprintf (asm_out_file, "\t.code 16\n");
21280 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21282 for (regno = 0; regno < LR_REGNUM; regno++)
21284 rtx label = thumb_call_via_label[regno];
21288 targetm.asm_out.internal_label (asm_out_file, "L",
21289 CODE_LABEL_NUMBER (label));
21290 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21296 /* Symbols in the text segment can be accessed without indirecting via the
21297 constant pool; it may take an extra binary operation, but this is still
21298 faster than indirecting via memory. Don't do this when not optimizing,
21299 since we won't be calculating all of the offsets necessary to do this
21303 arm_encode_section_info (tree decl, rtx rtl, int first)
21305 if (optimize > 0 && TREE_CONSTANT (decl))
21306 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21308 default_encode_section_info (decl, rtl, first);
21310 #endif /* !ARM_PE */
21313 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21315 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21316 && !strcmp (prefix, "L"))
21318 arm_ccfsm_state = 0;
21319 arm_target_insn = NULL;
21321 default_internal_label (stream, prefix, labelno);
21324 /* Output code to add DELTA to the first argument, and then jump
21325 to FUNCTION. Used for C++ multiple inheritance. */
21327 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21328 HOST_WIDE_INT delta,
21329 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21332 static int thunk_label = 0;
21335 int mi_delta = delta;
21336 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21338 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21341 mi_delta = - mi_delta;
21345 int labelno = thunk_label++;
21346 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21347 /* Thunks are entered in ARM mode when available.  */
21348 if (TARGET_THUMB1_ONLY)
21350 /* push r3 so we can use it as a temporary. */
21351 /* TODO: Omit this save if r3 is not used. */
21352 fputs ("\tpush {r3}\n", file);
21353 fputs ("\tldr\tr3, ", file);
21357 fputs ("\tldr\tr12, ", file);
21359 assemble_name (file, label);
21360 fputc ('\n', file);
21363 /* If we are generating PIC, the ldr instruction below loads
21364 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21365 the address of the add + 8, so we have:
21367 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21368 = target + 1.
21370 Note that we have "+ 1" because some versions of GNU ld
21371 don't set the low bit of the result for R_ARM_REL32
21372 relocations against thumb function symbols.
21373 On ARMv6M this is +4, not +8. */
21374 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21375 assemble_name (file, labelpc);
21376 fputs (":\n", file);
21377 if (TARGET_THUMB1_ONLY)
21379 /* This is 2 insns after the start of the thunk, so we know it
21380 is 4-byte aligned. */
21381 fputs ("\tadd\tr3, pc, r3\n", file);
21382 fputs ("\tmov r12, r3\n", file);
21385 fputs ("\tadd\tr12, pc, r12\n", file);
21387 else if (TARGET_THUMB1_ONLY)
21388 fputs ("\tmov r12, r3\n", file);
21390 if (TARGET_THUMB1_ONLY)
21392 if (mi_delta > 255)
21394 fputs ("\tldr\tr3, ", file);
21395 assemble_name (file, label);
21396 fputs ("+4\n", file);
21397 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21398 mi_op, this_regno, this_regno);
21400 else if (mi_delta != 0)
21402 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21403 mi_op, this_regno, this_regno,
21409 /* TODO: Use movw/movt for large constants when available. */
21410 while (mi_delta != 0)
21412 if ((mi_delta & (3 << shift)) == 0)
21416 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21417 mi_op, this_regno, this_regno,
21418 mi_delta & (0xff << shift));
21419 mi_delta &= ~(0xff << shift);
21426 if (TARGET_THUMB1_ONLY)
21427 fputs ("\tpop\t{r3}\n", file);
21429 fprintf (file, "\tbx\tr12\n");
21430 ASM_OUTPUT_ALIGN (file, 2);
21431 assemble_name (file, label);
21432 fputs (":\n", file);
21435 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21436 rtx tem = XEXP (DECL_RTL (function), 0);
21437 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21438 tem = gen_rtx_MINUS (GET_MODE (tem),
21440 gen_rtx_SYMBOL_REF (Pmode,
21441 ggc_strdup (labelpc)));
21442 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21445 /* Output ".word .LTHUNKn". */
21446 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21448 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21449 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21453 fputs ("\tb\t", file);
21454 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21455 if (NEED_PLT_RELOC)
21456 fputs ("(PLT)", file);
21457 fputc ('\n', file);
21462 arm_emit_vector_const (FILE *file, rtx x)
21465 const char * pattern;
21467 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21469 switch (GET_MODE (x))
21471 case V2SImode: pattern = "%08x"; break;
21472 case V4HImode: pattern = "%04x"; break;
21473 case V8QImode: pattern = "%02x"; break;
21474 default: gcc_unreachable ();
21477 fprintf (file, "0x");
21478 for (i = CONST_VECTOR_NUNITS (x); i--;)
21482 element = CONST_VECTOR_ELT (x, i);
21483 fprintf (file, pattern, INTVAL (element));
21489 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21490 HFmode constant pool entries are actually loaded with ldr. */
21492 arm_emit_fp16_const (rtx c)
21497 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21498 bits = real_to_target (NULL, &r, HFmode);
21499 if (WORDS_BIG_ENDIAN)
21500 assemble_zeros (2);
21501 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21502 if (!WORDS_BIG_ENDIAN)
21503 assemble_zeros (2);
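/* E.g. the HFmode constant 1.0 has the half-precision bit pattern
   0x3c00, so on a little-endian target this emits (sketch):

	.short	0x3c00
	.short	0

   padding the two-byte value out to the four-byte slot that the ldr
   of the constant pool entry expects.  */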
21507 arm_output_load_gr (rtx *operands)
21514 if (GET_CODE (operands [1]) != MEM
21515 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21516 || GET_CODE (reg = XEXP (sum, 0)) != REG
21517 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21518 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21519 return "wldrw%?\t%0, %1";
21521 /* Fix up an out-of-range load of a GR register. */
21522 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21523 wcgr = operands[0];
21525 output_asm_insn ("ldr%?\t%0, %1", operands);
21527 operands[0] = wcgr;
21529 output_asm_insn ("tmcr%?\t%0, %1", operands);
21530 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21535 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21537 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21538 named arg and all anonymous args onto the stack.
21539 XXX I know the prologue shouldn't be pushing registers, but it is faster
21543 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21544 enum machine_mode mode,
21547 int second_time ATTRIBUTE_UNUSED)
21551 cfun->machine->uses_anonymous_args = 1;
21552 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21554 nregs = pcum->aapcs_ncrn;
21555 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21559 nregs = pcum->nregs;
21561 if (nregs < NUM_ARG_REGS)
21562 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21565 /* Return nonzero if the CONSUMER instruction (a store) does not need
21566 PRODUCER's value to calculate the address. */
21569 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21571 rtx value = PATTERN (producer);
21572 rtx addr = PATTERN (consumer);
21574 if (GET_CODE (value) == COND_EXEC)
21575 value = COND_EXEC_CODE (value);
21576 if (GET_CODE (value) == PARALLEL)
21577 value = XVECEXP (value, 0, 0);
21578 value = XEXP (value, 0);
21579 if (GET_CODE (addr) == COND_EXEC)
21580 addr = COND_EXEC_CODE (addr);
21581 if (GET_CODE (addr) == PARALLEL)
21582 addr = XVECEXP (addr, 0, 0);
21583 addr = XEXP (addr, 0);
21585 return !reg_overlap_mentioned_p (value, addr);
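/* Sketch of the distinction (register numbers assumed): if PRODUCER
   sets r1 and CONSUMER is "str r4, [r1]", the address mentions r1 and
   this returns 0; for "str r1, [r5]" only the stored value depends on
   r1, so it returns nonzero.  */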
21588 /* Return nonzero if the CONSUMER instruction (a store) does need
21589 PRODUCER's value to calculate the address. */
21592 arm_early_store_addr_dep (rtx producer, rtx consumer)
21594 return !arm_no_early_store_addr_dep (producer, consumer);
21597 /* Return nonzero if the CONSUMER instruction (a load) does need
21598 PRODUCER's value to calculate the address. */
21601 arm_early_load_addr_dep (rtx producer, rtx consumer)
21603 rtx value = PATTERN (producer);
21604 rtx addr = PATTERN (consumer);
21606 if (GET_CODE (value) == COND_EXEC)
21607 value = COND_EXEC_CODE (value);
21608 if (GET_CODE (value) == PARALLEL)
21609 value = XVECEXP (value, 0, 0);
21610 value = XEXP (value, 0);
21611 if (GET_CODE (addr) == COND_EXEC)
21612 addr = COND_EXEC_CODE (addr);
21613 if (GET_CODE (addr) == PARALLEL)
21614 addr = XVECEXP (addr, 0, 0);
21615 addr = XEXP (addr, 1);
21617 return reg_overlap_mentioned_p (value, addr);
21620 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21621 have an early register shift value or amount dependency on the
21622 result of PRODUCER. */
21625 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21627 rtx value = PATTERN (producer);
21628 rtx op = PATTERN (consumer);
21631 if (GET_CODE (value) == COND_EXEC)
21632 value = COND_EXEC_CODE (value);
21633 if (GET_CODE (value) == PARALLEL)
21634 value = XVECEXP (value, 0, 0);
21635 value = XEXP (value, 0);
21636 if (GET_CODE (op) == COND_EXEC)
21637 op = COND_EXEC_CODE (op);
21638 if (GET_CODE (op) == PARALLEL)
21639 op = XVECEXP (op, 0, 0);
21642 early_op = XEXP (op, 0);
21643 /* This is either an actual independent shift, or a shift applied to
21644 the first operand of another operation.  We want the whole shift operation.  */
21646 if (GET_CODE (early_op) == REG)
21649 return !reg_overlap_mentioned_p (value, early_op);
21652 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21653 have an early register shift value dependency on the result of PRODUCER.  */
21657 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21659 rtx value = PATTERN (producer);
21660 rtx op = PATTERN (consumer);
21663 if (GET_CODE (value) == COND_EXEC)
21664 value = COND_EXEC_CODE (value);
21665 if (GET_CODE (value) == PARALLEL)
21666 value = XVECEXP (value, 0, 0);
21667 value = XEXP (value, 0);
21668 if (GET_CODE (op) == COND_EXEC)
21669 op = COND_EXEC_CODE (op);
21670 if (GET_CODE (op) == PARALLEL)
21671 op = XVECEXP (op, 0, 0);
21674 early_op = XEXP (op, 0);
21676 /* This is either an actual independent shift, or a shift applied to
21677 the first operand of another operation. We want the value being
21678 shifted, in either case. */
21679 if (GET_CODE (early_op) != REG)
21680 early_op = XEXP (early_op, 0);
21682 return !reg_overlap_mentioned_p (value, early_op);
21685 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21686 have an early register mult dependency on the result of PRODUCER.  */
21690 arm_no_early_mul_dep (rtx producer, rtx consumer)
21692 rtx value = PATTERN (producer);
21693 rtx op = PATTERN (consumer);
21695 if (GET_CODE (value) == COND_EXEC)
21696 value = COND_EXEC_CODE (value);
21697 if (GET_CODE (value) == PARALLEL)
21698 value = XVECEXP (value, 0, 0);
21699 value = XEXP (value, 0);
21700 if (GET_CODE (op) == COND_EXEC)
21701 op = COND_EXEC_CODE (op);
21702 if (GET_CODE (op) == PARALLEL)
21703 op = XVECEXP (op, 0, 0);
21706 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21708 if (GET_CODE (XEXP (op, 0)) == MULT)
21709 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21711 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21717 /* We can't rely on the caller doing the proper promotion when
21718 using APCS or ATPCS. */
21721 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21723 return !TARGET_AAPCS_BASED;
static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}
/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
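
/* As an illustration (hypothetical RTL, with made-up register numbers),
   the test above accepts
       producer: (set (reg 4) (mult (reg 1) (reg 2)))
       consumer: (set (reg 5) (plus (mult (reg 6) (reg 7)) (reg 4)))
   because the multiply result feeds only the accumulator operand; it
   would reject the pair if (reg 4) also appeared as a multiplicand.  */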
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}
/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}
/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}
static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}
static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (addr, delta);
        }
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
      case DImode:
        if (TARGET_NEON_VECTORIZE_QUAD)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't
   have enough left over to do doubleword arithmetic.  */

static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}
/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
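
/* For example, under these rules s5 (GCC regno FIRST_VFP_REGNUM + 5) is
   emitted as DWARF register 69, and d16 (regno FIRST_VFP_REGNUM + 32) as
   256 + 16 = 272; the numbers merely illustrate the formulas above.  */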
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
        {
          fprintf (asm_out_file, "\t.pad #4\n");
          offset -= 4;
        }
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
          || GET_CODE (XEXP (e, 0)) != MEM
          || GET_CODE (XEXP (e, 1)) != REG)
        abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
        abort ();

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
        {
          offset += reg_size;
          if (GET_CODE (XEXP (e, 0)) != REG
              || REGNO (XEXP (e, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e, 1)) != CONST_INT
              || offset != INTVAL (XEXP (e, 1)))
            abort ();
        }
      else if (i != 1
               || GET_CODE (e) != REG
               || REGNO (e) != SP_REGNUM)
        abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
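
/* As an illustration (a sketch, not verbatim compiler output), the
   prologue insn for "push {r4, r5, lr}" is a PARALLEL of a stack
   adjustment of -12 followed by three register stores, and is annotated
   above as
        .save   {r4, r5, lr}  */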
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
        }
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
        {
          /* Stack pointer save before alignment.  */
          reg = REGNO (e0);
          asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                       reg + 0x90, reg);
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
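
/* Illustrative mappings implemented above (a sketch of typical prologue
   RTL, not verbatim dumps):
        (set (mem (pre_dec sp)) (reg r4))   ->  .save {r4}
        (set sp (plus sp (const_int -16)))  ->  .pad #16
        (set fp (plus sp (const_int 8)))    ->  .setfp fp, sp, #8  */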
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (arm_except_unwind_info () != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_EXCEPT_UNWIND_INFO.  */

static enum unwind_info_type
arm_except_unwind_info (void)
{
  /* Honor the --enable-sjlj-exceptions configure switch.  */
#ifdef CONFIG_SJLJ_EXCEPTIONS
  if (CONFIG_SJLJ_EXCEPTIONS)
    return UI_SJLJ;
#endif

  /* If not using ARM EABI unwind tables... */
  if (ARM_UNWIND_INFO)
    {
      /* For simplicity elsewhere in this file, indicate that all unwind
         info is disabled if we're not emitting unwind tables.  */
      if (!flag_exceptions && !flag_unwind_tables)
        return UI_NONE;

      return UI_TARGET;
    }

  /* ... we use sjlj exceptions for backwards compatibility.  */
  return UI_SJLJ;
}
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info () != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
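
/* So a typical unwound function is bracketed as (sketch):
        foo:
                .fnstart
                ...body...
                .fnend
   with ".cantunwind" inserted before ".fnend" when the function can be
   shown never to be unwound.  */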
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT(val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
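
/* For example, with a QImode difference vector the code above emits a
   sequence along these lines (a sketch, not verbatim compiler output):
        cmp     r0, #<ncases>
        bhi     .Ldefault
        tbb     [pc, r0]
   with the table of byte offsets placed immediately after the tbb.  */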
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa5:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
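
/* For example, a vector of eight "__builtin_neon_qi" elements (V8QImode,
   the int8x8_t container) matches the first table entry above, so a C++
   function foo (int8x8_t) mangles as _Z3foo15__simd64_int8_t.  */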
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};
/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
static void
arm_option_optimization (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Only Thumb-1 lacks conditional execution, so return true if the
   target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* Legitimize a memory reference for a sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these
   instructions.  */
static rtx
arm_legitimize_sync_memory (rtx memory)
{
  rtx addr = force_reg (Pmode, XEXP (memory, 0));
  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);

  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
  return legitimate_memory;
}
/* An instruction emitter.  */
typedef void (* emit_f) (int label, const char *, rtx *);

/* An instruction emitter that emits via the conventional
   output_asm_insn.  */
static void
arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
{
  output_asm_insn (pattern, operands);
}
/* Count the number of emitted synchronization instructions.  */
static unsigned arm_insn_count;

/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
static void
arm_count (int label,
           const char *pattern ATTRIBUTE_UNUSED,
           rtx *operands ATTRIBUTE_UNUSED)
{
  if (! label)
    ++ arm_insn_count;
}
/* Construct a pattern using conventional output formatting and feed
   it to output_asm_insn.  Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
static void
arm_output_asm_insn (emit_f emit, int label, rtx *operands,
                     const char *pattern, ...)
{
  va_list ap;
  char buffer[256];

  va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
}
/* Emit the memory barrier instruction, if any, provided by this
   target to a specified emitter.  */
static void
arm_process_output_memory_barrier (emit_f emit, rtx *operands)
{
  if (TARGET_HAVE_DMB)
    {
      /* Note we issue a system level barrier.  We should consider
         issuing an inner shareability zone barrier here instead, i.e.
         "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }

  if (TARGET_HAVE_DMB_MCR)
    {
      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
      return;
    }

  gcc_unreachable ();
}
/* Emit the memory barrier instruction, if any, provided by this
   target.  */
const char *
arm_output_memory_barrier (rtx *operands)
{
  arm_process_output_memory_barrier (arm_emit, operands);
  return "";
}
/* Helper to figure out the instruction suffix required on ldrex/strex
   for operations on an object of the specified mode.  */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default:
      gcc_unreachable ();
    }
  return "";
}
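
/* So, for example, an SImode access uses plain "ldrex"/"strex", while
   QImode uses "ldrexb"/"strexb" and DImode "ldrexd"/"strexd".  */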
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_ldrex (emit_f emit,
                  enum machine_mode mode,
                  rtx target,
                  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[2];

  operands[0] = target;
  operands[1] = memory;
  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
}
/* Emit a strex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_strex (emit_f emit,
                  enum machine_mode mode,
                  const char *cc,
                  rtx result,
                  rtx value,
                  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[3];

  operands[0] = result;
  operands[1] = value;
  operands[2] = memory;
  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
                       cc);
}
/* Helper to emit a two operand instruction.  */
static void
arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
{
  rtx operands[2];

  operands[0] = d;
  operands[1] = s;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
}
/* Helper to emit a three operand instruction.  */
static void
arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
{
  rtx operands[3];

  operands[0] = d;
  operands[1] = a;
  operands[2] = b;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
}
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:

   RTX register or const_int representing the required old_value for
   the modify to continue, if NULL no comparison is performed.  */
static void
arm_output_sync_loop (emit_f emit,
                      enum machine_mode mode,
                      rtx old_value,
                      rtx memory,
                      rtx required_value,
                      rtx new_value,
                      rtx t1,
                      rtx t2,
                      enum attr_sync_op sync_op,
                      int early_barrier_required)
{
  rtx operands[1];

  gcc_assert (t1 != t2);

  if (early_barrier_required)
    arm_process_output_memory_barrier (emit, NULL);

  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);

  arm_output_ldrex (emit, mode, old_value, memory);

  if (required_value)
    {
      rtx operands[2];

      operands[0] = old_value;
      operands[1] = required_value;
      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
    }

  switch (sync_op)
    {
    case SYNC_OP_ADD:
      arm_output_op3 (emit, "add", t1, old_value, new_value);
      break;

    case SYNC_OP_SUB:
      arm_output_op3 (emit, "sub", t1, old_value, new_value);
      break;

    case SYNC_OP_IOR:
      arm_output_op3 (emit, "orr", t1, old_value, new_value);
      break;

    case SYNC_OP_XOR:
      arm_output_op3 (emit, "eor", t1, old_value, new_value);
      break;

    case SYNC_OP_AND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      break;

    case SYNC_OP_NAND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      arm_output_op2 (emit, "mvn", t1, t1);
      break;

    case SYNC_OP_NONE:
      t1 = new_value;
      break;
    }

  arm_output_strex (emit, mode, "", t2, t1, memory);
  operands[0] = t2;
  arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
  arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);

  arm_process_output_memory_barrier (emit, NULL);
  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
}
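
/* As an illustration, an SImode add with an early barrier expands to
   something like the following (a sketch, not verbatim output; the
   labels are really LSYT<n>/LSYB<n>):
        dmb     sy
     1: ldrex   r0, [r1]
        add     r2, r0, r3
        strex   r4, r2, [r1]
        teq     r4, #0
        bne     1b
        dmb     sy  */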
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND(result, 0);
  memory = FETCH_SYNC_OPERAND(memory, 0);
  required_value = FETCH_SYNC_OPERAND(required_value, 0);
  new_value = FETCH_SYNC_OPERAND(new_value, 0);
  t1 = FETCH_SYNC_OPERAND(t1, 0);
  t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
                        new_value, t1, t2, sync_op, early_barrier);
}
/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions, it just counts instructions, being careful not
   to count labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
                    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
                 struct arm_sync_generator *generator,
                 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
        required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
                                     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    {
      emit_insn (arm_call_generator (generator, target, memory,
                                     required_value, new_value));
    }
}
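
/* For example, a __sync_fetch_and_add on an "int" reaches this routine
   from the machine-description sync expanders with MODE == SImode, a
   null REQUIRED_VALUE and an "omn" generator; narrower modes go through
   the SImode temporary path above.  (Illustrative only; the expander
   plumbing lives in the .md patterns, not here.)  */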
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}

#include "gt-arm.h"