/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
364 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
366 #undef TARGET_ENCODE_SECTION_INFO
368 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
370 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
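/* Checking the arithmetic above: objects may then be placed at offsets
   [-4088, +4095] from an anchor, i.e. 4088 + 1 + 4095 = 8184 bytes in
   all, and 8184 = 8 * 1023, so consecutive anchors stay naturally
   8-byte aligned.  */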
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;
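/* A note on the block above: target-def.h supplies a default definition
   for every target hook, so each ARM override first #undefs the default
   and then #defines its replacement; TARGET_INITIALIZER then expands to
   an initializer holding the final value of every hook, which is how all
   of the definitions above end up inside this one targetm object.  */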
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;
/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */

int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
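/* To unpack the mask above: 0xff selects the low registers r0-r7 that
   16-bit Thumb-2 encodings can use freely; from those we knock out the
   Thumb hard frame pointer (r7) and, whenever their register numbers
   fall inside r0-r7, the stack pointer, program counter and PIC
   register, leaving a set that is safe to use as work registers.  */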
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
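/* For example, the ARM_PREFETCH_BENEFICIAL(4,32,32) used by
   arm_cortex_a9_tune below simply expands to the three trailing
   initializers "4, 32, 32": per the macro's parameter names, the
   number of prefetch slots, the L1 cache size and the L1 cache line
   size.  */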
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0 , NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
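/* For example, emit_set_insn (reg, GEN_INT (0)) emits the single insn
   (set reg (const_int 0)); the rest of this file uses it as shorthand
   wherever exactly one register assignment needs to be emitted.  */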
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
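/* Worked example of the loop above: for value = 0b101100 the
   "value &= value - 1" step yields 0b101000, then 0b100000, then 0,
   so the body runs three times and bit_count returns 3: one iteration
   per set bit rather than one per bit position.  */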
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
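/* To illustrate the naming scheme: a call such as
   arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3)
   (hypothetical arguments) would register the libfunc name
   "__gnu_ssaddsa3", while a num_suffix of 0 would drop the digit.  */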
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
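/* Likewise here: converting between two signed fract modes named, say,
   "qq" and "hq" passes all three mode tests above and produces
   "__gnu_fractqqhq2", while a fixed-to-integer conversion fails
   ALL_FIXED_POINT_MODE_P for one side and gets no "2" suffix,
   e.g. "__gnu_fractqqsi".  */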
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
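  /* Concretely (per the AEABI, Table 9 / \S 4.3.1): __aeabi_idivmod
     returns the quotient in r0 and the remainder in r1, so "a / b"
     simply ignores r1 and "a % b" reads r1; there is no separate
     __aeabi_imod entry point, which is why the smod/umod slots above
     are cleared to NULL.  */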
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
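/* A practical consequence (see arm_mangle_type, registered via
   TARGET_MANGLE_TYPE above): the "__va_list" tag is mangled as if it
   lived in namespace std, so e.g. "void f (va_list)" becomes
   "_Z1fSt9__va_list", which is why the tag and field names chosen here
   must never change.  */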
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int        sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");
  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }
  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
	     "will be removed in a future release");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
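  /* Editor's sketch (not part of the original file): the arithmetic the
     comment above relies on, checked explicitly.  */
#if 0
#include <assert.h>
static void
check_thumb2_anchor_block (void)
{
  const int min_off = -248, max_off = 4095;
  /* Total block size: |min| + the anchor byte itself + max.  */
  const int block = -min_off + 1 + max_off;     /* 4344 */
  assert (block == 4344 && block % 8 == 0);     /* 4344 == 8 * 543 */
}
#endif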
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
1737 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1738 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1740 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1741 error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }
1762 arm_fpu_desc = &all_fpus[arm_fpu_index];
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
1787 if (TARGET_AAPCS_BASED
1788 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1789 error ("FPA is unsupported in the AAPCS");
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
1800 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1801 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1802 will ever exist. GCC makes no attempt to support this combination. */
1803 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1804 sorry ("iWMMXt and hardware floating point");
1806 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1807 if (TARGET_THUMB2 && TARGET_IWMMXT)
1808 sorry ("Thumb-2 iWMMXt");
1810 /* __fp16 support currently assumes the core has ldrh. */
1811 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1812 sorry ("__fp16 and no ldrh");
1814 /* If soft-float is specified then don't use FPU. */
1815 if (TARGET_SOFT_FLOAT)
1816 arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
1840 /* For arm2/3 there is no need to do any scheduling if there is only
1841 a floating point emulator, or we are doing software floating-point. */
1842 if ((TARGET_SOFT_FLOAT
1843 || (TARGET_FPA && arm_fpu_desc->rev))
1844 && (tune_flags & FL_MODE32) == 0)
1845 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1847 /* Use the cp15 method if it is available. */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }
  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("cannot use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }
1896 if (flag_pic && TARGET_VXWORKS_RTP)
1897 arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    fix_cm3_ldrd = (arm_selected_cpu->core == cortexm3);
1927 /* Enable -munaligned-access by default for
1928 - all ARMv6 architecture-based processors
1929 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1931 Disable -munaligned-access by default for
1932 - all pre-ARMv6 architecture-based processors
1933 - ARMv6-M architecture-based processors. */
  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
        unaligned_access = 1;
      else
        unaligned_access = 0;
    }
  else if (unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
1964 /* Hot/Cold partitioning is not currently supported, since we can't
1965 handle literal pool placement in that case. */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
1975 /* Hoisting PIC address calculations more aggressively provides a small,
1976 but measurable, size reduction for PIC code. Therefore, we decrease
1977 the bar for unrestricted expression hoisting to the cost of PIC address
1978 calculation, which is 2 instructions. */
1979 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1980 global_options.x_param_values,
1981 global_options_set.x_param_values);
1983 /* ARM EABI defaults to strict volatile bitfields. */
1984 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1985 flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
1995 /* Set up parameters to be used in prefetching algorithm. Do not override the
1996 defaults unless we are tuning for a core we have researched values for. */
1997 if (current_tune->num_prefetch_slots > 0)
1998 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1999 current_tune->num_prefetch_slots,
2000 global_options.x_param_values,
2001 global_options_set.x_param_values);
2002 if (current_tune->l1_cache_line_size >= 0)
2003 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2004 current_tune->l1_cache_line_size,
2005 global_options.x_param_values,
2006 global_options_set.x_param_values);
2007 if (current_tune->l1_cache_size >= 0)
2008 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2009 current_tune->l1_cache_size,
2010 global_options.x_param_values,
2011 global_options_set.x_param_values);
2013 /* Register global variables with the garbage collector. */
2014 arm_add_gc_roots ();
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */
typedef struct
{
  const char *const arg;
  const unsigned long return_value;
} isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
2036 { "IRQ", ARM_FT_ISR },
2037 { "irq", ARM_FT_ISR },
2038 { "FIQ", ARM_FT_FIQ },
2039 { "fiq", ARM_FT_FIQ },
2040 { "ABORT", ARM_FT_ISR },
2041 { "abort", ARM_FT_ISR },
2042 { "ABORT", ARM_FT_ISR },
2043 { "abort", ARM_FT_ISR },
2044 { "UNDEF", ARM_FT_EXCEPTION },
2045 { "undef", ARM_FT_EXCEPTION },
2046 { "SWI", ARM_FT_EXCEPTION },
2047 { "swi", ARM_FT_EXCEPTION },
  { NULL, ARM_FT_NORMAL }
};
2051 /* Returns the (interrupt) function type of the current
2052 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
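/* Editor's sketch (hypothetical user code, not from this file): the
   strings in isr_attribute_args are what users write in the "isr" or
   "interrupt" attribute; arm_isr_value maps them to ARM_FT_* codes.  */
#if 0
void uart_handler (void) __attribute__ ((interrupt ("IRQ")));  /* ARM_FT_ISR */
void fast_handler (void) __attribute__ ((interrupt ("FIQ")));  /* ARM_FT_FIQ */
void trap_handler (void) __attribute__ ((interrupt));          /* defaults to IRQ */
#endif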
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */
unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
           ldr          r8, [pc, #0]
           ldr          pc, [pc]
           .word        static chain value
           .word        function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the ARM implementation.
         Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
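/* Editor's sketch: the ARM-mode trampoline the template above emits,
   once arm_trampoline_init (below) fills in the two literal words.
   Register names follow the comment before this function; offsets
   assume the usual pc+8 read-ahead.

        ldr     r8, [pc, #0]    @ load static chain from first .word
        ldr     pc, [pc, #0]    @ jump via second .word
        .word   <static chain value>
        .word   <function address>  */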
/* Emit RTL insns to initialize the variable parts of a trampoline.  */
static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
2196 emit_block_move (m_tramp, assemble_trampoline_template (),
2197 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2199 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2200 emit_move_insn (mem, chain_value);
2202 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2203 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2204 emit_move_insn (mem, fnaddr);
2206 a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in Thumb mode, so set the
   bottom bit of the address.  */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;
  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;
2241 func_type = arm_current_func_type ();
  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2253 offsets = arm_get_frame_offsets ();
2254 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2256 /* As do variadic functions. */
2257 if (crtl->args.pretend_args_size
2258 || cfun->machine->uses_anonymous_args
2259 /* Or if the function calls __builtin_eh_return () */
2260 || crtl->calls_eh_return
2261 /* Or if the function calls alloca */
2262 || cfun->calls_alloca
2263 /* Or if there is a stack adjustment. However, if the stack pointer
2264 is saved on the stack, we can use a pre-incrementing stack load. */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;
2269 saved_int_regs = offsets->saved_regs_mask;
2271 /* Unfortunately, the insn
2273 ldmib sp, {..., sp, ...}
2275 triggers a bug on most SA-110 based devices, such that the stack
2276 pointer won't be correctly restored if the instruction takes a
2277 page fault. We work around this problem by popping r3 along with
2278 the other registers, since that is never slower than executing
2279 another instruction.
     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ... */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }
  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
2315 /* On StrongARM, conditional returns are expensive if they aren't
2316 taken and multiple registers have been stacked. */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }
  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;
  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */
int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  Only even shifts are allowed in
     ARM mode, so round down to the nearest even number.  */
  lowbit = ffs ((int) i) - 1;
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;
      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;
      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }

  return FALSE;
}
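/* Editor's sketch (illustrative assertions, not part of the build):
   some data points for const_ok_for_arm in ARM mode, where a valid
   immediate is an 8-bit value rotated right by an even amount.  */
#if 0
static void
const_ok_for_arm_examples (void)
{
  gcc_assert (const_ok_for_arm (0x000000ff));   /* 0xff, rotate 0 */
  gcc_assert (const_ok_for_arm (0x0000ff00));   /* 0xff << 8 */
  gcc_assert (const_ok_for_arm (0xc000003f));   /* 0xff rotated right by 2 */
  gcc_assert (!const_ok_for_arm (0x000001fe));  /* nine significant bits:
                                                   needs two insns in ARM mode */
}
#endif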
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      /* Otherwise, try mvn.  */
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* else fall through.  */
    case COMPARE:
    case EQ: case NE: case GT: case LE: case LT: case GE:
    case GEU: case LTU: case GTU: case LEU:
    case UNORDERED: case ORDERED: case UNEQ:
    case UNGE: case UNLT: case UNGT: case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
2480 /* Emit a sequence of insns to handle a large constant.
2481 CODE is the code of the operation required, it can be any of SET, PLUS,
2482 IOR, AND, XOR, MINUS;
2483 MODE is the mode in which the operation is being performed;
2484 VAL is the integer to operate on;
2485 SOURCE is the other operand (a register, or a null-pointer for SET);
2486 SUBTARGETS means it is safe to create scratch registers if that will
2487 either produce a simpler sequence, or we will want to cse the values.
2488 Return value is the number of insns emitted. */
2490 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;
2502 if (subtargets || code == SET
2503 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2504 && REGNO (target) != REGNO (source)))
2506 /* After arm_reorg has been called, we can't fix up expensive
2507 constants by pushing them into memory so we must synthesize
2508 them in-line, regardless of the cost. This is only likely to
2509 be more costly on chips that have load delay slots and we are
2510 compiling without running the scheduler (so no splitting
2511 occurred before the final instruction emission).
2513 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      if (!after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are dyadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));
              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));
              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }
  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;
  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
        {
          int consecutive_zeros = 0;

          if (!(val & (3 << i)))
            {
              while ((i < 32) && !(val & (3 << i)))
                {
                  consecutive_zeros += 2;
                  i += 2;
                }
              if (consecutive_zeros > best_consecutive_zeros)
                {
                  best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
            }
        }
    }
2596 /* So long as it won't require any more insns to do so, it's
2597 desirable to emit a small constant (in bits 0...9) in the last
2598 insn. This way there is more chance that it can be combined with
2599 a later addressing insn to form a pre-indexed load or store
2600 operation. Consider:
2602 *((volatile int *)0xe0000100) = 1;
2603 *((volatile int *)0xe0000110) = 2;
2605 We want this to wind up as:
2609 str rB, [rA, #0x100]
2611 str rB, [rA, #0x110]
2613 rather than having to synthesize both large constants from scratch.
2615 Therefore, we calculate how many insns would be required to emit
2616 the constant starting from `best_start', and also starting from
2617 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2618 yield a shorter sequence, we may as well use zero. */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
                                         best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
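/* Editor's sketch (hypothetical helper, not in this file): a greedy
   version of the split performed above, starting from the MSB and
   peeling one even-rotated 8-bit ARM immediate per iteration.  It
   ignores the wrap-around and replicated-constant cases the real
   code handles.  */
#if 0
static int
naive_arm_immediate_split (unsigned int val, unsigned int out[4])
{
  int n = 0;

  while (val && n < 4)
    {
      int msb = 31;
      while (!(val & (1u << msb)))
        msb--;
      /* 8-bit window ending on an even bit boundary that covers MSB.  */
      int shift = (msb | 1) + 1 - 8;
      if (shift < 0)
        shift = 0;
      out[n++] = val & (0xffu << shift);
      val &= ~(0xffu << shift);
    }
  return n;     /* e.g. 0xe0000100 -> { 0xe0000000, 0x00000100 }, n == 2 */
}
#endif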
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;
  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);
2667 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2668 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2671 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2672 /* We can use addw/subw for the last 12 bits. */
2676 /* Use an 8-bit shifted/rotated immediate. */
2680 result = remainder & ((0x0ff << end)
2681 | ((i < end) ? (0xff >> (32 - end))
2688 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2689 arbitrary shifts. */
2690 i -= TARGET_ARM ? 2 : 1;
      /* Next, see if we can do a better job with a thumb2 replicated
         8-bit constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         only require one insn.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
2705 b1 = (remainder & 0xff000000) >> 24;
2706 b2 = (remainder & 0x00ff0000) >> 16;
2707 b3 = (remainder & 0x0000ff00) >> 8;
2708 b4 = remainder & 0xff;
2712 /* The 8-bit immediate already found clears b1 (and maybe b2),
2713 but must leave b3 and b4 alone. */
2715 /* First try to find a 32-bit replicated constant that clears
2716 almost everything. We can assume that we can't do it in one,
2717 or else we wouldn't be here. */
2718 unsigned int tmp = b1 & b2 & b3 & b4;
2719 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2721 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2722 + (tmp == b3) + (tmp == b4);
2724 && (matching_bytes >= 3
2725 || (matching_bytes == 2
2726 && const_ok_for_op (remainder & ~tmp2, code))))
2728 /* At least 3 of the bytes match, and the fourth has at
2729 least as many bits set, or two of the bytes match
2730 and it will only require one more insn to finish. */
2738 /* Second, try to find a 16-bit replicated constant that can
2739 leave three of the bytes clear. If b2 or b4 is already
2740 zero, then we can. If the 8-bit from above would not
2741 clear b2 anyway, then we still win. */
2742 else if (b1 == b3 && (!b2 || !b4
2743 || (remainder & 0x00ff0000 & ~result)))
2745 result = remainder & 0xff00ff00;
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */
2755 /* If we can clear b2 and b4 at once, then we win, since the
2756 8-bits couldn't possibly reach that far. */
2759 result = remainder & 0x00ff00ff;
      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
2800 int set_sign_bit_copies = 0;
2801 int clear_sign_bit_copies = 0;
2802 int clear_zero_bit_copies = 0;
2803 int set_zero_bit_copies = 0;
2804 int insns = 0, neg_insns, inv_insns;
2805 unsigned HOST_WIDE_INT temp1, temp2;
2806 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2807 struct four_ints *immediates;
2808 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2810 /* Find out which operations are safe for a given CODE. Also do a quick
2811 check for degenerate cases; these can occur when DImode operations
2824 if (remainder == 0xffffffff)
2827 emit_constant_insn (cond,
2828 gen_rtx_SET (VOIDmode, target,
2829 GEN_INT (ARM_SIGN_EXTEND (val))));
2835 if (reload_completed && rtx_equal_p (target, source))
2839 emit_constant_insn (cond,
2840 gen_rtx_SET (VOIDmode, target, source));
2849 emit_constant_insn (cond,
2850 gen_rtx_SET (VOIDmode, target, const0_rtx));
2853 if (remainder == 0xffffffff)
2855 if (reload_completed && rtx_equal_p (target, source))
2858 emit_constant_insn (cond,
2859 gen_rtx_SET (VOIDmode, target, source));
2868 if (reload_completed && rtx_equal_p (target, source))
2871 emit_constant_insn (cond,
2872 gen_rtx_SET (VOIDmode, target, source));
2876 if (remainder == 0xffffffff)
2879 emit_constant_insn (cond,
2880 gen_rtx_SET (VOIDmode, target,
2881 gen_rtx_NOT (mode, source)));
2888 /* We treat MINUS as (val - source), since (source - val) is always
2889 passed as (source + (-val)). */
2893 emit_constant_insn (cond,
2894 gen_rtx_SET (VOIDmode, target,
2895 gen_rtx_NEG (mode, source)));
2898 if (const_ok_for_arm (val))
2901 emit_constant_insn (cond,
2902 gen_rtx_SET (VOIDmode, target,
2903 gen_rtx_MINUS (mode, GEN_INT (val),
2914 /* If we can do it in one insn get out quickly. */
2915 if (const_ok_for_op (val, code))
2918 emit_constant_insn (cond,
2919 gen_rtx_SET (VOIDmode, target,
2921 ? gen_rtx_fmt_ee (code, mode, source,
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }
  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }
  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }
  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
2971 if (set_sign_bit_copies > 1)
2973 if (const_ok_for_arm
2974 (temp1 = ARM_SIGN_EXTEND (remainder
2975 << (set_sign_bit_copies - 1))))
2979 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2980 emit_constant_insn (cond,
2981 gen_rtx_SET (VOIDmode, new_src,
2983 emit_constant_insn (cond,
2984 gen_ashrsi3 (target, new_src,
2985 GEN_INT (set_sign_bit_copies - 1)));
2989 /* For an inverted constant, we will need to set the low bits,
2990 these will be shifted out of harm's way. */
2991 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2992 if (const_ok_for_arm (~temp1))
2996 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2997 emit_constant_insn (cond,
2998 gen_rtx_SET (VOIDmode, new_src,
3000 emit_constant_insn (cond,
3001 gen_ashrsi3 (target, new_src,
3002 GEN_INT (set_sign_bit_copies - 1)));
3008 /* See if we can calculate the value as the difference between two
3009 valid immediates. */
3010 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3012 int topshift = clear_sign_bit_copies & ~1;
3014 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3015 & (0xff000000 >> topshift));
3017 /* If temp1 is zero, then that means the 9 most significant
3018 bits of remainder were 1 and we've caused it to overflow.
3019 When topshift is 0 we don't need to do anything since we
3020 can borrow from 'bit 32'. */
3021 if (temp1 == 0 && topshift != 0)
3022 temp1 = 0x80000000 >> (topshift - 1);
3024 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3026 if (const_ok_for_arm (temp2))
3030 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3031 emit_constant_insn (cond,
3032 gen_rtx_SET (VOIDmode, new_src,
3034 emit_constant_insn (cond,
3035 gen_addsi3 (target, new_src,
3043 /* See if we can generate this by setting the bottom (or the top)
3044 16 bits, and then shifting these into the other half of the
3045 word. We only look for the simplest cases, to do more would cost
3046 too much. Be careful, however, not to generate this when the
3047 alternative would take fewer insns. */
3048 if (val & 0xffff0000)
3050 temp1 = remainder & 0xffff0000;
3051 temp2 = remainder & 0x0000ffff;
3053 /* Overlaps outside this range are best done using other methods. */
3054 for (i = 9; i < 24; i++)
3056 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3057 && !const_ok_for_arm (temp2))
3059 rtx new_src = (subtargets
3060 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3062 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3063 source, subtargets, generate);
3071 gen_rtx_ASHIFT (mode, source,
3078 /* Don't duplicate cases already considered. */
3079 for (i = 17; i < 24; i++)
3081 if (((temp1 | (temp1 >> i)) == remainder)
3082 && !const_ok_for_arm (temp1))
3084 rtx new_src = (subtargets
3085 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3087 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3088 source, subtargets, generate);
3093 gen_rtx_SET (VOIDmode, target,
3096 gen_rtx_LSHIFTRT (mode, source,
3107 /* If we have IOR or XOR, and the constant can be loaded in a
3108 single instruction, and we can find a temporary to put it in,
3109 then this can be done in two instructions instead of 3-4. */
3111 /* TARGET can't be NULL if SUBTARGETS is 0 */
3112 || (reload_completed && !reg_mentioned_p (target, source)))
3114 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3118 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3120 emit_constant_insn (cond,
3121 gen_rtx_SET (VOIDmode, sub,
3123 emit_constant_insn (cond,
3124 gen_rtx_SET (VOIDmode, target,
3125 gen_rtx_fmt_ee (code, mode,
3136 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3137 and the remainder 0s for e.g. 0xfff00000)
3138 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3140 This can be done in 2 instructions by using shifts with mov or mvn.
         e.g. for x = x | 0xfff00000 we generate:
              mvn       r0, r0, asl #12
              mvn       r0, r0, lsr #12  */
3146 if (set_sign_bit_copies > 8
3147 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3151 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3152 rtx shift = GEN_INT (set_sign_bit_copies);
3156 gen_rtx_SET (VOIDmode, sub,
3158 gen_rtx_ASHIFT (mode,
3163 gen_rtx_SET (VOIDmode, target,
3165 gen_rtx_LSHIFTRT (mode, sub,
3172 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3174 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
         For example, r0 = r0 | 0xfff:
              mvn       r0, r0, lsr #12
              mvn       r0, r0, asl #12  */
3181 if (set_zero_bit_copies > 8
3182 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3186 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3187 rtx shift = GEN_INT (set_zero_bit_copies);
3191 gen_rtx_SET (VOIDmode, sub,
3193 gen_rtx_LSHIFTRT (mode,
3198 gen_rtx_SET (VOIDmode, target,
3200 gen_rtx_ASHIFT (mode, sub,
3206 /* This will never be reached for Thumb2 because orn is a valid
3207 instruction. This is for Thumb1 and the ARM 32 bit cases.
3209 x = y | constant (such that ~constant is a valid constant)
3211 x = ~(~y & ~constant).
3213 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3217 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3218 emit_constant_insn (cond,
3219 gen_rtx_SET (VOIDmode, sub,
3220 gen_rtx_NOT (mode, source)));
3223 sub = gen_reg_rtx (mode);
3224 emit_constant_insn (cond,
3225 gen_rtx_SET (VOIDmode, sub,
3226 gen_rtx_AND (mode, source,
3228 emit_constant_insn (cond,
3229 gen_rtx_SET (VOIDmode, target,
3230 gen_rtx_NOT (mode, sub)));
3237 /* See if two shifts will do 2 or more insn's worth of work. */
3238 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3240 HOST_WIDE_INT shift_mask = ((0xffffffff
3241 << (32 - clear_sign_bit_copies))
3244 if ((remainder | shift_mask) != 0xffffffff)
3248 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3249 insns = arm_gen_constant (AND, mode, cond,
3250 remainder | shift_mask,
3251 new_src, source, subtargets, 1);
3256 rtx targ = subtargets ? NULL_RTX : target;
3257 insns = arm_gen_constant (AND, mode, cond,
3258 remainder | shift_mask,
3259 targ, source, subtargets, 0);
3265 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3266 rtx shift = GEN_INT (clear_sign_bit_copies);
3268 emit_insn (gen_ashlsi3 (new_src, source, shift));
3269 emit_insn (gen_lshrsi3 (target, new_src, shift));
3275 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3277 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3279 if ((remainder | shift_mask) != 0xffffffff)
3283 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3285 insns = arm_gen_constant (AND, mode, cond,
3286 remainder | shift_mask,
3287 new_src, source, subtargets, 1);
3292 rtx targ = subtargets ? NULL_RTX : target;
3294 insns = arm_gen_constant (AND, mode, cond,
3295 remainder | shift_mask,
3296 targ, source, subtargets, 0);
3302 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3303 rtx shift = GEN_INT (clear_zero_bit_copies);
3305 emit_insn (gen_lshrsi3 (new_src, source, shift));
3306 emit_insn (gen_ashlsi3 (target, new_src, shift));
3318 /* Calculate what the instruction sequences would be if we generated it
3319 normally, negated, or inverted. */
3321 /* AND cannot be split into multiple insns, so invert and use BIC. */
3324 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3327 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3332 if (can_invert || final_invert)
3333 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3338 immediates = &pos_immediates;
3340 /* Is the negated immediate sequence more efficient? */
3341 if (neg_insns < insns && neg_insns <= inv_insns)
3344 immediates = &neg_immediates;
3349 /* Is the inverted immediate sequence more efficient?
3350 We must allow for an extra NOT instruction for XOR operations, although
3351 there is some chance that the final 'mvn' will get optimized later. */
3352 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3355 immediates = &inv_immediates;
3363 /* Now output the chosen sequence as instructions. */
3366 for (i = 0; i < insns; i++)
3368 rtx new_src, temp1_rtx;
3370 temp1 = immediates->i[i];
3372 if (code == SET || code == MINUS)
3373 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3374 else if ((final_invert || i < (insns - 1)) && subtargets)
3375 new_src = gen_reg_rtx (mode);
3381 else if (can_negate)
3384 temp1 = trunc_int_for_mode (temp1, mode);
3385 temp1_rtx = GEN_INT (temp1);
3389 else if (code == MINUS)
3390 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3392 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3394 emit_constant_insn (cond,
3395 gen_rtx_SET (VOIDmode, new_src,
3401 can_negate = can_invert;
3405 else if (code == MINUS)
3413 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3414 gen_rtx_NOT (mode, source)));
3421 /* Canonicalize a comparison so that we are more likely to recognize it.
3422 This can be done for a few constant compares, where we can make the
3423 immediate value easier to load. */
enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
  enum machine_mode mode;
3429 unsigned HOST_WIDE_INT i, maxval;
3431 mode = GET_MODE (*op0);
3432 if (mode == VOIDmode)
3433 mode = GET_MODE (*op1);
3435 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      rtx tem;

      /* To keep things simple, always use the Cirrus cfcmp64 if it is
         available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
        return code;
3450 if (code == GT || code == LE
3451 || (!TARGET_ARM && (code == GTU || code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (GET_CODE (*op1) == CONST_INT)
            {
              i = INTVAL (*op1);
              switch (code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GT ? GE : LT;
                    }
                  break;
                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GTU ? GEU : LTU;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }
          /* If that did not work, reverse the condition.  */
          tem = *op0;
          *op0 = *op1;
          *op1 = tem;
          return swap_condition (code);
        }

      return code;
    }
3493 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3494 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3495 to facilitate possible combining with a cmp into 'ands'. */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
3498 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3499 && GET_MODE (XEXP (*op0, 0)) == QImode
3500 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3501 && subreg_lowpart_p (XEXP (*op0, 0))
3502 && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));
3506 /* Comparisons smaller than DImode. Only adjust comparisons against
3507 an out-of-range constant. */
  if (GET_CODE (*op1) != CONST_INT
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return code;
  i = INTVAL (*op1);

  switch (code)
    {
    case GT: case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GT ? GE : LT;
        }
      break;

    case GE: case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GE ? GT : LE;
        }
      break;

    case GTU: case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GTU ? GEU : LTU;
        }
      break;

    case GEU: case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GEU ? GTU : LEU;
        }
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
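/* Editor's sketch: what the adjustment above achieves.  0x101 is not a
   valid ARM immediate (nine significant bits), but 0x100 is, so for
   "x >= 0x101" we rewrite GE 0x101 as GT 0x100:

        cmp     r0, #256
        bgt     ...

   instead of first loading 0x101 into a scratch register.  */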
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
                    bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;
3579 mode = TYPE_MODE (type);
3581 if (TARGET_AAPCS_BASED)
3582 return aapcs_allocate_return_reg (mode, type, func);
3584 /* Promote integer types. */
3585 if (INTEGRAL_TYPE_P (type))
3586 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }
  return arm_libcall_value_1 (mode);
}
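/* Editor's sketch: the big-endian size rounding above, worked through.
   A 6-byte struct returned in registers gets
     size += UNITS_PER_WORD - 6 % UNITS_PER_WORD   (6 -> 8 bytes)
   so mode_for_size (64, MODE_INT, 0) yields DImode and the value sits
   in the most-significant end of r0/r1, as big-endian AAPCS requires.  */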
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}

static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}

static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static htab_t libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
                                  NULL);
3633 add_libcall (libcall_htab,
3634 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3635 add_libcall (libcall_htab,
3636 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3637 add_libcall (libcall_htab,
3638 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3639 add_libcall (libcall_htab,
3640 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3642 add_libcall (libcall_htab,
3643 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3644 add_libcall (libcall_htab,
3645 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3651 add_libcall (libcall_htab,
3652 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3653 add_libcall (libcall_htab,
3654 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3655 add_libcall (libcall_htab,
3656 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3657 add_libcall (libcall_htab,
3658 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3659 add_libcall (libcall_htab,
3660 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3661 add_libcall (libcall_htab,
3662 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3664 /* Values from double-precision helper functions are returned in core
3665 registers if the selected core only supports single-precision
3666 arithmetic, even if we are using the hard-float ABI. The same is
3667 true for single-precision helpers, but we will never be using the
3668 hard-float ABI on a CPU which doesn't support single-precision
3669 operations in hardware. */
3670 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3671 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3672 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3673 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3674 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3675 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3676 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3677 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3678 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3679 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3680 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && htab_find (libcall_htab, libcall) != NULL;
}
static rtx
arm_libcall_value_1 (enum machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_32BIT
           && TARGET_HARD_FLOAT_ABI
           && TARGET_FPA
           && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
  else if (TARGET_32BIT
           && TARGET_HARD_FLOAT_ABI
           && TARGET_MAVERICK
           && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3705 else if (TARGET_IWMMXT_ABI
3706 && arm_vector_mode_supported_p (mode))
3707 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
static rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
3718 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3719 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_VFP
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_32BIT
          && TARGET_HARD_FLOAT_ABI
          && TARGET_MAVERICK
          && regno == FIRST_CIRRUS_FP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM)
      || (TARGET_32BIT
          && TARGET_HARD_FLOAT_ABI
          && TARGET_FPA
          && regno == FIRST_FPA_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
        {
          if (TARGET_VFP)
            size += 32;
          if (TARGET_FPA)
            size += 12;
          if (TARGET_MAVERICK)
            size += 8;
        }
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
  if (TARGET_AAPCS_BASED)
    {
3794 /* Simple, non-aggregate types (ie not including vectors and
3795 complex) are always returned in a register (or registers).
3796 We don't care about which register here, so we can short-cut
3797 some of the detail. */
3798 if (!AGGREGATE_TYPE_P (type)
3799 && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;
      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;
3808 /* Check any available co-processors to see if they accept the
3809 type as a register candidate (VFP, for example, can return
3810 some aggregates in consecutive registers). These aren't
3811 available if the call is variadic. */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;
3815 /* Vector values should be returned using ARM registers, not
3816 memory (unless they're over 16 bytes, which will break since
3817 we only have four call-clobbered registers to play with). */
3818 if (TREE_CODE (type) == VECTOR_TYPE)
3819 return (size < 0 || size > (4 * UNITS_PER_WORD));
      /* The rest go in memory.  */
      return true;
    }
3825 if (TREE_CODE (type) == VECTOR_TYPE)
3826 return (size < 0 || size > (4 * UNITS_PER_WORD));
  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != VECTOR_TYPE)
    /* All simple types are returned in registers.  */
    return false;
  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }
  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;
  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;
3854 /* For a struct the APCS says that we only return in a register
3855 if the type is 'integer like' and every addressable element
3856 has an offset of zero. For practical purposes this means
3857 that the structure can have at most one non bit-field element
3858 and that this element must be the first one in the structure. */
3860 /* Find the first field, ignoring non FIELD_DECL things which will
3861 have been created by C++. */
3862 for (field = TYPE_FIELDS (type);
3863 field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3870 /* Check that the first field is valid for returning in a register. */
3872 /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;
3876 /* ... Aggregates that are not themselves valid for returning in
3877 a register are not allowed. */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;
3881 /* Now check the remaining fields, if any. Only bitfields are allowed,
3882 since they are not addressable. */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }
  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;
          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;
          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
3919 #endif /* not ARM_WINCE */
  /* Return all other types in memory.  */
  return true;
}
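/* Editor's sketch (hypothetical types, not from this file): the APCS
   "integer like" rule above in practice, assuming a 4-byte word.
     struct ok  { short v; unsigned tag : 8; };   returned in a register:
       one leading non-bitfield integral member, the rest bitfields.
     struct bad { float f; };                     returned in memory:
       the first field is a float, even though it fits in a word.  */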
/* Indicate whether or not words of a double are in big-endian order.  */
int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;

  /* For FPA, float words are always big-endian.  For VFP, float words
     follow the memory system mode.  */
  if (TARGET_FPA)
    return 1;

  if (TARGET_VFP)
    return (TARGET_BIG_END ? 1 : 0);

  return 1;
}
/* Names of the "pcs" attribute's arguments and the calling-convention
   variants they select.  */
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;
3976 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3978 /* Check it against the list of known arguments. */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
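/* Editor's sketch (hypothetical user code): the source-level spelling
   that reaches arm_pcs_from_attribute.  */
#if 0
double dot (const double *a, const double *b, int n)
  __attribute__ ((pcs ("aapcs-vfp")));  /* maps to ARM_PCS_AAPCS_VFP */
#endif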
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }
  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
4045 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4046 const_tree fntype ATTRIBUTE_UNUSED,
4047 rtx libcall ATTRIBUTE_UNUSED,
4048 const_tree fndecl ATTRIBUTE_UNUSED)
4050 /* Record the unallocated VFP registers. */
4051 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4052 pcum->aapcs_vfp_reg_alloc = 0;
4055 /* Walk down the type tree of TYPE counting consecutive base elements.
4056 If *MODEP is VOIDmode, then set it to the first valid floating point
4057 type. If a non-floating point type is found, or if a floating point
4058 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4059 otherwise return the count in the sub-tree. */
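/* For example (illustrative, not from the original source): walking
   "struct { float x, y, z; }" returns 3 with *MODEP set to SFmode (a
   homogeneous-aggregate candidate), while "struct { float x; double y; }"
   returns -1 because the two element modes differ.  */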
4061 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4063 enum machine_mode mode;
4066 switch (TREE_CODE (type))
4069 mode = TYPE_MODE (type);
4070 if (mode != DFmode && mode != SFmode)
4073 if (*modep == VOIDmode)
4082 mode = TYPE_MODE (TREE_TYPE (type));
4083 if (mode != DFmode && mode != SFmode)
4086 if (*modep == VOIDmode)
4095 /* Use V2SImode and V4SImode as representatives of all 64-bit
4096 and 128-bit vector types, whether or not those modes are
4097 supported with the present options. */
4098 size = int_size_in_bytes (type);
4111 if (*modep == VOIDmode)
4114 /* Vector modes are considered to be opaque: two vectors are
4115 equivalent for the purposes of being homogeneous aggregates
4116 if they are the same size. */
4125 tree index = TYPE_DOMAIN (type);
4127 /* Can't handle incomplete types. */
4128 if (!COMPLETE_TYPE_P(type))
4131 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4134 || !TYPE_MAX_VALUE (index)
4135 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4136 || !TYPE_MIN_VALUE (index)
4137 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4141 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4142 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4144 /* There must be no padding. */
4145 if (!host_integerp (TYPE_SIZE (type), 1)
4146 || (tree_low_cst (TYPE_SIZE (type), 1)
4147 != count * GET_MODE_BITSIZE (*modep)))
4159 /* Can't handle incomplete types. */
4160 if (!COMPLETE_TYPE_P(type))
4163 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4165 if (TREE_CODE (field) != FIELD_DECL)
4168 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4174 /* There must be no padding. */
4175 if (!host_integerp (TYPE_SIZE (type), 1)
4176 || (tree_low_cst (TYPE_SIZE (type), 1)
4177 != count * GET_MODE_BITSIZE (*modep)))
4184 case QUAL_UNION_TYPE:
4186 /* These aren't very interesting except in a degenerate case. */
4191 /* Can't handle incomplete types. */
4192 if (!COMPLETE_TYPE_P(type))
4195 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4197 if (TREE_CODE (field) != FIELD_DECL)
4200 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4203 count = count > sub_count ? count : sub_count;
4206 /* There must be no padding. */
4207 if (!host_integerp (TYPE_SIZE (type), 1)
4208 || (tree_low_cst (TYPE_SIZE (type), 1)
4209 != count * GET_MODE_BITSIZE (*modep)))
4222 /* Return true if PCS_VARIANT should use VFP registers. */
4224 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4226 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4228 static bool seen_thumb1_vfp = false;
4230 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4232 sorry ("Thumb-1 hard-float VFP ABI");
4233 /* sorry() is not immediately fatal, so only display this once. */
4234 seen_thumb1_vfp = true;
4240 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4243 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4244 (TARGET_VFP_DOUBLE || !is_double));
4248 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4249 enum machine_mode mode, const_tree type,
4250 enum machine_mode *base_mode, int *count)
4252 enum machine_mode new_mode = VOIDmode;
4254 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4255 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4256 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4261 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4264 new_mode = (mode == DCmode ? DFmode : SFmode);
4266 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4268 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4270 if (ag_count > 0 && ag_count <= 4)
4279 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4282 *base_mode = new_mode;
4287 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4288 enum machine_mode mode, const_tree type)
4290 int count ATTRIBUTE_UNUSED;
4291 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4293 if (!use_vfp_abi (pcs_variant, false))
4295 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4300 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4303 if (!use_vfp_abi (pcum->pcs_variant, false))
4306 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4307 &pcum->aapcs_vfp_rmode,
4308 &pcum->aapcs_vfp_rcount);
4312 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4313 const_tree type ATTRIBUTE_UNUSED)
4315 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4316 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
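/* For example (illustrative): a homogeneous aggregate of two DFmode
   values gives SHIFT == 2 and RCOUNT == 2, so MASK spans four S
   registers; the loop below then searches for the lowest D-aligned run
   of free registers.  */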
4319 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4320 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4322 pcum->aapcs_vfp_reg_alloc = mask << regno;
4323 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4326 int rcount = pcum->aapcs_vfp_rcount;
4328 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4332 /* Avoid using unsupported vector modes. */
4333 if (rmode == V2SImode)
4335 else if (rmode == V4SImode)
4342 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4343 for (i = 0; i < rcount; i++)
4345 rtx tmp = gen_rtx_REG (rmode,
4346 FIRST_VFP_REGNUM + regno + i * rshift);
4347 tmp = gen_rtx_EXPR_LIST
4349 GEN_INT (i * GET_MODE_SIZE (rmode)));
4350 XVECEXP (par, 0, i) = tmp;
4353 pcum->aapcs_reg = par;
4356 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4363 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4364 enum machine_mode mode,
4365 const_tree type ATTRIBUTE_UNUSED)
4367 if (!use_vfp_abi (pcs_variant, false))
4370 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4373 enum machine_mode ag_mode;
4378 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4383 if (ag_mode == V2SImode)
4385 else if (ag_mode == V4SImode)
4391 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4392 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4393 for (i = 0; i < count; i++)
4395 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4396 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4397 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4398 XVECEXP (par, 0, i) = tmp;
4404 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4408 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4409 enum machine_mode mode ATTRIBUTE_UNUSED,
4410 const_tree type ATTRIBUTE_UNUSED)
4412 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4413 pcum->aapcs_vfp_reg_alloc = 0;
4417 #define AAPCS_CP(X) \
4419 aapcs_ ## X ## _cum_init, \
4420 aapcs_ ## X ## _is_call_candidate, \
4421 aapcs_ ## X ## _allocate, \
4422 aapcs_ ## X ## _is_return_candidate, \
4423 aapcs_ ## X ## _allocate_return_reg, \
4424 aapcs_ ## X ## _advance \
4427 /* Table of co-processors that can be used to pass arguments in
4428 registers. Ideally no argument should be a candidate for more than
4429 one co-processor table entry, but the table is processed in order
4430 and stops after the first match. If that entry then fails to put
4431 the argument into a co-processor register, the argument will go on
4432 the stack. */
4433 static struct
4434 {
4435 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4436 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4438 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4439 BLKmode) is a candidate for this co-processor's registers; this
4440 function should ignore any position-dependent state in
4441 CUMULATIVE_ARGS and only use call-type dependent information. */
4442 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4444 /* Return true if the argument does get a co-processor register; it
4445 should set aapcs_reg to an RTX of the register allocated as is
4446 required for a return from FUNCTION_ARG. */
4447 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4449 /* Return true if a result of mode MODE (or type TYPE if MODE is
4450 BLKmode) can be returned in this co-processor's registers. */
4451 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4453 /* Allocate and return an RTX element to hold the return type of a
4454 call; this routine must not fail and will only be called if
4455 is_return_candidate returned true with the same parameters. */
4456 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4458 /* Finish processing this argument and prepare to start processing
4459 the next one. */
4460 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4461 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4469 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4474 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4475 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4482 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4484 /* We aren't passed a decl, so we can't check that a call is local.
4485 However, it isn't clear that that would be a win anyway, since it
4486 might limit some tail-calling opportunities. */
4487 enum arm_pcs pcs_variant;
4491 const_tree fndecl = NULL_TREE;
4493 if (TREE_CODE (fntype) == FUNCTION_DECL)
4496 fntype = TREE_TYPE (fntype);
4499 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4502 pcs_variant = arm_pcs_default;
4504 if (pcs_variant != ARM_PCS_AAPCS)
4508 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4509 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4518 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4521 /* We aren't passed a decl, so we can't check that a call is local.
4522 However, it isn't clear that that would be a win anyway, since it
4523 might limit some tail-calling opportunities. */
4524 enum arm_pcs pcs_variant;
4525 int unsignedp ATTRIBUTE_UNUSED;
4529 const_tree fndecl = NULL_TREE;
4531 if (TREE_CODE (fntype) == FUNCTION_DECL)
4534 fntype = TREE_TYPE (fntype);
4537 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4540 pcs_variant = arm_pcs_default;
4542 /* Promote integer types. */
4543 if (type && INTEGRAL_TYPE_P (type))
4544 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4546 if (pcs_variant != ARM_PCS_AAPCS)
4550 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4551 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4553 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4557 /* Promotes small structs returned in a register to full-word size
4558 for big-endian AAPCS. */
4559 if (type && arm_return_in_msb (type))
4561 HOST_WIDE_INT size = int_size_in_bytes (type);
4562 if (size % UNITS_PER_WORD != 0)
4564 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4565 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4569 return gen_rtx_REG (mode, R0_REGNUM);
4573 aapcs_libcall_value (enum machine_mode mode)
4575 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4576 && GET_MODE_SIZE (mode) <= 4)
4579 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4582 /* Lay out a function argument using the AAPCS rules. The rule
4583 numbers referred to here are those in the AAPCS. */
4585 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4586 const_tree type, bool named)
4591 /* We only need to do this once per argument. */
4592 if (pcum->aapcs_arg_processed)
4595 pcum->aapcs_arg_processed = true;
4597 /* Special case: if named is false then we are handling an incoming
4598 anonymous argument which is on the stack. */
4602 /* Is this a potential co-processor register candidate? */
4603 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4605 int slot = aapcs_select_call_coproc (pcum, mode, type);
4606 pcum->aapcs_cprc_slot = slot;
4608 /* We don't have to apply any of the rules from part B of the
4609 preparation phase, these are handled elsewhere in the
4610 compiler. */
4614 /* A Co-processor register candidate goes either in its own
4615 class of registers or on the stack. */
4616 if (!pcum->aapcs_cprc_failed[slot])
4618 /* C1.cp - Try to allocate the argument to co-processor
4619 registers. */
4620 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4623 /* C2.cp - Put the argument on the stack and note that we
4624 can't assign any more candidates in this slot. We also
4625 need to note that we have allocated stack space, so that
4626 we won't later try to split a non-cprc candidate between
4627 core registers and the stack. */
4628 pcum->aapcs_cprc_failed[slot] = true;
4629 pcum->can_split = false;
4632 /* We didn't get a register, so this argument goes on the
4633 stack. */
4634 gcc_assert (pcum->can_split == false);
4639 /* C3 - For double-word aligned arguments, round the NCRN up to the
4640 next even number. */
4641 ncrn = pcum->aapcs_ncrn;
4642 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4645 nregs = ARM_NUM_REGS2(mode, type);
4647 /* Sigh, this test should really assert that nregs > 0, but a GCC
4648 extension allows empty structs and then gives them empty size; it
4649 then allows such a structure to be passed by value. For some of
4650 the code below we have to pretend that such an argument has
4651 non-zero size so that we 'locate' it correctly either in
4652 registers or on the stack. */
4653 gcc_assert (nregs >= 0);
4655 nregs2 = nregs ? nregs : 1;
4657 /* C4 - Argument fits entirely in core registers. */
4658 if (ncrn + nregs2 <= NUM_ARG_REGS)
4660 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4661 pcum->aapcs_next_ncrn = ncrn + nregs;
4665 /* C5 - Some core registers left and there are no arguments already
4666 on the stack: split this argument between the remaining core
4667 registers and the stack. */
4668 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4670 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4671 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4672 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4676 /* C6 - NCRN is set to 4. */
4677 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4679 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
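/* Worked example for the rules above (illustrative): for
   "f (int a, double b)" under the base AAPCS, A is allocated to r0 by
   C4, C3 rounds the NCRN up to r2 for the doubleword-aligned B, and C4
   then places B in r2-r3; any further argument falls through to the
   stack via C6-C8.  */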
4683 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4684 for a call to a function whose data type is FNTYPE.
4685 For a library call, FNTYPE is NULL. */
4687 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4688 rtx libname ATTRIBUTE_UNUSED,
4689 tree fndecl ATTRIBUTE_UNUSED)
4691 /* Long call handling. */
4693 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4695 pcum->pcs_variant = arm_pcs_default;
4697 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4699 if (arm_libcall_uses_aapcs_base (libname))
4700 pcum->pcs_variant = ARM_PCS_AAPCS;
4702 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4703 pcum->aapcs_reg = NULL_RTX;
4704 pcum->aapcs_partial = 0;
4705 pcum->aapcs_arg_processed = false;
4706 pcum->aapcs_cprc_slot = -1;
4707 pcum->can_split = true;
4709 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4713 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4715 pcum->aapcs_cprc_failed[i] = false;
4716 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4724 /* On the ARM, the offset starts at 0. */
4726 pcum->iwmmxt_nregs = 0;
4727 pcum->can_split = true;
4729 /* Varargs vectors are treated the same as long long.
4730 named_count avoids having to change the way arm handles 'named'. */
4731 pcum->named_count = 0;
4734 if (TARGET_REALLY_IWMMXT && fntype)
4738 for (fn_arg = TYPE_ARG_TYPES (fntype);
4739 fn_arg;
4740 fn_arg = TREE_CHAIN (fn_arg))
4741 pcum->named_count += 1;
4743 if (! pcum->named_count)
4744 pcum->named_count = INT_MAX;
4749 /* Return true if mode/type need doubleword alignment. */
4751 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4753 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4754 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
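/* For example: DImode and DFmode have 64-bit alignment and so qualify,
   as does any type whose user-specified alignment exceeds PARM_BOUNDARY
   (32 bits on ARM).  */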
4758 /* Determine where to put an argument to a function.
4759 Value is zero to push the argument on the stack,
4760 or a hard register in which to store the argument.
4762 MODE is the argument's machine mode.
4763 TYPE is the data type of the argument (as a tree).
4764 This is null for libcalls where that information may
4766 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4767 the preceding args and about the function being called.
4768 NAMED is nonzero if this argument is a named parameter
4769 (otherwise it is an extra parameter matching an ellipsis).
4771 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4772 other arguments are passed on the stack. If (NAMED == 0) (which happens
4773 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4774 defined), say it is passed on the stack (function_prologue will
4775 indeed arrange for it to be passed on the stack if necessary). */
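/* For example (illustrative): given "int f (int a, int b, int c, int d,
   int e)", the arguments A-D arrive in r0-r3 and E is passed on the
   stack.  */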
4778 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4779 const_tree type, bool named)
4781 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4784 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4785 a call insn (op3 of a call_value insn). */
4786 if (mode == VOIDmode)
4789 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4791 aapcs_layout_arg (pcum, mode, type, named);
4792 return pcum->aapcs_reg;
4795 /* Varargs vectors are treated the same as long long.
4796 named_count avoids having to change the way arm handles 'named'. */
4797 if (TARGET_IWMMXT_ABI
4798 && arm_vector_mode_supported_p (mode)
4799 && pcum->named_count > pcum->nargs + 1)
4801 if (pcum->iwmmxt_nregs <= 9)
4802 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4805 pcum->can_split = false;
4810 /* Put doubleword aligned quantities in even register pairs. */
4812 && ARM_DOUBLEWORD_ALIGN
4813 && arm_needs_doubleword_align (mode, type))
4816 /* Only allow splitting an arg between regs and memory if all preceding
4817 args were allocated to regs. For args passed by reference we only count
4818 the reference pointer. */
4819 if (pcum->can_split)
4822 nregs = ARM_NUM_REGS2 (mode, type);
4824 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4827 return gen_rtx_REG (mode, pcum->nregs);
4831 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4833 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4834 ? DOUBLEWORD_ALIGNMENT
4839 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4840 tree type, bool named)
4842 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4843 int nregs = pcum->nregs;
4845 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4847 aapcs_layout_arg (pcum, mode, type, named);
4848 return pcum->aapcs_partial;
4851 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4854 if (NUM_ARG_REGS > nregs
4855 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4856 && pcum->can_split)
4857 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4862 /* Update the data in PCUM to advance over an argument
4863 of mode MODE and data type TYPE.
4864 (TYPE is null for libcalls where that information may not be available.) */
4867 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4868 const_tree type, bool named)
4870 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4872 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4874 aapcs_layout_arg (pcum, mode, type, named);
4876 if (pcum->aapcs_cprc_slot >= 0)
4878 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4880 pcum->aapcs_cprc_slot = -1;
4883 /* Generic stuff. */
4884 pcum->aapcs_arg_processed = false;
4885 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4886 pcum->aapcs_reg = NULL_RTX;
4887 pcum->aapcs_partial = 0;
4892 if (arm_vector_mode_supported_p (mode)
4893 && pcum->named_count > pcum->nargs
4894 && TARGET_IWMMXT_ABI)
4895 pcum->iwmmxt_nregs += 1;
4897 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4901 /* Variable sized types are passed by reference. This is a GCC
4902 extension to the ARM ABI. */
4905 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4906 enum machine_mode mode ATTRIBUTE_UNUSED,
4907 const_tree type, bool named ATTRIBUTE_UNUSED)
4909 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
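/* For example (illustrative): a GNU C object whose size is only known
   at run time has a non-constant TYPE_SIZE, so it is passed as a hidden
   reference rather than copied.  */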
4912 /* Encode the current state of the #pragma [no_]long_calls. */
4915 OFF, /* No #pragma [no_]long_calls is in effect. */
4916 LONG, /* #pragma long_calls is in effect. */
4917 SHORT /* #pragma no_long_calls is in effect. */
4920 static arm_pragma_enum arm_pragma_long_calls = OFF;
4923 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4925 arm_pragma_long_calls = LONG;
4929 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4931 arm_pragma_long_calls = SHORT;
4935 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4937 arm_pragma_long_calls = OFF;
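/* Example usage (illustrative):

     #pragma long_calls
     void far_away (void);      -- receives the long_call attribute
     #pragma no_long_calls
     void near_by (void);       -- receives the short_call attribute
     #pragma long_calls_off     -- back to the command-line default  */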
4940 /* Handle an attribute requiring a FUNCTION_DECL;
4941 arguments as in struct attribute_spec.handler. */
4943 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4944 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4946 if (TREE_CODE (*node) != FUNCTION_DECL)
4948 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4949 name);
4950 *no_add_attrs = true;
4956 /* Handle an "interrupt" or "isr" attribute;
4957 arguments as in struct attribute_spec.handler. */
4959 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4964 if (TREE_CODE (*node) != FUNCTION_DECL)
4966 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4967 name);
4968 *no_add_attrs = true;
4970 /* FIXME: the argument if any is checked for type attributes;
4971 should it be checked for decl ones? */
4975 if (TREE_CODE (*node) == FUNCTION_TYPE
4976 || TREE_CODE (*node) == METHOD_TYPE)
4978 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4980 warning (OPT_Wattributes, "%qE attribute ignored",
4981 name);
4982 *no_add_attrs = true;
4985 else if (TREE_CODE (*node) == POINTER_TYPE
4986 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4987 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4988 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4990 *node = build_variant_type_copy (*node);
4991 TREE_TYPE (*node) = build_type_attribute_variant
4993 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4994 *no_add_attrs = true;
4998 /* Possibly pass this attribute on from the type to a decl. */
4999 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5000 | (int) ATTR_FLAG_FUNCTION_NEXT
5001 | (int) ATTR_FLAG_ARRAY_NEXT))
5003 *no_add_attrs = true;
5004 return tree_cons (name, args, NULL_TREE);
5008 warning (OPT_Wattributes, "%qE attribute ignored",
5009 name);
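/* Example usage (illustrative):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional argument must be an interrupt type recognized by
   arm_isr_value, such as "IRQ", "FIQ", "SWI", "ABORT" or "UNDEF".  */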
5017 /* Handle a "pcs" attribute; arguments as in struct
5018 attribute_spec.handler. */
5020 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5021 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5023 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5025 warning (OPT_Wattributes, "%qE attribute ignored", name);
5026 *no_add_attrs = true;
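/* Example usage (illustrative):

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   The string must be one of the names listed in pcs_attribute_args
   above.  */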
5031 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5032 /* Handle the "notshared" attribute. This attribute is another way of
5033 requesting hidden visibility. ARM's compiler supports
5034 "__declspec(notshared)"; we support the same thing via an
5038 arm_handle_notshared_attribute (tree *node,
5039 tree name ATTRIBUTE_UNUSED,
5040 tree args ATTRIBUTE_UNUSED,
5041 int flags ATTRIBUTE_UNUSED,
5044 tree decl = TYPE_NAME (*node);
5048 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5049 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5050 *no_add_attrs = false;
5056 /* Return 0 if the attributes for two types are incompatible, 1 if they
5057 are compatible, and 2 if they are nearly compatible (which causes a
5058 warning to be generated). */
5060 arm_comp_type_attributes (const_tree type1, const_tree type2)
5064 /* Check for mismatch of non-default calling convention. */
5065 if (TREE_CODE (type1) != FUNCTION_TYPE)
5068 /* Check for mismatched call attributes. */
5069 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5070 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5071 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5072 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5074 /* Only bother to check if an attribute is defined. */
5075 if (l1 | l2 | s1 | s2)
5077 /* If one type has an attribute, the other must have the same attribute. */
5078 if ((l1 != l2) || (s1 != s2))
5081 /* Disallow mixed attributes. */
5082 if ((l1 & s2) || (l2 & s1))
5086 /* Check for mismatched ISR attribute. */
5087 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5089 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5090 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5092 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5099 /* Assigns default attributes to newly defined type. This is used to
5100 set short_call/long_call attributes for function types of
5101 functions defined inside corresponding #pragma scopes. */
5103 arm_set_default_type_attributes (tree type)
5105 /* Add __attribute__ ((long_call)) to all functions, when
5106 inside #pragma long_calls or __attribute__ ((short_call)),
5107 when inside #pragma no_long_calls. */
5108 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5110 tree type_attr_list, attr_name;
5111 type_attr_list = TYPE_ATTRIBUTES (type);
5113 if (arm_pragma_long_calls == LONG)
5114 attr_name = get_identifier ("long_call");
5115 else if (arm_pragma_long_calls == SHORT)
5116 attr_name = get_identifier ("short_call");
5120 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5121 TYPE_ATTRIBUTES (type) = type_attr_list;
5125 /* Return true if DECL is known to be linked into section SECTION. */
5128 arm_function_in_section_p (tree decl, section *section)
5130 /* We can only be certain about functions defined in the same
5131 compilation unit. */
5132 if (!TREE_STATIC (decl))
5135 /* Make sure that SYMBOL always binds to the definition in this
5136 compilation unit. */
5137 if (!targetm.binds_local_p (decl))
5140 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5141 if (!DECL_SECTION_NAME (decl))
5143 /* Make sure that we will not create a unique section for DECL. */
5144 if (flag_function_sections || DECL_ONE_ONLY (decl))
5148 return function_section (decl) == section;
5151 /* Return nonzero if a 32-bit "long_call" should be generated for
5152 a call from the current function to DECL. We generate a long_call
5155 a. has an __attribute__((long_call))
5156 or b. is within the scope of a #pragma long_calls
5157 or c. the -mlong-calls command line switch has been specified
5159 However we do not generate a long call if the function:
5161 d. has an __attribute__ ((short_call))
5162 or e. is inside the scope of a #pragma no_long_calls
5163 or f. is defined in the same section as the current function. */
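/* For example (illustrative):

     void slow_path (void) __attribute__ ((long_call));

   forces calls to SLOW_PATH to go via a full 32-bit address, regardless
   of -mlong-calls.  */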
5166 arm_is_long_call_p (tree decl)
5171 return TARGET_LONG_CALLS;
5173 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5174 if (lookup_attribute ("short_call", attrs))
5177 /* For "f", be conservative, and only cater for cases in which the
5178 whole of the current function is placed in the same section. */
5179 if (!flag_reorder_blocks_and_partition
5180 && TREE_CODE (decl) == FUNCTION_DECL
5181 && arm_function_in_section_p (decl, current_function_section ()))
5184 if (lookup_attribute ("long_call", attrs))
5187 return TARGET_LONG_CALLS;
5190 /* Return nonzero if it is ok to make a tail-call to DECL. */
5192 arm_function_ok_for_sibcall (tree decl, tree exp)
5194 unsigned long func_type;
5196 if (cfun->machine->sibcall_blocked)
5199 /* Never tailcall something for which we have no decl, or if we
5200 are generating code for Thumb-1. */
5201 if (decl == NULL || TARGET_THUMB1)
5204 /* The PIC register is live on entry to VxWorks PLT entries, so we
5205 must make the call before restoring the PIC register. */
5206 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5209 /* Cannot tail-call to long calls, since these are out of range of
5210 a branch instruction. */
5211 if (arm_is_long_call_p (decl))
5214 /* If we are interworking and the function is not declared static
5215 then we can't tail-call it unless we know that it exists in this
5216 compilation unit (since it might be a Thumb routine). */
5217 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5220 func_type = arm_current_func_type ();
5221 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5222 if (IS_INTERRUPT (func_type))
5225 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5227 /* Check that the return value locations are the same. For
5228 example that we aren't returning a value from the sibling in
5229 a VFP register but then need to transfer it to a core
5233 a = arm_function_value (TREE_TYPE (exp), decl, false);
5234 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5236 if (!rtx_equal_p (a, b))
5240 /* Never tailcall if function may be called with a misaligned SP. */
5241 if (IS_STACKALIGN (func_type))
5244 /* Everything else is ok. */
5249 /* Addressing mode support functions. */
5251 /* Return nonzero if X is a legitimate immediate operand when compiling
5252 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5254 legitimate_pic_operand_p (rtx x)
5256 if (GET_CODE (x) == SYMBOL_REF
5257 || (GET_CODE (x) == CONST
5258 && GET_CODE (XEXP (x, 0)) == PLUS
5259 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5265 /* Record that the current function needs a PIC register. Initialize
5266 cfun->machine->pic_reg if we have not already done so. */
5269 require_pic_register (void)
5271 /* A lot of the logic here is made obscure by the fact that this
5272 routine gets called as part of the rtx cost estimation process.
5273 We don't want those calls to affect any assumptions about the real
5274 function; and further, we can't call entry_of_function() until we
5275 start the real expansion process. */
5276 if (!crtl->uses_pic_offset_table)
5278 gcc_assert (can_create_pseudo_p ());
5279 if (arm_pic_register != INVALID_REGNUM)
5281 if (!cfun->machine->pic_reg)
5282 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5284 /* Play games to avoid marking the function as needing pic
5285 if we are being called as part of the cost-estimation
5286 process. */
5287 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5288 crtl->uses_pic_offset_table = 1;
5294 if (!cfun->machine->pic_reg)
5295 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5297 /* Play games to avoid marking the function as needing pic
5298 if we are being called as part of the cost-estimation
5299 process. */
5300 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5302 crtl->uses_pic_offset_table = 1;
5305 arm_load_pic_register (0UL);
5310 for (insn = seq; insn; insn = NEXT_INSN (insn))
5312 INSN_LOCATOR (insn) = prologue_locator;
5314 /* We can be called during expansion of PHI nodes, where
5315 we can't yet emit instructions directly in the final
5316 insn stream. Queue the insns on the entry edge; they will
5317 be committed after everything else is expanded. */
5318 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5325 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5327 if (GET_CODE (orig) == SYMBOL_REF
5328 || GET_CODE (orig) == LABEL_REF)
5334 gcc_assert (can_create_pseudo_p ());
5335 reg = gen_reg_rtx (Pmode);
5338 /* VxWorks does not impose a fixed gap between segments; the run-time
5339 gap can be different from the object-file gap. We therefore can't
5340 use GOTOFF unless we are absolutely sure that the symbol is in the
5341 same segment as the GOT. Unfortunately, the flexibility of linker
5342 scripts means that we can't be sure of that in general, so assume
5343 that GOTOFF is never valid on VxWorks. */
5344 if ((GET_CODE (orig) == LABEL_REF
5345 || (GET_CODE (orig) == SYMBOL_REF &&
5346 SYMBOL_REF_LOCAL_P (orig)))
5348 && !TARGET_VXWORKS_RTP)
5349 insn = arm_pic_static_addr (orig, reg);
5355 /* If this function doesn't have a pic register, create one now. */
5356 require_pic_register ();
5358 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5360 /* Make the MEM as close to a constant as possible. */
5361 mem = SET_SRC (pat);
5362 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5363 MEM_READONLY_P (mem) = 1;
5364 MEM_NOTRAP_P (mem) = 1;
5366 insn = emit_insn (pat);
5369 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5370 by loop. */
5371 set_unique_reg_note (insn, REG_EQUAL, orig);
5375 else if (GET_CODE (orig) == CONST)
5379 if (GET_CODE (XEXP (orig, 0)) == PLUS
5380 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5383 /* Handle the case where we have: const (UNSPEC_TLS). */
5384 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5385 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5388 /* Handle the case where we have:
5389 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5390 CONST_INT. */
5391 if (GET_CODE (XEXP (orig, 0)) == PLUS
5392 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5393 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5395 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5401 gcc_assert (can_create_pseudo_p ());
5402 reg = gen_reg_rtx (Pmode);
5405 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5407 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5408 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5409 base == reg ? 0 : reg);
5411 if (GET_CODE (offset) == CONST_INT)
5413 /* The base register doesn't really matter; we only want to
5414 test the index for the appropriate mode. */
5415 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5417 gcc_assert (can_create_pseudo_p ());
5418 offset = force_reg (Pmode, offset);
5421 if (GET_CODE (offset) == CONST_INT)
5422 return plus_constant (base, INTVAL (offset));
5425 if (GET_MODE_SIZE (mode) > 4
5426 && (GET_MODE_CLASS (mode) == MODE_INT
5427 || TARGET_SOFT_FLOAT))
5429 emit_insn (gen_addsi3 (reg, base, offset));
5433 return gen_rtx_PLUS (Pmode, base, offset);
5440 /* Find a spare register to use during the prolog of a function. */
5443 thumb_find_work_register (unsigned long pushed_regs_mask)
5447 /* Check the argument registers first as these are call-used. The
5448 register allocation order means that sometimes r3 might be used
5449 but earlier argument registers might not, so check them all. */
5450 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5451 if (!df_regs_ever_live_p (reg))
5454 /* Before going on to check the call-saved registers we can try a couple
5455 more ways of deducing that r3 is available. The first is when we are
5456 pushing anonymous arguments onto the stack and we have less than 4
5457 registers worth of fixed arguments(*). In this case r3 will be part of
5458 the variable argument list and so we can be sure that it will be
5459 pushed right at the start of the function. Hence it will be available
5460 for the rest of the prologue.
5461 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5462 if (cfun->machine->uses_anonymous_args
5463 && crtl->args.pretend_args_size > 0)
5464 return LAST_ARG_REGNUM;
5466 /* The other case is when we have fixed arguments but less than 4 registers
5467 worth. In this case r3 might be used in the body of the function, but
5468 it is not being used to convey an argument into the function. In theory
5469 we could just check crtl->args.size to see how many bytes are
5470 being passed in argument registers, but it seems that it is unreliable.
5471 Sometimes it will have the value 0 when in fact arguments are being
5472 passed. (See testcase execute/20021111-1.c for an example). So we also
5473 check the args_info.nregs field as well. The problem with this field is
5474 that it makes no allowances for arguments that are passed to the
5475 function but which are not used. Hence we could miss an opportunity
5476 when a function has an unused argument in r3. But it is better to be
5477 safe than to be sorry. */
5478 if (! cfun->machine->uses_anonymous_args
5479 && crtl->args.size >= 0
5480 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5481 && crtl->args.info.nregs < 4)
5482 return LAST_ARG_REGNUM;
5484 /* Otherwise look for a call-saved register that is going to be pushed. */
5485 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5486 if (pushed_regs_mask & (1 << reg))
5491 /* Thumb-2 can use high regs. */
5492 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5493 if (pushed_regs_mask & (1 << reg))
5496 /* Something went wrong - thumb_compute_save_reg_mask()
5497 should have arranged for a suitable register to be pushed. */
5501 static GTY(()) int pic_labelno;
5503 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5504 scratch register. */
5507 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5509 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5511 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5514 gcc_assert (flag_pic);
5516 pic_reg = cfun->machine->pic_reg;
5517 if (TARGET_VXWORKS_RTP)
5519 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5520 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5521 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5523 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5525 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5526 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5530 /* We use an UNSPEC rather than a LABEL_REF because this label
5531 never appears in the code stream. */
5533 labelno = GEN_INT (pic_labelno++);
5534 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5535 l1 = gen_rtx_CONST (VOIDmode, l1);
5537 /* On the ARM the PC register contains 'dot + 8' at the time of the
5538 addition, on the Thumb it is 'dot + 4'. */
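/* Schematic ARM sequence (illustrative, not emitted verbatim):

     ldr   rPIC, .LCn        @ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPICm+8)
     .LPICm:
     add   rPIC, pc, rPIC    @ pc reads as .LPICm + 8 here

   so the constant added below cancels the pipeline offset visible
   through PC.  */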
5539 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5540 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5542 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5546 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5548 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5550 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5552 else /* TARGET_THUMB1 */
5554 if (arm_pic_register != INVALID_REGNUM
5555 && REGNO (pic_reg) > LAST_LO_REGNUM)
5557 /* We will have pushed the pic register, so we should always be
5558 able to find a work register. */
5559 pic_tmp = gen_rtx_REG (SImode,
5560 thumb_find_work_register (saved_regs));
5561 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5562 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5565 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5566 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5570 /* Need to emit this whether or not we obey regdecls,
5571 since setjmp/longjmp can cause life info to screw up. */
5575 /* Generate code to load the address of a static var when flag_pic is set. */
5577 arm_pic_static_addr (rtx orig, rtx reg)
5579 rtx l1, labelno, offset_rtx, insn;
5581 gcc_assert (flag_pic);
5583 /* We use an UNSPEC rather than a LABEL_REF because this label
5584 never appears in the code stream. */
5585 labelno = GEN_INT (pic_labelno++);
5586 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5587 l1 = gen_rtx_CONST (VOIDmode, l1);
5589 /* On the ARM the PC register contains 'dot + 8' at the time of the
5590 addition, on the Thumb it is 'dot + 4'. */
5591 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5592 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5593 UNSPEC_SYMBOL_OFFSET);
5594 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5598 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5600 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5602 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5604 else /* TARGET_THUMB1 */
5606 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5607 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5613 /* Return nonzero if X is valid as an ARM state addressing register. */
5615 arm_address_register_rtx_p (rtx x, int strict_p)
5619 if (GET_CODE (x) != REG)
5625 return ARM_REGNO_OK_FOR_BASE_P (regno);
5627 return (regno <= LAST_ARM_REGNUM
5628 || regno >= FIRST_PSEUDO_REGISTER
5629 || regno == FRAME_POINTER_REGNUM
5630 || regno == ARG_POINTER_REGNUM);
5633 /* Return TRUE if this rtx is the difference of a symbol and a label,
5634 and will reduce to a PC-relative relocation in the object file.
5635 Expressions like this can be left alone when generating PIC, rather
5636 than forced through the GOT. */
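/* For example: (minus (symbol_ref "x") (label_ref L)) reduces to a
   fixed PC-relative distance at assembly time.  */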
5638 pcrel_constant_p (rtx x)
5640 if (GET_CODE (x) == MINUS)
5641 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5646 /* Return true if X will surely end up in an index register after next
5647 splitting pass. */
5649 will_be_in_index_register (const_rtx x)
5651 /* arm.md: calculate_pic_address will split this into a register. */
5652 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5655 /* Return nonzero if X is a valid ARM state address operand. */
5657 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5661 enum rtx_code code = GET_CODE (x);
5663 if (arm_address_register_rtx_p (x, strict_p))
5666 use_ldrd = (TARGET_LDRD
5668 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5670 if (code == POST_INC || code == PRE_DEC
5671 || ((code == PRE_INC || code == POST_DEC)
5672 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5673 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5675 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5676 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5677 && GET_CODE (XEXP (x, 1)) == PLUS
5678 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5680 rtx addend = XEXP (XEXP (x, 1), 1);
5682 /* Don't allow ldrd post increment by register because it's hard
5683 to fixup invalid register choices. */
5685 && GET_CODE (x) == POST_MODIFY
5686 && GET_CODE (addend) == REG)
5689 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5690 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5693 /* After reload constants split into minipools will have addresses
5694 from a LABEL_REF. */
5695 else if (reload_completed
5696 && (code == LABEL_REF
5698 && GET_CODE (XEXP (x, 0)) == PLUS
5699 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5700 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5703 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5706 else if (code == PLUS)
5708 rtx xop0 = XEXP (x, 0);
5709 rtx xop1 = XEXP (x, 1);
5711 return ((arm_address_register_rtx_p (xop0, strict_p)
5712 && ((GET_CODE(xop1) == CONST_INT
5713 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5714 || (!strict_p && will_be_in_index_register (xop1))))
5715 || (arm_address_register_rtx_p (xop1, strict_p)
5716 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5720 /* Reload currently can't handle MINUS, so disable this for now */
5721 else if (GET_CODE (x) == MINUS)
5723 rtx xop0 = XEXP (x, 0);
5724 rtx xop1 = XEXP (x, 1);
5726 return (arm_address_register_rtx_p (xop0, strict_p)
5727 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5731 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5732 && code == SYMBOL_REF
5733 && CONSTANT_POOL_ADDRESS_P (x)
5735 && symbol_mentioned_p (get_pool_constant (x))
5736 && ! pcrel_constant_p (get_pool_constant (x))))
5742 /* Return nonzero if X is a valid Thumb-2 address operand. */
5744 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5747 enum rtx_code code = GET_CODE (x);
5749 if (arm_address_register_rtx_p (x, strict_p))
5752 use_ldrd = (TARGET_LDRD
5754 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5756 if (code == POST_INC || code == PRE_DEC
5757 || ((code == PRE_INC || code == POST_DEC)
5758 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5759 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5761 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5762 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5763 && GET_CODE (XEXP (x, 1)) == PLUS
5764 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5766 /* Thumb-2 only has autoincrement by constant. */
5767 rtx addend = XEXP (XEXP (x, 1), 1);
5768 HOST_WIDE_INT offset;
5770 if (GET_CODE (addend) != CONST_INT)
5773 offset = INTVAL(addend);
5774 if (GET_MODE_SIZE (mode) <= 4)
5775 return (offset > -256 && offset < 256);
5777 return (use_ldrd && offset > -1024 && offset < 1024
5778 && (offset & 3) == 0);
5781 /* After reload constants split into minipools will have addresses
5782 from a LABEL_REF. */
5783 else if (reload_completed
5784 && (code == LABEL_REF
5786 && GET_CODE (XEXP (x, 0)) == PLUS
5787 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5788 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5791 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5794 else if (code == PLUS)
5796 rtx xop0 = XEXP (x, 0);
5797 rtx xop1 = XEXP (x, 1);
5799 return ((arm_address_register_rtx_p (xop0, strict_p)
5800 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5801 || (!strict_p && will_be_in_index_register (xop1))))
5802 || (arm_address_register_rtx_p (xop1, strict_p)
5803 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5806 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5807 && code == SYMBOL_REF
5808 && CONSTANT_POOL_ADDRESS_P (x)
5810 && symbol_mentioned_p (get_pool_constant (x))
5811 && ! pcrel_constant_p (get_pool_constant (x))))
5817 /* Return nonzero if INDEX is valid for an address index operand in
5820 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5823 HOST_WIDE_INT range;
5824 enum rtx_code code = GET_CODE (index);
5826 /* Standard coprocessor addressing modes. */
5827 if (TARGET_HARD_FLOAT
5828 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5829 && (mode == SFmode || mode == DFmode
5830 || (TARGET_MAVERICK && mode == DImode)))
5831 return (code == CONST_INT && INTVAL (index) < 1024
5832 && INTVAL (index) > -1024
5833 && (INTVAL (index) & 3) == 0);
5835 /* For quad modes, we restrict the constant offset to be slightly less
5836 than what the instruction format permits. We do this because for
5837 quad mode moves, we will actually decompose them into two separate
5838 double-mode reads or writes. INDEX must therefore be a valid
5839 (double-mode) offset and so should INDEX+8. */
5840 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5841 return (code == CONST_INT
5842 && INTVAL (index) < 1016
5843 && INTVAL (index) > -1024
5844 && (INTVAL (index) & 3) == 0);
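/* For example: the largest quad-mode offset accepted above is 1012; its
   second doubleword access then lands at 1020, still within the +/-1024
   range, whereas an offset of 1016 would push that access to 1024.  */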
5846 /* We have no such constraint on double mode offsets, so we permit the
5847 full range of the instruction format. */
5848 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5849 return (code == CONST_INT
5850 && INTVAL (index) < 1024
5851 && INTVAL (index) > -1024
5852 && (INTVAL (index) & 3) == 0);
5854 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5855 return (code == CONST_INT
5856 && INTVAL (index) < 1024
5857 && INTVAL (index) > -1024
5858 && (INTVAL (index) & 3) == 0);
5860 if (arm_address_register_rtx_p (index, strict_p)
5861 && (GET_MODE_SIZE (mode) <= 4))
5864 if (mode == DImode || mode == DFmode)
5866 if (code == CONST_INT)
5868 HOST_WIDE_INT val = INTVAL (index);
5871 return val > -256 && val < 256;
5873 return val > -4096 && val < 4092;
5876 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5879 if (GET_MODE_SIZE (mode) <= 4
5883 || (mode == QImode && outer == SIGN_EXTEND))))
5887 rtx xiop0 = XEXP (index, 0);
5888 rtx xiop1 = XEXP (index, 1);
5890 return ((arm_address_register_rtx_p (xiop0, strict_p)
5891 && power_of_two_operand (xiop1, SImode))
5892 || (arm_address_register_rtx_p (xiop1, strict_p)
5893 && power_of_two_operand (xiop0, SImode)));
5895 else if (code == LSHIFTRT || code == ASHIFTRT
5896 || code == ASHIFT || code == ROTATERT)
5898 rtx op = XEXP (index, 1);
5900 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5901 && GET_CODE (op) == CONST_INT
5903 && INTVAL (op) <= 31);
5907 /* For ARM v4 we may be doing a sign-extend operation during the
5913 || (outer == SIGN_EXTEND && mode == QImode))
5919 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5921 return (code == CONST_INT
5922 && INTVAL (index) < range
5923 && INTVAL (index) > -range);
5926 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5927 index operand, i.e. 1, 2, 4 or 8. */
5929 thumb2_index_mul_operand (rtx op)
5933 if (GET_CODE(op) != CONST_INT)
5937 return (val == 1 || val == 2 || val == 4 || val == 8);
5940 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5942 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5944 enum rtx_code code = GET_CODE (index);
5946 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5947 /* Standard coprocessor addressing modes. */
5948 if (TARGET_HARD_FLOAT
5949 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5950 && (mode == SFmode || mode == DFmode
5951 || (TARGET_MAVERICK && mode == DImode)))
5952 return (code == CONST_INT && INTVAL (index) < 1024
5953 /* Thumb-2 allows only > -256 index range for its core register
5954 load/stores. Since we allow SF/DF in core registers, we have
5955 to use the intersection between -256~4096 (core) and -1024~1024
5956 (coprocessor). */
5957 && INTVAL (index) > -256
5958 && (INTVAL (index) & 3) == 0);
5960 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5962 /* For DImode assume values will usually live in core regs
5963 and only allow LDRD addressing modes. */
5964 if (!TARGET_LDRD || mode != DImode)
5965 return (code == CONST_INT
5966 && INTVAL (index) < 1024
5967 && INTVAL (index) > -1024
5968 && (INTVAL (index) & 3) == 0);
5971 /* For quad modes, we restrict the constant offset to be slightly less
5972 than what the instruction format permits. We do this because for
5973 quad mode moves, we will actually decompose them into two separate
5974 double-mode reads or writes. INDEX must therefore be a valid
5975 (double-mode) offset and so should INDEX+8. */
5976 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5977 return (code == CONST_INT
5978 && INTVAL (index) < 1016
5979 && INTVAL (index) > -1024
5980 && (INTVAL (index) & 3) == 0);
5982 /* We have no such constraint on double mode offsets, so we permit the
5983 full range of the instruction format. */
5984 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5985 return (code == CONST_INT
5986 && INTVAL (index) < 1024
5987 && INTVAL (index) > -1024
5988 && (INTVAL (index) & 3) == 0);
5990 if (arm_address_register_rtx_p (index, strict_p)
5991 && (GET_MODE_SIZE (mode) <= 4))
5994 if (mode == DImode || mode == DFmode)
5996 if (code == CONST_INT)
5998 HOST_WIDE_INT val = INTVAL (index);
5999 /* ??? Can we assume ldrd for thumb2? */
6000 /* Thumb-2 ldrd only has reg+const addressing modes. */
6001 /* ldrd supports offsets of +-1020.
6002 However the ldr fallback does not. */
6003 return val > -256 && val < 256 && (val & 3) == 0;
6011 rtx xiop0 = XEXP (index, 0);
6012 rtx xiop1 = XEXP (index, 1);
6014 return ((arm_address_register_rtx_p (xiop0, strict_p)
6015 && thumb2_index_mul_operand (xiop1))
6016 || (arm_address_register_rtx_p (xiop1, strict_p)
6017 && thumb2_index_mul_operand (xiop0)));
6019 else if (code == ASHIFT)
6021 rtx op = XEXP (index, 1);
6023 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6024 && GET_CODE (op) == CONST_INT
6026 && INTVAL (op) <= 3);
6029 return (code == CONST_INT
6030 && INTVAL (index) < 4096
6031 && INTVAL (index) > -256);
6034 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6036 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6040 if (GET_CODE (x) != REG)
6046 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6048 return (regno <= LAST_LO_REGNUM
6049 || regno > LAST_VIRTUAL_REGISTER
6050 || regno == FRAME_POINTER_REGNUM
6051 || (GET_MODE_SIZE (mode) >= 4
6052 && (regno == STACK_POINTER_REGNUM
6053 || regno >= FIRST_PSEUDO_REGISTER
6054 || x == hard_frame_pointer_rtx
6055 || x == arg_pointer_rtx)));
6058 /* Return nonzero if x is a legitimate index register. This is the case
6059 for any base register that can access a QImode object. */
6061 thumb1_index_register_rtx_p (rtx x, int strict_p)
6063 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6066 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6068 The AP may be eliminated to either the SP or the FP, so we use the
6069 least common denominator, e.g. SImode, and offsets from 0 to 64.
6071 ??? Verify whether the above is the right approach.
6073 ??? Also, the FP may be eliminated to the SP, so perhaps that
6074 needs special handling also.
6076 ??? Look at how the mips16 port solves this problem. It probably uses
6077 better ways to solve some of these problems.
6079 Although it is not incorrect, we don't accept QImode and HImode
6080 addresses based on the frame pointer or arg pointer until the
6081 reload pass starts. This is so that eliminating such addresses
6082 into stack based ones won't produce impossible code. */
6084 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6086 /* ??? Not clear if this is right. Experiment. */
6087 if (GET_MODE_SIZE (mode) < 4
6088 && !(reload_in_progress || reload_completed)
6089 && (reg_mentioned_p (frame_pointer_rtx, x)
6090 || reg_mentioned_p (arg_pointer_rtx, x)
6091 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6092 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6093 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6094 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6097 /* Accept any base register. SP only in SImode or larger. */
6098 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6101 /* This is PC relative data before arm_reorg runs. */
6102 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6103 && GET_CODE (x) == SYMBOL_REF
6104 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6107 /* This is PC relative data after arm_reorg runs. */
6108 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6110 && (GET_CODE (x) == LABEL_REF
6111 || (GET_CODE (x) == CONST
6112 && GET_CODE (XEXP (x, 0)) == PLUS
6113 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6114 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6117 /* Post-inc indexing only supported for SImode and larger. */
6118 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6119 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6122 else if (GET_CODE (x) == PLUS)
6124 /* REG+REG address can be any two index registers. */
6125 /* We disallow FRAME+REG addressing since we know that FRAME
6126 will be replaced with STACK, and SP relative addressing only
6127 permits SP+OFFSET. */
6128 if (GET_MODE_SIZE (mode) <= 4
6129 && XEXP (x, 0) != frame_pointer_rtx
6130 && XEXP (x, 1) != frame_pointer_rtx
6131 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6132 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6133 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6136 /* REG+const has 5-7 bit offset for non-SP registers. */
6137 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6138 || XEXP (x, 0) == arg_pointer_rtx)
6139 && GET_CODE (XEXP (x, 1)) == CONST_INT
6140 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6143 /* REG+const has 10-bit offset for SP, but only SImode and
6144 larger is supported. */
6145 /* ??? Should probably check for DI/DFmode overflow here
6146 just like GO_IF_LEGITIMATE_OFFSET does. */
6147 else if (GET_CODE (XEXP (x, 0)) == REG
6148 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6149 && GET_MODE_SIZE (mode) >= 4
6150 && GET_CODE (XEXP (x, 1)) == CONST_INT
6151 && INTVAL (XEXP (x, 1)) >= 0
6152 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6153 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6156 else if (GET_CODE (XEXP (x, 0)) == REG
6157 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6158 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6159 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6160 && REGNO (XEXP (x, 0))
6161 <= LAST_VIRTUAL_POINTER_REGISTER))
6162 && GET_MODE_SIZE (mode) >= 4
6163 && GET_CODE (XEXP (x, 1)) == CONST_INT
6164 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6168 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6169 && GET_MODE_SIZE (mode) == 4
6170 && GET_CODE (x) == SYMBOL_REF
6171 && CONSTANT_POOL_ADDRESS_P (x)
6173 && symbol_mentioned_p (get_pool_constant (x))
6174 && ! pcrel_constant_p (get_pool_constant (x))))
6180 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6181 instruction of mode MODE. */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
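/* A worked illustration (editor's note, not from the original source):
   with the checks above,

     thumb_legitimate_offset_p (QImode, 31)  != 0   e.g. ldrb rD, [rN, #31]
     thumb_legitimate_offset_p (HImode, 62)  != 0   e.g. ldrh rD, [rN, #62]
     thumb_legitimate_offset_p (HImode, 63)  == 0   (odd offset)
     thumb_legitimate_offset_p (SImode, 124) != 0   e.g. ldr  rD, [rN, #124]
     thumb_legitimate_offset_p (SImode, 128) == 0   (124 is the last slot)

   matching the unscaled ranges of the Thumb-1 immediate-offset load/store
   encodings.  */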
6201 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6204 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6205 else if (TARGET_THUMB2)
6206 return thumb2_legitimate_address_p (mode, x, strict_p);
6207 else /* if (TARGET_THUMB1) */
6208 return thumb1_legitimate_address_p (mode, x, strict_p);
6211 /* Build the SYMBOL_REF for __tls_get_addr. */
6213 static GTY(()) rtx tls_get_addr_libfunc;
6216 get_tls_get_addr (void)
6218 if (!tls_get_addr_libfunc)
6219 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6220 return tls_get_addr_libfunc;
6224 arm_load_tp (rtx target)
6227 target = gen_reg_rtx (SImode);
6231 /* Can return in any reg. */
6232 emit_insn (gen_load_tp_hard (target));
6236 /* Always returned in r0. Immediately copy the result into a pseudo,
6237 otherwise other uses of r0 (e.g. setting up function arguments) may
6238 clobber the value. */
6242 emit_insn (gen_load_tp_soft ());
6244 tmp = gen_rtx_REG (SImode, 0);
6245 emit_move_insn (target, tmp);
6251 load_tls_operand (rtx x, rtx reg)
6255 if (reg == NULL_RTX)
6256 reg = gen_reg_rtx (SImode);
6258 tmp = gen_rtx_CONST (SImode, x);
6260 emit_move_insn (reg, tmp);
6266 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6268 rtx insns, label, labelno, sum;
6270 gcc_assert (reloc != TLS_DESCSEQ);
6273 labelno = GEN_INT (pic_labelno++);
6274 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6275 label = gen_rtx_CONST (VOIDmode, label);
6277 sum = gen_rtx_UNSPEC (Pmode,
6278 gen_rtvec (4, x, GEN_INT (reloc), label,
6279 GEN_INT (TARGET_ARM ? 8 : 4)),
6281 reg = load_tls_operand (sum, reg);
6284 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6286 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6288 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6289 LCT_PURE, /* LCT_CONST? */
6290 Pmode, 1, reg, Pmode);
6292 insns = get_insns ();
6299 arm_tls_descseq_addr (rtx x, rtx reg)
6301 rtx labelno = GEN_INT (pic_labelno++);
6302 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6303 rtx sum = gen_rtx_UNSPEC (Pmode,
6304 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6305 gen_rtx_CONST (VOIDmode, label),
6306 GEN_INT (!TARGET_ARM)),
6308 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6310 emit_insn (gen_tlscall (x, labelno));
6312 reg = gen_reg_rtx (SImode);
6314 gcc_assert (REGNO (reg) != 0);
6316 emit_move_insn (reg, reg0);
6322 legitimize_tls_address (rtx x, rtx reg)
6324 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6325 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6329 case TLS_MODEL_GLOBAL_DYNAMIC:
6330 if (TARGET_GNU2_TLS)
6332 reg = arm_tls_descseq_addr (x, reg);
6334 tp = arm_load_tp (NULL_RTX);
6336 dest = gen_rtx_PLUS (Pmode, tp, reg);
6340 /* Original scheme */
6341 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6342 dest = gen_reg_rtx (Pmode);
6343 emit_libcall_block (insns, dest, ret, x);
6347 case TLS_MODEL_LOCAL_DYNAMIC:
6348 if (TARGET_GNU2_TLS)
6350 reg = arm_tls_descseq_addr (x, reg);
6352 tp = arm_load_tp (NULL_RTX);
6354 dest = gen_rtx_PLUS (Pmode, tp, reg);
6358 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6360 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6361 share the LDM result with other LD model accesses. */
6362 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6364 dest = gen_reg_rtx (Pmode);
6365 emit_libcall_block (insns, dest, ret, eqv);
6367 /* Load the addend. */
6368 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6369 GEN_INT (TLS_LDO32)),
6371 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6372 dest = gen_rtx_PLUS (Pmode, dest, addend);
6376 case TLS_MODEL_INITIAL_EXEC:
6377 labelno = GEN_INT (pic_labelno++);
6378 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6379 label = gen_rtx_CONST (VOIDmode, label);
6380 sum = gen_rtx_UNSPEC (Pmode,
6381 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6382 GEN_INT (TARGET_ARM ? 8 : 4)),
6384 reg = load_tls_operand (sum, reg);
6387 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6388 else if (TARGET_THUMB2)
6389 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6392 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6393 emit_move_insn (reg, gen_const_mem (SImode, reg));
6396 tp = arm_load_tp (NULL_RTX);
6398 return gen_rtx_PLUS (Pmode, tp, reg);
6400 case TLS_MODEL_LOCAL_EXEC:
6401 tp = arm_load_tp (NULL_RTX);
6403 reg = gen_rtx_UNSPEC (Pmode,
6404 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6406 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6408 return gen_rtx_PLUS (Pmode, tp, reg);
6415 /* Try machine-dependent ways of modifying an illegitimate address
6416 to be legitimate. If we find one, return the new, valid address. */
6418 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6422 /* TODO: legitimize_address for Thumb2. */
6425 return thumb_legitimize_address (x, orig_x, mode);
6428 if (arm_tls_symbol_p (x))
6429 return legitimize_tls_address (x, NULL_RTX);
6431 if (GET_CODE (x) == PLUS)
6433 rtx xop0 = XEXP (x, 0);
6434 rtx xop1 = XEXP (x, 1);
6436 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6437 xop0 = force_reg (SImode, xop0);
6439 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6440 xop1 = force_reg (SImode, xop1);
6442 if (ARM_BASE_REGISTER_RTX_P (xop0)
6443 && GET_CODE (xop1) == CONST_INT)
6445 HOST_WIDE_INT n, low_n;
6449 /* VFP addressing modes actually allow greater offsets, but for
6450 now we just stick with the lowest common denominator. */
6452 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6464 low_n = ((mode) == TImode ? 0
6465 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6469 base_reg = gen_reg_rtx (SImode);
6470 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6471 emit_move_insn (base_reg, val);
6472 x = plus_constant (base_reg, low_n);
6474 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6475 x = gen_rtx_PLUS (SImode, xop0, xop1);
6478 /* XXX We don't allow MINUS any more -- see comment in
6479 arm_legitimate_address_outer_p (). */
6480 else if (GET_CODE (x) == MINUS)
6482 rtx xop0 = XEXP (x, 0);
6483 rtx xop1 = XEXP (x, 1);
6485 if (CONSTANT_P (xop0))
6486 xop0 = force_reg (SImode, xop0);
6488 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6489 xop1 = force_reg (SImode, xop1);
6491 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6492 x = gen_rtx_MINUS (SImode, xop0, xop1);
6495 /* Make sure to take full advantage of the pre-indexed addressing mode
6496 with absolute addresses which often allows for the base register to
6497 be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
6499 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6502 HOST_WIDE_INT mask, base, index;
6505 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
6507 hope that arm_gen_constant will enable ldrb to use more bits. */
6508 bits = (mode == SImode) ? 12 : 8;
6509 mask = (1 << bits) - 1;
6510 base = INTVAL (x) & ~mask;
6511 index = INTVAL (x) & mask;
6512 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6514 /* It'll most probably be more efficient to generate the base
6515 with more bits set and use a negative index instead. */
6519 base_reg = force_reg (SImode, GEN_INT (base));
6520 x = plus_constant (base_reg, index);
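      /* A worked example (editor's illustration, not from the original
	 source): for a SImode access to the absolute address 0x12345678 we
	 get bits = 12, mask = 0xfff, base = 0x12345000 and index = 0x678;
	 base has 7 bits set, within the (32 - 12)/2 = 10 bit budget, so we
	 emit roughly

	   @ base_reg <- 0x12345000  (materialized by arm_gen_constant)
	   ldr rD, [base_reg, #0x678]

	 and a neighbouring access to 0x1234567c can reuse base_reg.  */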
6525 /* We need to find and carefully transform any SYMBOL and LABEL
6526 references; so go back to the original address expression. */
6527 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6529 if (new_x != orig_x)
6537 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6538 to be legitimate. If we find one, return the new, valid address. */
6540 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6542 if (arm_tls_symbol_p (x))
6543 return legitimize_tls_address (x, NULL_RTX);
6545 if (GET_CODE (x) == PLUS
6546 && GET_CODE (XEXP (x, 1)) == CONST_INT
6547 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6548 || INTVAL (XEXP (x, 1)) < 0))
6550 rtx xop0 = XEXP (x, 0);
6551 rtx xop1 = XEXP (x, 1);
6552 HOST_WIDE_INT offset = INTVAL (xop1);
6554 /* Try and fold the offset into a biasing of the base register and
6555 then offsetting that. Don't do this when optimizing for space
6556 since it can cause too many CSEs. */
6557 if (optimize_size && offset >= 0
6558 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6560 HOST_WIDE_INT delta;
6563 delta = offset - (256 - GET_MODE_SIZE (mode));
6564 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6565 delta = 31 * GET_MODE_SIZE (mode);
6567 delta = offset & (~31 * GET_MODE_SIZE (mode));
6569 xop0 = force_operand (plus_constant (xop0, offset - delta),
6571 x = plus_constant (xop0, delta);
6573 else if (offset < 0 && offset > -256)
6574 /* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
6577 x = force_operand (x, NULL_RTX);
6580 /* For the remaining cases, force the constant into a register. */
6581 xop1 = force_reg (SImode, xop1);
6582 x = gen_rtx_PLUS (SImode, xop0, xop1);
6585 else if (GET_CODE (x) == PLUS
6586 && s_register_operand (XEXP (x, 1), SImode)
6587 && !s_register_operand (XEXP (x, 0), SImode))
6589 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6591 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6596 /* We need to find and carefully transform any SYMBOL and LABEL
6597 references; so go back to the original address expression. */
6598 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6600 if (new_x != orig_x)
6608 arm_legitimize_reload_address (rtx *p,
6609 enum machine_mode mode,
6610 int opnum, int type,
6611 int ind_levels ATTRIBUTE_UNUSED)
6613 /* We must recognize output that we have already generated ourselves. */
6614 if (GET_CODE (*p) == PLUS
6615 && GET_CODE (XEXP (*p, 0)) == PLUS
6616 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6617 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6618 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6620 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6621 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6622 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6626 if (GET_CODE (*p) == PLUS
6627 && GET_CODE (XEXP (*p, 0)) == REG
6628 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6629 /* If the base register is equivalent to a constant, let the generic
6630 code handle it. Otherwise we will run into problems if a future
6631 reload pass decides to rematerialize the constant. */
6632 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6633 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6635 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6636 HOST_WIDE_INT low, high;
6638 /* Detect coprocessor load/stores. */
6639 bool coproc_p = ((TARGET_HARD_FLOAT
6640 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6641 && (mode == SFmode || mode == DFmode
6642 || (mode == DImode && TARGET_MAVERICK)))
6643 || (TARGET_REALLY_IWMMXT
6644 && VALID_IWMMXT_REG_MODE (mode))
6646 && (VALID_NEON_DREG_MODE (mode)
6647 || VALID_NEON_QREG_MODE (mode))));
6649 /* For some conditions, bail out when lower two bits are unaligned. */
6650 if ((val & 0x3) != 0
6651 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6653 /* For DI, and DF under soft-float: */
6654 || ((mode == DImode || mode == DFmode)
6655 /* Without ldrd, we use stm/ldm, which does not
		  fare well with unaligned bits.  */
6658 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6659 || TARGET_THUMB2))))
      /* When breaking down a [reg+index] reload address into
	 [(reg+high)+low], where (reg+high) gets turned into a reload add
	 insn, we try to decompose the index into high/low values that can
	 often also lead to better reload CSE.
	 For example:
	         ldr r0, [r2, #4100]  // Offset too large
	         ldr r1, [r2, #4104]  // Offset too large

	 is best reloaded as:
	         add t1, r2, #4096
	         ldr r0, [t1, #4]
	         add t2, r2, #4096
	         ldr r1, [t2, #8]

	 which post-reload CSE can simplify in most cases to eliminate the
	 second add instruction:
	         add t1, r2, #4096
	         ldr r0, [t1, #4]
	         ldr r1, [t1, #8]
6682 The idea here is that we want to split out the bits of the constant
6683 as a mask, rather than as subtracting the maximum offset that the
6684 respective type of load/store used can handle.
6686 When encountering negative offsets, we can still utilize it even if
6687 the overall offset is positive; sometimes this may lead to an immediate
6688 that can be constructed with fewer instructions.
	         ldr r0, [r2, #0x3FFFFC]

	 This is best reloaded as:
	         add t1, r2, #0x400000
	         sub t2, t1, #4
	         ldr r0, [t2]
6696 The trick for spotting this for a load insn with N bits of offset
	 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6698 negative offset that is going to make bit N and all the bits below
6699 it become zero in the remainder part.
6701 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6702 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6703 used in most cases of ARM load/store instructions. */
#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)
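/* Worked example (editor's illustration, not from the original source):
   with VAL = 0x3FFFFC and N = 12, the low twelve bits (0xFFC) are nonzero,
   so the macro yields
     ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4,
   and the caller's high = VAL - low = 0x400000 -- exactly the add/sub/ldr
   reload sequence shown in the comment above.  */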
6712 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6714 /* NEON quad-word load/stores are made of two double-word accesses,
	     so the valid index range is reduced by 8.  Treat as 9-bit range if
	     we go over it.  */
6717 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6718 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6720 else if (GET_MODE_SIZE (mode) == 8)
6723 low = (TARGET_THUMB2
6724 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6725 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6727 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6728 to access doublewords. The supported load/store offsets are
6729 -8, -4, and 4, which we try to produce here. */
6730 low = ((val & 0xf) ^ 0x8) - 0x8;
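	    /* (Editor's note) the XOR/subtract pair sign-extends the low
	       nibble of VAL into [-8, 7]; e.g. a low nibble of 0xc gives
	       (0xc ^ 0x8) - 0x8 = -4, and 0x4 gives (0x4 ^ 0x8) - 0x8 = 4.  */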
6732 else if (GET_MODE_SIZE (mode) < 8)
6734 /* NEON element load/stores do not have an offset. */
6735 if (TARGET_NEON_FP16 && mode == HFmode)
6740 /* Thumb-2 has an asymmetrical index range of (-256,4096).
		 Try the wider 12-bit range first, and re-try if the result
		 is out of range.  */
6743 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6745 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6749 if (mode == HImode || mode == HFmode)
6752 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6755 /* The storehi/movhi_bytes fallbacks can use only
6756 [-4094,+4094] of the full ldrb/strb index range. */
6757 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6758 if (low == 4095 || low == -4095)
6763 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
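      /* (Editor's note) the XOR-then-subtract below is the usual idiom for
	 sign-extending a 32-bit quantity into a wider HOST_WIDE_INT, e.g.
	 (0xfffffffc ^ 0x80000000) - 0x80000000 = -4.  */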
6769 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6770 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6771 - (unsigned HOST_WIDE_INT) 0x80000000);
6772 /* Check for overflow or zero */
6773 if (low == 0 || high == 0 || (high + low != val))
      /* Reload the high part into a base reg; leave the low part
	 in the mem.  */
6778 *p = gen_rtx_PLUS (GET_MODE (*p),
6779 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6782 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6783 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6784 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6792 thumb_legitimize_reload_address (rtx *x_p,
6793 enum machine_mode mode,
6794 int opnum, int type,
6795 int ind_levels ATTRIBUTE_UNUSED)
6799 if (GET_CODE (x) == PLUS
6800 && GET_MODE_SIZE (mode) < 4
6801 && REG_P (XEXP (x, 0))
6802 && XEXP (x, 0) == stack_pointer_rtx
6803 && GET_CODE (XEXP (x, 1)) == CONST_INT
6804 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6809 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6810 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6814 /* If both registers are hi-regs, then it's better to reload the
6815 entire expression rather than each register individually. That
6816 only requires one reload register rather than two. */
6817 if (GET_CODE (x) == PLUS
6818 && REG_P (XEXP (x, 0))
6819 && REG_P (XEXP (x, 1))
6820 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6821 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6826 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6827 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6834 /* Test for various thread-local symbols. */
6836 /* Return TRUE if X is a thread-local symbol. */
6839 arm_tls_symbol_p (rtx x)
6841 if (! TARGET_HAVE_TLS)
6844 if (GET_CODE (x) != SYMBOL_REF)
6847 return SYMBOL_REF_TLS_MODEL (x) != 0;
6850 /* Helper for arm_tls_referenced_p. */
6853 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6855 if (GET_CODE (*x) == SYMBOL_REF)
6856 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6858 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6859 TLS offsets, not real symbol references. */
6860 if (GET_CODE (*x) == UNSPEC
6861 && XINT (*x, 1) == UNSPEC_TLS)
6867 /* Return TRUE if X contains any TLS symbol references. */
6870 arm_tls_referenced_p (rtx x)
6872 if (! TARGET_HAVE_TLS)
6875 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6878 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6880 On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.  When generating pic allow anything.  */
6887 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6889 /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
6892 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6895 return flag_pic || !label_mentioned_p (x);
6899 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6901 return (GET_CODE (x) == CONST_INT
6902 || GET_CODE (x) == CONST_DOUBLE
6903 || CONSTANT_ADDRESS_P (x)
6908 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6910 return (!arm_cannot_force_const_mem (mode, x)
6912 ? arm_legitimate_constant_p_1 (mode, x)
6913 : thumb_legitimate_constant_p (mode, x)));
6916 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6919 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6923 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6925 split_const (x, &base, &offset);
6926 if (GET_CODE (base) == SYMBOL_REF
6927 && !offset_within_block_p (base, INTVAL (offset)))
6930 return arm_tls_referenced_p (x);
6933 #define REG_OR_SUBREG_REG(X) \
6934 (GET_CODE (X) == REG \
6935 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6937 #define REG_OR_SUBREG_RTX(X) \
6938 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6941 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6943 enum machine_mode mode = GET_MODE (x);
6957 return COSTS_N_INSNS (1);
6960 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6963 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6970 return COSTS_N_INSNS (2) + cycles;
6972 return COSTS_N_INSNS (1) + 16;
      return (COSTS_N_INSNS (1)
	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
		     + (GET_CODE (SET_DEST (x)) == MEM)));
6982 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6984 if (thumb_shiftable_const (INTVAL (x)))
6985 return COSTS_N_INSNS (2);
6986 return COSTS_N_INSNS (3);
6988 else if ((outer == PLUS || outer == COMPARE)
6989 && INTVAL (x) < 256 && INTVAL (x) > -256)
6991 else if ((outer == IOR || outer == XOR || outer == AND)
6992 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6993 return COSTS_N_INSNS (1);
6994 else if (outer == AND)
6997 /* This duplicates the tests in the andsi3 expander. */
6998 for (i = 9; i <= 31; i++)
6999 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7000 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7001 return COSTS_N_INSNS (2);
7003 else if (outer == ASHIFT || outer == ASHIFTRT
7004 || outer == LSHIFTRT)
7006 return COSTS_N_INSNS (2);
7012 return COSTS_N_INSNS (3);
7030 /* XXX another guess. */
7031 /* Memory costs quite a lot for the first word, but subsequent words
7032 load at the equivalent of a single insn each. */
7033 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7034 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7039 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7045 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7046 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7052 return total + COSTS_N_INSNS (1);
7054 /* Assume a two-shift sequence. Increase the cost slightly so
7055 we prefer actual shifts over an extend operation. */
7056 return total + 1 + COSTS_N_INSNS (2);
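    /* (Editor's sketch, assuming the usual Thumb-1 expansion) the two-shift
       sequence costed above is, for a HImode zero-extend without uxth:

	 lsls rD, rS, #16
	 lsrs rD, rD, #16	@ asrs rD, rD, #16 for a sign-extend

       hence two insns, plus one unit to bias against forming it.  */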
7064 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7066 enum machine_mode mode = GET_MODE (x);
7067 enum rtx_code subcode;
7069 enum rtx_code code = GET_CODE (x);
7075 /* Memory costs quite a lot for the first word, but subsequent words
7076 load at the equivalent of a single insn each. */
7077 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7084 if (TARGET_HARD_FLOAT && mode == SFmode)
7085 *total = COSTS_N_INSNS (2);
7086 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7087 *total = COSTS_N_INSNS (4);
7089 *total = COSTS_N_INSNS (20);
7093 if (GET_CODE (XEXP (x, 1)) == REG)
7094 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7095 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7096 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7102 *total += COSTS_N_INSNS (4);
7107 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7108 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7111 *total += COSTS_N_INSNS (3);
7115 *total += COSTS_N_INSNS (1);
7116 /* Increase the cost of complex shifts because they aren't any faster,
7117 and reduce dual issue opportunities. */
7118 if (arm_tune_cortex_a9
7119 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7127 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7128 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7129 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7131 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7135 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7136 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7138 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7145 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7147 if (TARGET_HARD_FLOAT
7149 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7151 *total = COSTS_N_INSNS (1);
7152 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7153 && arm_const_double_rtx (XEXP (x, 0)))
7155 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7159 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7160 && arm_const_double_rtx (XEXP (x, 1)))
7162 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7168 *total = COSTS_N_INSNS (20);
7172 *total = COSTS_N_INSNS (1);
7173 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7174 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7176 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7180 subcode = GET_CODE (XEXP (x, 1));
7181 if (subcode == ASHIFT || subcode == ASHIFTRT
7182 || subcode == LSHIFTRT
7183 || subcode == ROTATE || subcode == ROTATERT)
7185 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7186 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7190 /* A shift as a part of RSB costs no more than RSB itself. */
7191 if (GET_CODE (XEXP (x, 0)) == MULT
7192 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7194 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7195 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7200 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7202 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7203 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7207 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7208 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7210 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7211 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7212 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7213 *total += COSTS_N_INSNS (1);
7221 if (code == PLUS && arm_arch6 && mode == SImode
7222 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7223 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7225 *total = COSTS_N_INSNS (1);
7226 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7228 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7232 /* MLA: All arguments must be registers. We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
7235 if (GET_CODE (XEXP (x, 0)) == MULT
7236 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7238 /* The cost comes from the cost of the multiply. */
7242 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7244 if (TARGET_HARD_FLOAT
7246 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7248 *total = COSTS_N_INSNS (1);
7249 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7250 && arm_const_double_rtx (XEXP (x, 1)))
7252 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7259 *total = COSTS_N_INSNS (20);
7263 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7264 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7266 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7267 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7268 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7269 *total += COSTS_N_INSNS (1);
7275 case AND: case XOR: case IOR:
      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
7282 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7283 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7284 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7285 *total = COSTS_N_INSNS (1);
7289 *total += COSTS_N_INSNS (2);
7290 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7291 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7293 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7300 *total += COSTS_N_INSNS (1);
7301 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7302 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7304 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7307 subcode = GET_CODE (XEXP (x, 0));
7308 if (subcode == ASHIFT || subcode == ASHIFTRT
7309 || subcode == LSHIFTRT
7310 || subcode == ROTATE || subcode == ROTATERT)
7312 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7313 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7318 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7320 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7321 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7325 if (subcode == UMIN || subcode == UMAX
7326 || subcode == SMIN || subcode == SMAX)
7328 *total = COSTS_N_INSNS (3);
7335 /* This should have been handled by the CPU specific routines. */
7339 if (arm_arch3m && mode == SImode
7340 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7342 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7343 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7344 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7345 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7347 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7350 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7354 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7356 if (TARGET_HARD_FLOAT
7358 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7360 *total = COSTS_N_INSNS (1);
7363 *total = COSTS_N_INSNS (2);
7369 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7370 if (mode == SImode && code == NOT)
7372 subcode = GET_CODE (XEXP (x, 0));
7373 if (subcode == ASHIFT || subcode == ASHIFTRT
7374 || subcode == LSHIFTRT
7375 || subcode == ROTATE || subcode == ROTATERT
7377 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7379 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7380 /* Register shifts cost an extra cycle. */
7381 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7382 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7391 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7393 *total = COSTS_N_INSNS (4);
7397 operand = XEXP (x, 0);
7399 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7400 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7401 && GET_CODE (XEXP (operand, 0)) == REG
7402 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7403 *total += COSTS_N_INSNS (1);
7404 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7405 + rtx_cost (XEXP (x, 2), code, 2, speed));
7409 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7411 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7417 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7418 && mode == SImode && XEXP (x, 1) == const0_rtx)
7420 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7426 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7427 && mode == SImode && XEXP (x, 1) == const0_rtx)
7429 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      /* SCC insns.  If the comparison has already been
	 performed, they cost 2 instructions.  Otherwise they need
7451 an additional comparison before them. */
7452 *total = COSTS_N_INSNS (2);
7453 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7460 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7466 *total += COSTS_N_INSNS (1);
7467 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7468 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7470 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7474 subcode = GET_CODE (XEXP (x, 0));
7475 if (subcode == ASHIFT || subcode == ASHIFTRT
7476 || subcode == LSHIFTRT
7477 || subcode == ROTATE || subcode == ROTATERT)
7479 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7480 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7485 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7487 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7488 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7498 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7499 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7500 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7501 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7505 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7507 if (TARGET_HARD_FLOAT
7509 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7511 *total = COSTS_N_INSNS (1);
7514 *total = COSTS_N_INSNS (20);
7517 *total = COSTS_N_INSNS (1);
7519 *total += COSTS_N_INSNS (3);
7525 if (GET_MODE_CLASS (mode) == MODE_INT)
7527 rtx op = XEXP (x, 0);
7528 enum machine_mode opmode = GET_MODE (op);
7531 *total += COSTS_N_INSNS (1);
7533 if (opmode != SImode)
7537 /* If !arm_arch4, we use one of the extendhisi2_mem
7538 or movhi_bytes patterns for HImode. For a QImode
7539 sign extension, we first zero-extend from memory
7540 and then perform a shift sequence. */
7541 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7542 *total += COSTS_N_INSNS (2);
7545 *total += COSTS_N_INSNS (1);
	  /* We don't have the necessary insn, so we need to perform some
	     other operation.  */
7549 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7550 /* An and with constant 255. */
7551 *total += COSTS_N_INSNS (1);
7553 /* A shift sequence. Increase costs slightly to avoid
7554 combining two shifts into an extend operation. */
7555 *total += COSTS_N_INSNS (2) + 1;
7561 switch (GET_MODE (XEXP (x, 0)))
7568 *total = COSTS_N_INSNS (1);
7578 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7582 if (const_ok_for_arm (INTVAL (x))
7583 || const_ok_for_arm (~INTVAL (x)))
7584 *total = COSTS_N_INSNS (1);
7586 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7587 INTVAL (x), NULL_RTX,
7594 *total = COSTS_N_INSNS (3);
7598 *total = COSTS_N_INSNS (1);
7602 *total = COSTS_N_INSNS (1);
7603 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7607 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7608 && (mode == SFmode || !TARGET_VFP_SINGLE))
7609 *total = COSTS_N_INSNS (1);
7611 *total = COSTS_N_INSNS (4);
7618 *total = COSTS_N_INSNS (4);
7623 /* Estimates the size cost of thumb1 instructions.
7624 For now most of the code is copied from thumb1_rtx_costs. We need more
   fine-grained tuning when we have more related test cases.  */
7627 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7629 enum machine_mode mode = GET_MODE (x);
7642 return COSTS_N_INSNS (1);
7645 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	  /* The Thumb-1 mul instruction cannot operate on a constant; we
	     must load it into a register first.  */
7649 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7650 return COSTS_N_INSNS (1) + const_size;
7652 return COSTS_N_INSNS (1);
      return (COSTS_N_INSNS (1)
	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
		     + (GET_CODE (SET_DEST (x)) == MEM)));
7662 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7663 return COSTS_N_INSNS (1);
7664 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7665 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7666 return COSTS_N_INSNS (2);
7667 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7668 if (thumb_shiftable_const (INTVAL (x)))
7669 return COSTS_N_INSNS (2);
7670 return COSTS_N_INSNS (3);
7672 else if ((outer == PLUS || outer == COMPARE)
7673 && INTVAL (x) < 256 && INTVAL (x) > -256)
7675 else if ((outer == IOR || outer == XOR || outer == AND)
7676 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7677 return COSTS_N_INSNS (1);
7678 else if (outer == AND)
7681 /* This duplicates the tests in the andsi3 expander. */
7682 for (i = 9; i <= 31; i++)
7683 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7684 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7685 return COSTS_N_INSNS (2);
7687 else if (outer == ASHIFT || outer == ASHIFTRT
7688 || outer == LSHIFTRT)
7690 return COSTS_N_INSNS (2);
7696 return COSTS_N_INSNS (3);
7714 /* XXX another guess. */
7715 /* Memory costs quite a lot for the first word, but subsequent words
7716 load at the equivalent of a single insn each. */
7717 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7718 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7723 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7728 /* XXX still guessing. */
7729 switch (GET_MODE (XEXP (x, 0)))
7732 return (1 + (mode == DImode ? 4 : 0)
7733 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7736 return (4 + (mode == DImode ? 4 : 0)
7737 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7740 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7751 /* RTX costs when optimizing for size. */
7753 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7756 enum machine_mode mode = GET_MODE (x);
7759 *total = thumb1_size_rtx_costs (x, code, outer_code);
7763 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7767 /* A memory access costs 1 insn if the mode is small, or the address is
7768 a single register, otherwise it costs one insn per word. */
7769 if (REG_P (XEXP (x, 0)))
7770 *total = COSTS_N_INSNS (1);
7772 && GET_CODE (XEXP (x, 0)) == PLUS
7773 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7774 /* This will be split into two instructions.
7775 See arm.md:calculate_pic_address. */
7776 *total = COSTS_N_INSNS (2);
7778 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7785 /* Needs a libcall, so it costs about this. */
7786 *total = COSTS_N_INSNS (2);
7790 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7792 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7800 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7802 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7805 else if (mode == SImode)
7807 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7808 /* Slightly disparage register shifts, but not by much. */
7809 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7810 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7814 /* Needs a libcall. */
7815 *total = COSTS_N_INSNS (2);
7819 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7820 && (mode == SFmode || !TARGET_VFP_SINGLE))
7822 *total = COSTS_N_INSNS (1);
7828 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7829 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7831 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7832 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7833 || subcode1 == ROTATE || subcode1 == ROTATERT
7834 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7835 || subcode1 == ASHIFTRT)
7837 /* It's just the cost of the two operands. */
7842 *total = COSTS_N_INSNS (1);
7846 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7850 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7851 && (mode == SFmode || !TARGET_VFP_SINGLE))
7853 *total = COSTS_N_INSNS (1);
7857 /* A shift as a part of ADD costs nothing. */
7858 if (GET_CODE (XEXP (x, 0)) == MULT
7859 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7861 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7862 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7863 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7868 case AND: case XOR: case IOR:
7871 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7873 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7874 || subcode == LSHIFTRT || subcode == ASHIFTRT
7875 || (code == AND && subcode == NOT))
7877 /* It's just the cost of the two operands. */
7883 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7887 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7891 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7892 && (mode == SFmode || !TARGET_VFP_SINGLE))
7894 *total = COSTS_N_INSNS (1);
7900 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7909 if (cc_register (XEXP (x, 0), VOIDmode))
7912 *total = COSTS_N_INSNS (1);
7916 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7917 && (mode == SFmode || !TARGET_VFP_SINGLE))
7918 *total = COSTS_N_INSNS (1);
7920 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7925 return arm_rtx_costs_1 (x, outer_code, total, 0);
7928 if (const_ok_for_arm (INTVAL (x)))
7929 /* A multiplication by a constant requires another instruction
7930 to load the constant to a register. */
7931 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7933 else if (const_ok_for_arm (~INTVAL (x)))
7934 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7935 else if (const_ok_for_arm (-INTVAL (x)))
7937 if (outer_code == COMPARE || outer_code == PLUS
7938 || outer_code == MINUS)
7941 *total = COSTS_N_INSNS (1);
7944 *total = COSTS_N_INSNS (2);
7950 *total = COSTS_N_INSNS (2);
7954 *total = COSTS_N_INSNS (4);
7959 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7960 cost of these slightly. */
7961 *total = COSTS_N_INSNS (1) + 1;
7968 if (mode != VOIDmode)
7969 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	*total = COSTS_N_INSNS (4); /* Who knows?  */
/* RTX costs.  Dispatch to the size-oriented cost function when optimizing
   for size, otherwise to the current tuning's cost function.  */
7978 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7979 int *total, bool speed)
7982 return arm_size_rtx_costs (x, (enum rtx_code) code,
7983 (enum rtx_code) outer_code, total);
7985 return current_tune->rtx_costs (x, (enum rtx_code) code,
7986 (enum rtx_code) outer_code,
7990 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7991 supported on any "slowmul" cores, so it can be ignored. */
7994 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7995 int *total, bool speed)
7997 enum machine_mode mode = GET_MODE (x);
8001 *total = thumb1_rtx_costs (x, code, outer_code);
8008 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8011 *total = COSTS_N_INSNS (20);
8015 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8017 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8018 & (unsigned HOST_WIDE_INT) 0xffffffff);
8019 int cost, const_ok = const_ok_for_arm (i);
8020 int j, booth_unit_size;
8022 /* Tune as appropriate. */
8023 cost = const_ok ? 4 : 8;
8024 booth_unit_size = 2;
8025 for (j = 0; i && j < 32; j += booth_unit_size)
8027 i >>= booth_unit_size;
8031 *total = COSTS_N_INSNS (cost);
8032 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8036 *total = COSTS_N_INSNS (20);
      return arm_rtx_costs_1 (x, outer_code, total, speed);
8045 /* RTX cost for cores with a fast multiply unit (M variants). */
8048 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8049 int *total, bool speed)
8051 enum machine_mode mode = GET_MODE (x);
8055 *total = thumb1_rtx_costs (x, code, outer_code);
8059 /* ??? should thumb2 use different costs? */
8063 /* There is no point basing this on the tuning, since it is always the
8064 fast variant if it exists at all. */
8066 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8067 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8068 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);
8077 *total = COSTS_N_INSNS (5);
8081 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8083 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8084 & (unsigned HOST_WIDE_INT) 0xffffffff);
8085 int cost, const_ok = const_ok_for_arm (i);
8086 int j, booth_unit_size;
8088 /* Tune as appropriate. */
8089 cost = const_ok ? 4 : 8;
8090 booth_unit_size = 8;
8091 for (j = 0; i && j < 32; j += booth_unit_size)
8093 i >>= booth_unit_size;
	  *total = COSTS_N_INSNS (cost);
8103 *total = COSTS_N_INSNS (4);
8107 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8109 if (TARGET_HARD_FLOAT
8111 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8113 *total = COSTS_N_INSNS (1);
      /* Requires a libcall.  */
8119 *total = COSTS_N_INSNS (20);
8123 return arm_rtx_costs_1 (x, outer_code, total, speed);
8128 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8129 so it can be ignored. */
8132 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8133 int *total, bool speed)
8135 enum machine_mode mode = GET_MODE (x);
8139 *total = thumb1_rtx_costs (x, code, outer_code);
8146 if (GET_CODE (XEXP (x, 0)) != MULT)
8147 return arm_rtx_costs_1 (x, outer_code, total, speed);
8149 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8150 will stall until the multiplication is complete. */
8151 *total = COSTS_N_INSNS (3);
8155 /* There is no point basing this on the tuning, since it is always the
8156 fast variant if it exists at all. */
8158 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8159 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8160 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8162 *total = COSTS_N_INSNS (2);
8169 *total = COSTS_N_INSNS (5);
8173 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8175 /* If operand 1 is a constant we can more accurately
8176 calculate the cost of the multiply. The multiplier can
8177 retire 15 bits on the first cycle and a further 12 on the
8178 second. We do, of course, have to load the constant into
8179 a register first. */
8180 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8181 /* There's a general overhead of one cycle. */
8183 unsigned HOST_WIDE_INT masked_const;
8188 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8190 masked_const = i & 0xffff8000;
8191 if (masked_const != 0)
8194 masked_const = i & 0xf8000000;
8195 if (masked_const != 0)
8198 *total = COSTS_N_INSNS (cost);
8204 *total = COSTS_N_INSNS (3);
      /* Requires a libcall.  */
8209 *total = COSTS_N_INSNS (20);
8213 return arm_rtx_costs_1 (x, outer_code, total, speed);
8218 /* RTX costs for 9e (and later) cores. */
8221 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8222 int *total, bool speed)
8224 enum machine_mode mode = GET_MODE (x);
8231 *total = COSTS_N_INSNS (3);
8235 *total = thumb1_rtx_costs (x, code, outer_code);
8243 /* There is no point basing this on the tuning, since it is always the
8244 fast variant if it exists at all. */
8246 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8247 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8248 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8250 *total = COSTS_N_INSNS (2);
8257 *total = COSTS_N_INSNS (5);
8263 *total = COSTS_N_INSNS (2);
8267 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8269 if (TARGET_HARD_FLOAT
8271 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8273 *total = COSTS_N_INSNS (1);
8278 *total = COSTS_N_INSNS (20);
8282 return arm_rtx_costs_1 (x, outer_code, total, speed);
8285 /* All address computations that can be done are free, but rtx cost returns
8286 the same for practically all of them. So we weight the different types
   of address here in the order (most preferred first):
8288 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8290 arm_arm_address_cost (rtx x)
8292 enum rtx_code c = GET_CODE (x);
8294 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8296 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8301 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8304 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8314 arm_thumb_address_cost (rtx x)
8316 enum rtx_code c = GET_CODE (x);
8321 && GET_CODE (XEXP (x, 0)) == REG
8322 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8329 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8331 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8334 /* Adjust cost hook for XScale. */
8336 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8338 /* Some true dependencies can have a higher cost depending
8339 on precisely how certain input operands are used. */
8340 if (REG_NOTE_KIND(link) == 0
8341 && recog_memoized (insn) >= 0
8342 && recog_memoized (dep) >= 0)
8344 int shift_opnum = get_attr_shift (insn);
8345 enum attr_type attr_type = get_attr_type (dep);
8347 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8348 operand for INSN. If we have a shifted input operand and the
8349 instruction we depend on is another ALU instruction, then we may
8350 have to account for an additional stall. */
8351 if (shift_opnum != 0
8352 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8354 rtx shifted_operand;
8357 /* Get the shifted operand. */
8358 extract_insn (insn);
8359 shifted_operand = recog_data.operand[shift_opnum];
8361 /* Iterate over all the operands in DEP. If we write an operand
	 that overlaps with SHIFTED_OPERAND, then we have to increase the
8363 cost of this dependency. */
8365 preprocess_constraints ();
8366 for (opno = 0; opno < recog_data.n_operands; opno++)
8368 /* We can ignore strict inputs. */
8369 if (recog_data.operand_type[opno] == OP_IN)
8372 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8384 /* Adjust cost hook for Cortex A9. */
8386 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8388 switch (REG_NOTE_KIND (link))
8395 case REG_DEP_OUTPUT:
8396 if (recog_memoized (insn) >= 0
8397 && recog_memoized (dep) >= 0)
8399 if (GET_CODE (PATTERN (insn)) == SET)
8402 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8404 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8406 enum attr_type attr_type_insn = get_attr_type (insn);
8407 enum attr_type attr_type_dep = get_attr_type (dep);
	      /* By default all dependencies of the form
		   s0 = s0 <op> s1
		   s0 = s0 <op> s2
		 have an extra latency of 1 cycle because
		 of the input and output dependency in this
		 case.  However this gets modeled as a true
		 dependency and hence all these checks.  */
8416 if (REG_P (SET_DEST (PATTERN (insn)))
8417 && REG_P (SET_DEST (PATTERN (dep)))
8418 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8419 SET_DEST (PATTERN (dep))))
		  /* FMACS is a special case where the dependent
		     instruction can be issued 3 cycles before
		     the normal latency in case of an output
		     dependency.  */
8425 if ((attr_type_insn == TYPE_FMACS
8426 || attr_type_insn == TYPE_FMACD)
8427 && (attr_type_dep == TYPE_FMACS
8428 || attr_type_dep == TYPE_FMACD))
8430 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8431 *cost = insn_default_latency (dep) - 3;
8433 *cost = insn_default_latency (dep);
8438 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8439 *cost = insn_default_latency (dep) + 1;
8441 *cost = insn_default_latency (dep);
8457 /* Adjust cost hook for FA726TE. */
8459 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
     followed by a predicated one) carries a penalty of 3 cycles.  */
8463 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8464 && recog_memoized (insn) >= 0
8465 && recog_memoized (dep) >= 0
8466 && get_attr_conds (dep) == CONDS_SET)
8468 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8469 if (get_attr_conds (insn) == CONDS_USE
8470 && get_attr_type (insn) != TYPE_BRANCH)
8476 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8477 || get_attr_conds (insn) == CONDS_USE)
8487 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8488 It corrects the value of COST based on the relationship between
8489 INSN and DEP through the dependence LINK. It returns the new
8490 value. There is a per-core adjust_cost hook to adjust scheduler costs
8491 and the per-core hook can choose to completely override the generic
8492 adjust_cost function. Only put bits of code into arm_adjust_cost that
8493 are common across all cores. */
8495 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8499 /* When generating Thumb-1 code, we want to place flag-setting operations
8500 close to a conditional branch which depends on them, so that we can
8501 omit the comparison. */
8503 && REG_NOTE_KIND (link) == 0
8504 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8505 && recog_memoized (dep) >= 0
8506 && get_attr_conds (dep) == CONDS_SET)
8509 if (current_tune->sched_adjust_cost != NULL)
8511 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8515 /* XXX This is not strictly true for the FPA. */
8516 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8517 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8520 /* Call insns don't incur a stall, even if they follow a load. */
8521 if (REG_NOTE_KIND (link) == 0
8522 && GET_CODE (insn) == CALL_INSN)
8525 if ((i_pat = single_set (insn)) != NULL
8526 && GET_CODE (SET_SRC (i_pat)) == MEM
8527 && (d_pat = single_set (dep)) != NULL
8528 && GET_CODE (SET_DEST (d_pat)) == MEM)
8530 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
8536 if ((GET_CODE (src_mem) == SYMBOL_REF
8537 && CONSTANT_POOL_ADDRESS_P (src_mem))
8538 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8539 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8540 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8548 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8551 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8553 return (optimize > 0) ? 2 : 0;
8557 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8559 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8562 static int fp_consts_inited = 0;
8564 /* Only zero is valid for VFP. Other values are also valid for FPA. */
static const char * const strings_fp[8] =
{
  "0",  "1",  "2",  "3",
  "4",  "5",  "0.5", "10"
};
8571 static REAL_VALUE_TYPE values_fp[8];
8574 init_fp_table (void)
  if (TARGET_VFP)
    fp_consts_inited = 1;
  else
    fp_consts_inited = 8;
8584 for (i = 0; i < fp_consts_inited; i++)
8586 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8591 /* Return TRUE if rtx X is a valid immediate FP constant. */
8593 arm_const_double_rtx (rtx x)
8598 if (!fp_consts_inited)
8601 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8602 if (REAL_VALUE_MINUS_ZERO (r))
8605 for (i = 0; i < fp_consts_inited; i++)
8606 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8612 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8614 neg_const_double_rtx_ok_for_fpa (rtx x)
8619 if (!fp_consts_inited)
8622 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8623 r = real_value_negate (&r);
8624 if (REAL_VALUE_MINUS_ZERO (r))
8627 for (i = 0; i < 8; i++)
8628 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8635 /* VFPv3 has a fairly wide range of representable immediates, formed from
8636 "quarter-precision" floating-point values. These can be evaluated using this
   formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

8641 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8642 16 <= n <= 31 and 0 <= r <= 7.
8644 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8646 - A (most-significant) is the sign bit.
8647 - BCD are the exponent (encoded as r XOR 3).
8648 - EFGH are the mantissa (encoded as n - 16).
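/* A worked example (editor's illustration, not from the original source):
   0.5 = 16 * 2^-5, i.e. s = 0, n = 16, r = 5, so the 8-bit encoding is

     (0 << 7) | ((5 ^ 3) << 4) | (16 - 16) = 0x60

   which is what vfp3_const_double_index below computes for a CONST_DOUBLE
   of 0.5.  */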
8651 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8652 fconst[sd] instruction, or -1 if X isn't suitable. */
8654 vfp3_const_double_index (rtx x)
8656 REAL_VALUE_TYPE r, m;
8658 unsigned HOST_WIDE_INT mantissa, mant_hi;
8659 unsigned HOST_WIDE_INT mask;
8660 HOST_WIDE_INT m1, m2;
8661 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8663 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8666 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8668 /* We can't represent these things, so detect them first. */
8669 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8672 /* Extract sign, exponent and mantissa. */
8673 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8674 r = real_value_abs (&r);
8675 exponent = REAL_EXP (&r);
8676 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8677 highest (sign) bit, with a fixed binary point at bit point_pos.
8678 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8679 bits for the mantissa, this may fail (low bits would be lost). */
8680 real_ldexp (&m, &r, point_pos - exponent);
8681 REAL_VALUE_TO_INT (&m1, &m2, m);
8685 /* If there are bits set in the low part of the mantissa, we can't
8686 represent this value. */
8690 /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
8693 point_pos -= HOST_BITS_PER_WIDE_INT;
8696 /* We can permit four significant bits of mantissa only, plus a high bit
8697 which is always 1. */
8698 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8699 if ((mantissa & mask) != 0)
8702 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8703 mantissa >>= point_pos - 5;
8705 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8706 floating-point immediate zero with Neon using an integer-zero load, but
8707 that case is handled elsewhere.) */
8711 gcc_assert (mantissa >= 16 && mantissa <= 31);
8713 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8714 normalized significands are in the range [1, 2). (Our mantissa is shifted
8715 left 4 places at this point relative to normalized IEEE754 values). GCC
8716 internally uses [0.5, 1) (see real.c), so the exponent returned from
8717 REAL_EXP must be altered. */
8718 exponent = 5 - exponent;
8720 if (exponent < 0 || exponent > 7)
8723 /* Sign, mantissa and exponent are now in the correct form to plug into the
8724 formula described in the comment above. */
8725 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8728 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8730 vfp3_const_double_rtx (rtx x)
8735 return vfp3_const_double_index (x) != -1;
8738 /* Recognize immediates which can be used in various Neon instructions. Legal
8739 immediates are described by the following table (for VMVN variants, the
8740 bitwise inverse of the constant shown is recognized. In either case, VMOV
8741 is output and the correct instruction to use for a given constant is chosen
8742 by the assembler). The constant shown is replicated across all elements of
8743 the destination vector.
8745 insn elems variant constant (binary)
8746 ---- ----- ------- -----------------
8747 vmov i32 0 00000000 00000000 00000000 abcdefgh
8748 vmov i32 1 00000000 00000000 abcdefgh 00000000
8749 vmov i32 2 00000000 abcdefgh 00000000 00000000
8750 vmov i32 3 abcdefgh 00000000 00000000 00000000
8751 vmov i16 4 00000000 abcdefgh
8752 vmov i16 5 abcdefgh 00000000
8753 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8754 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8755 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8756 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8757 vmvn i16 10 00000000 abcdefgh
8758 vmvn i16 11 abcdefgh 00000000
8759 vmov i32 12 00000000 00000000 abcdefgh 11111111
8760 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8761 vmov i32 14 00000000 abcdefgh 11111111 11111111
8762 vmvn i32 15 00000000 abcdefgh 11111111 11111111
     vmov       i8     16    abcdefgh
     vmov      i64     17    aaaaaaaa bbbbbbbb cccccccc dddddddd
8765 eeeeeeee ffffffff gggggggg hhhhhhhh
8766 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8768 For case 18, B = !b. Representable values are exactly those accepted by
8769 vfp3_const_double_index, but are output as floating-point numbers rather than integers.
8772 Variants 0-5 (inclusive) may also be used as immediates for the second
8773 operand of VORR/VBIC instructions.
8775 The INVERSE argument causes the bitwise inverse of the given operand to be
8776 recognized instead (used for recognizing legal immediates for the VAND/VORN
8777 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8778 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8779 output, rather than the real insns vbic/vorr).
8781 INVERSE makes no difference to the recognition of float vectors.
8783 The return value is the variant of immediate as shown in the above table, or
8784 -1 if the given value doesn't match any of the listed patterns.
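/* Editorial example (added by the editor, not part of the original comment):
   a V4SImode vector with all elements equal to 0x000000AB matches variant 0
   above (abcdefgh = 0xAB) and can be emitted as "vmov.i32 qN, #0xab".  Its
   bitwise inverse, all elements 0xFFFFFF54, matches variant 6, and the
   assembler can materialize it as "vmvn.i32 qN, #0xab", since
   ~0x000000AB == 0xFFFFFF54. */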
8787 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8788 rtx *modconst, int *elementwidth)
8790 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8792 for (i = 0; i < idx; i += (STRIDE)) \
8797 immtype = (CLASS); \
8798 elsize = (ELSIZE); \
8802 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8803 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8804 unsigned char bytes[16];
8805 int immtype = -1, matches;
8806 unsigned int invmask = inverse ? 0xff : 0;
8808 /* Vectors of float constants. */
8809 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8811 rtx el0 = CONST_VECTOR_ELT (op, 0);
8814 if (!vfp3_const_double_rtx (el0))
8817 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8819 for (i = 1; i < n_elts; i++)
8821 rtx elt = CONST_VECTOR_ELT (op, i);
8824 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8826 if (!REAL_VALUES_EQUAL (r0, re))
8831 *modconst = CONST_VECTOR_ELT (op, 0);
8839 /* Splat vector constant out into a byte vector. */
8840 for (i = 0; i < n_elts; i++)
8842 rtx el = CONST_VECTOR_ELT (op, i);
8843 unsigned HOST_WIDE_INT elpart;
8844 unsigned int part, parts;
8846 if (GET_CODE (el) == CONST_INT)
8848 elpart = INTVAL (el);
8851 else if (GET_CODE (el) == CONST_DOUBLE)
8853 elpart = CONST_DOUBLE_LOW (el);
8859 for (part = 0; part < parts; part++)
8862 for (byte = 0; byte < innersize; byte++)
8864 bytes[idx++] = (elpart & 0xff) ^ invmask;
8865 elpart >>= BITS_PER_UNIT;
8867 if (GET_CODE (el) == CONST_DOUBLE)
8868 elpart = CONST_DOUBLE_HIGH (el);
8873 gcc_assert (idx == GET_MODE_SIZE (mode));
8877 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8878 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8880 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8881 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8883 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8884 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8886 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8887 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8889 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8891 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8893 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8894 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8896 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8897 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8899 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8900 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8902 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8903 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8905 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8907 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8909 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8910 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8912 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8913 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8915 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8916 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8918 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8919 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8921 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8923 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8924 && bytes[i] == bytes[(i + 8) % idx]);
8932 *elementwidth = elsize;
8936 unsigned HOST_WIDE_INT imm = 0;
8938 /* Un-invert bytes of recognized vector, if necessary. */
8940 for (i = 0; i < idx; i++)
8941 bytes[i] ^= invmask;
8945 /* FIXME: Broken on 32-bit H_W_I hosts. */
8946 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8948 for (i = 0; i < 8; i++)
8949 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8950 << (i * BITS_PER_UNIT);
8952 *modconst = GEN_INT (imm);
8956 unsigned HOST_WIDE_INT imm = 0;
8958 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8959 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8961 *modconst = GEN_INT (imm);
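/* Editorial sketch (added by the editor, not part of GCC): the
   splat-and-match scheme above can be mirrored in freestanding host C.
   The helper below is illustrative only; it restates the variant-0 test
   from the CHECK invocations.

     static int
     is_vmov_i32_variant0 (const unsigned char *bytes, unsigned int idx)
     {
       unsigned int i;
       for (i = 0; i < idx; i += 4)
         if (bytes[i] != bytes[0] || bytes[i + 1] != 0
             || bytes[i + 2] != 0 || bytes[i + 3] != 0)
           return 0;
       return 1;
     }

   For a V4SImode splat of 0x000000AB the byte image is
   {0xab,0,0,0, 0xab,0,0,0, 0xab,0,0,0, 0xab,0,0,0}, because elements are
   expanded least-significant byte first, so the test succeeds. */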
8969 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8970 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8971 float elements), and a modified constant (whatever should be output for a
8972 VMOV) in *MODCONST. */
8975 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8976 rtx *modconst, int *elementwidth)
8980 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8986 *modconst = tmpconst;
8989 *elementwidth = tmpwidth;
8994 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8995 the immediate is valid, write a constant suitable for using as an operand
8996 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8997 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9000 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9001 rtx *modconst, int *elementwidth)
9005 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9007 if (retval < 0 || retval > 5)
9011 *modconst = tmpconst;
9014 *elementwidth = tmpwidth;
9019 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9020 the immediate is valid, write a constant suitable for using as an operand
9021 to VSHR/VSHL to *MODCONST and the corresponding element width to
9022 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
9023 because the two have different immediate ranges. */
9026 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9027 rtx *modconst, int *elementwidth,
9030 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9031 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9032 unsigned HOST_WIDE_INT last_elt = 0;
9033 unsigned HOST_WIDE_INT maxshift;
9035 /* Split vector constant out into a byte vector. */
9036 for (i = 0; i < n_elts; i++)
9038 rtx el = CONST_VECTOR_ELT (op, i);
9039 unsigned HOST_WIDE_INT elpart;
9041 if (GET_CODE (el) == CONST_INT)
9042 elpart = INTVAL (el);
9043 else if (GET_CODE (el) == CONST_DOUBLE)
9048 if (i != 0 && elpart != last_elt)
9054 /* Shift less than element size. */
9055 maxshift = innersize * 8;
9059 /* Left shift immediate value can be from 0 to <size>-1. */
9060 if (last_elt >= maxshift)
9065 /* Right shift immediate value can be from 1 to <size>. */
9066 if (last_elt == 0 || last_elt > maxshift)
9071 *elementwidth = innersize * 8;
9074 *modconst = CONST_VECTOR_ELT (op, 0);
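/* Editorial example: for a V8QImode shift the element size is 8 bits, so
   the checks above accept immediates 0..7 for VSHL and 1..8 for VSHR; a
   count of 9, or a vector whose elements hold different counts, is
   rejected. */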
9079 /* Return a string suitable for output of Neon immediate logic operation
(VBIC or VORR) MNEM. */
9083 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9084 int inverse, int quad)
9086 int width, is_valid;
9087 static char templ[40];
9089 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9091 gcc_assert (is_valid != 0);
9094 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9096 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9101 /* Return a string suitable for output of Neon immediate shift operation
9102 (VSHR or VSHL) MNEM. */
9105 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9106 enum machine_mode mode, int quad,
9109 int width, is_valid;
9110 static char templ[40];
9112 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9113 gcc_assert (is_valid != 0);
9116 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9118 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9123 /* Output a sequence of pairwise operations to implement a reduction.
9124 NOTE: We do "too much work" here, because pairwise operations work on two
9125 registers-worth of operands in one go. Unfortunately, we do not think those
9126 extra calculations can be exploited to do the full operation in fewer steps.
9127 Although all vector elements of the result but the first are ignored, we
9128 actually calculate the same result in each of the elements. An alternative
9129 such as initially loading a vector with zero to use as each of the second
9130 operands would use up an additional register and take an extra instruction,
9131 for no particular gain. */
9134 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9135 rtx (*reduc) (rtx, rtx, rtx))
9137 enum machine_mode inner = GET_MODE_INNER (mode);
9138 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9141 for (i = parts / 2; i >= 1; i /= 2)
9143 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9144 emit_insn (reduc (dest, tmpsum, tmpsum));
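/* Editorial worked example: for a four-element vector {a, b, c, d} the
   loop above runs twice.  Each step feeds the running sum to itself as
   both operands, so the first pairwise addition produces partial sums
   {a+b, c+d, ...} and the second {a+b+c+d, ...}; only element 0 of the
   final result is subsequently used. */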
9149 /* If VALS is a vector constant that can be loaded into a register
9150 using VDUP, generate instructions to do so and return an RTX to
9151 assign to the register. Otherwise return NULL_RTX. */
9154 neon_vdup_constant (rtx vals)
9156 enum machine_mode mode = GET_MODE (vals);
9157 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9158 int n_elts = GET_MODE_NUNITS (mode);
9159 bool all_same = true;
9163 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9166 for (i = 0; i < n_elts; ++i)
9168 x = XVECEXP (vals, 0, i);
9169 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9174 /* The elements are not all the same. We could handle repeating
9175 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9176 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
9180 /* We can load this constant by using VDUP and a constant in a
9181 single ARM register. This will be cheaper than a vector load. */
9184 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9185 return gen_rtx_VEC_DUPLICATE (mode, x);
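/* Editorial example: for the V2SImode constant {5, 5} the code above emits
   the moral equivalent of

     mov r0, #5        @ copy_to_mode_reg
     vdup.32 d0, r0    @ VEC_DUPLICATE

   the same shape the arm_neon.h intrinsic vdup_n_s32 produces when its
   argument is not a compile-time constant. */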
9188 /* Generate code to load VALS, which is a PARALLEL containing only
9189 constants (for vec_init) or CONST_VECTOR, efficiently into a
9190 register. Returns an RTX to copy into the register, or NULL_RTX
9191 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9194 neon_make_constant (rtx vals)
9196 enum machine_mode mode = GET_MODE (vals);
9198 rtx const_vec = NULL_RTX;
9199 int n_elts = GET_MODE_NUNITS (mode);
9203 if (GET_CODE (vals) == CONST_VECTOR)
9205 else if (GET_CODE (vals) == PARALLEL)
9207 /* A CONST_VECTOR must contain only CONST_INTs and
9208 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9209 Only store valid constants in a CONST_VECTOR. */
9210 for (i = 0; i < n_elts; ++i)
9212 rtx x = XVECEXP (vals, 0, i);
9213 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9216 if (n_const == n_elts)
9217 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9222 if (const_vec != NULL
9223 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9224 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9226 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9227 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9228 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
9231 else if (const_vec != NULL_RTX)
9232 /* Load from constant pool. On Cortex-A8 this takes two cycles
9233 (for either double or quad vectors). We cannot take advantage
9234 of single-cycle VLD1 because we need a PC-relative addressing mode. */
9238 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9239 We cannot construct an initializer. */
9243 /* Initialize vector TARGET to VALS. */
9246 neon_expand_vector_init (rtx target, rtx vals)
9248 enum machine_mode mode = GET_MODE (target);
9249 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9250 int n_elts = GET_MODE_NUNITS (mode);
9251 int n_var = 0, one_var = -1;
9252 bool all_same = true;
9256 for (i = 0; i < n_elts; ++i)
9258 x = XVECEXP (vals, 0, i);
9259 if (!CONSTANT_P (x))
9260 ++n_var, one_var = i;
9262 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9268 rtx constant = neon_make_constant (vals);
9269 if (constant != NULL_RTX)
9271 emit_move_insn (target, constant);
9276 /* Splat a single non-constant element if we can. */
9277 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9279 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9280 emit_insn (gen_rtx_SET (VOIDmode, target,
9281 gen_rtx_VEC_DUPLICATE (mode, x)));
9285 /* One field is non-constant. Load constant then overwrite varying
9286 field. This is more efficient than using the stack. */
9289 rtx copy = copy_rtx (vals);
9290 rtx index = GEN_INT (one_var);
9292 /* Load constant part of vector, substitute neighboring value for varying element. */
9294 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9295 neon_expand_vector_init (target, copy);
9297 /* Insert variable. */
9298 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9302 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9305 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9308 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9311 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9314 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9317 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9320 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9323 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9326 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9334 /* Construct the vector in memory one field at a time
9335 and load the whole vector. */
9336 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9337 for (i = 0; i < n_elts; i++)
9338 emit_move_insn (adjust_address_nv (mem, inner_mode,
9339 i * GET_MODE_SIZE (inner_mode)),
9340 XVECEXP (vals, 0, i));
9341 emit_move_insn (target, mem);
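/* Editorial worked example: initializing {x, 1, 2, 3} with variable x
   takes the single-variable-element path above: the constant {1, 1, 2, 3}
   is loaded first (element 0 temporarily borrows its neighbour's value),
   and x is then inserted with the appropriate vset_lane pattern, which
   avoids the store-and-reload fallback at the end of the function. */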
9344 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9345 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9346 reported source locations are bogus. */
9349 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9354 gcc_assert (GET_CODE (operand) == CONST_INT);
9356 lane = INTVAL (operand);
9358 if (lane < low || lane >= high)
9362 /* Bounds-check lanes. */
9365 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9367 bounds_check (operand, low, high, "lane out of range");
9370 /* Bounds-check constants. */
9373 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9375 bounds_check (operand, low, high, "constant out of range");
9379 neon_element_bits (enum machine_mode mode)
9382 return GET_MODE_BITSIZE (mode);
9384 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9388 /* Predicates for `match_operand' and `match_operator'. */
9390 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9392 cirrus_memory_offset (rtx op)
9394 /* Reject eliminable registers. */
9395 if (! (reload_in_progress || reload_completed)
9396 && ( reg_mentioned_p (frame_pointer_rtx, op)
9397 || reg_mentioned_p (arg_pointer_rtx, op)
9398 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9399 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9400 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9401 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9404 if (GET_CODE (op) == MEM)
9410 /* Match: (mem (reg)). */
9411 if (GET_CODE (ind) == REG)
9417 if (GET_CODE (ind) == PLUS
9418 && GET_CODE (XEXP (ind, 0)) == REG
9419 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9420 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9427 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9428 WB is true if full writeback address modes are allowed and is false
9429 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
9433 arm_coproc_mem_operand (rtx op, bool wb)
9437 /* Reject eliminable registers. */
9438 if (! (reload_in_progress || reload_completed)
9439 && ( reg_mentioned_p (frame_pointer_rtx, op)
9440 || reg_mentioned_p (arg_pointer_rtx, op)
9441 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9442 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9444 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9447 /* Constants are converted into offsets from labels. */
9448 if (GET_CODE (op) != MEM)
9453 if (reload_completed
9454 && (GET_CODE (ind) == LABEL_REF
9455 || (GET_CODE (ind) == CONST
9456 && GET_CODE (XEXP (ind, 0)) == PLUS
9457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9458 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9461 /* Match: (mem (reg)). */
9462 if (GET_CODE (ind) == REG)
9463 return arm_address_register_rtx_p (ind, 0);
9465 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9466 acceptable in any case (subject to verification by
9467 arm_address_register_rtx_p). We need WB to be true to accept
9468 PRE_INC and POST_DEC. */
9469 if (GET_CODE (ind) == POST_INC
9470 || GET_CODE (ind) == PRE_DEC
9472 && (GET_CODE (ind) == PRE_INC
9473 || GET_CODE (ind) == POST_DEC)))
9474 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9477 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9478 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9479 && GET_CODE (XEXP (ind, 1)) == PLUS
9480 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9481 ind = XEXP (ind, 1);
9486 if (GET_CODE (ind) == PLUS
9487 && GET_CODE (XEXP (ind, 0)) == REG
9488 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9489 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9490 && INTVAL (XEXP (ind, 1)) > -1024
9491 && INTVAL (XEXP (ind, 1)) < 1024
9492 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9498 /* Return TRUE if OP is a memory operand which we can load or store a vector
9499 to/from. TYPE is one of the following values:
9500 0 - Vector load/store (vldr)
9501 1 - Core registers (ldm)
9502 2 - Element/structure loads (vld1)
9505 neon_vector_mem_operand (rtx op, int type)
9509 /* Reject eliminable registers. */
9510 if (! (reload_in_progress || reload_completed)
9511 && ( reg_mentioned_p (frame_pointer_rtx, op)
9512 || reg_mentioned_p (arg_pointer_rtx, op)
9513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9519 /* Constants are converted into offsets from labels. */
9520 if (GET_CODE (op) != MEM)
9525 if (reload_completed
9526 && (GET_CODE (ind) == LABEL_REF
9527 || (GET_CODE (ind) == CONST
9528 && GET_CODE (XEXP (ind, 0)) == PLUS
9529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9530 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9533 /* Match: (mem (reg)). */
9534 if (GET_CODE (ind) == REG)
9535 return arm_address_register_rtx_p (ind, 0);
9537 /* Allow post-increment with Neon registers. */
9538 if ((type != 1 && GET_CODE (ind) == POST_INC)
9539 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9540 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9542 /* FIXME: vld1 allows register post-modify. */
9548 && GET_CODE (ind) == PLUS
9549 && GET_CODE (XEXP (ind, 0)) == REG
9550 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9551 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9552 && INTVAL (XEXP (ind, 1)) > -1024
9553 && INTVAL (XEXP (ind, 1)) < 1016
9554 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9560 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct mode. */
9563 neon_struct_mem_operand (rtx op)
9567 /* Reject eliminable registers. */
9568 if (! (reload_in_progress || reload_completed)
9569 && ( reg_mentioned_p (frame_pointer_rtx, op)
9570 || reg_mentioned_p (arg_pointer_rtx, op)
9571 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9572 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9573 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9574 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9577 /* Constants are converted into offsets from labels. */
9578 if (GET_CODE (op) != MEM)
9583 if (reload_completed
9584 && (GET_CODE (ind) == LABEL_REF
9585 || (GET_CODE (ind) == CONST
9586 && GET_CODE (XEXP (ind, 0)) == PLUS
9587 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9588 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9591 /* Match: (mem (reg)). */
9592 if (GET_CODE (ind) == REG)
9593 return arm_address_register_rtx_p (ind, 0);
9595 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9596 if (GET_CODE (ind) == POST_INC
9597 || GET_CODE (ind) == PRE_DEC)
9598 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9603 /* Return true if X is a register that will be eliminated later on. */
9605 arm_eliminable_register (rtx x)
9607 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9608 || REGNO (x) == ARG_POINTER_REGNUM
9609 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9610 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9613 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9614 coprocessor registers. Otherwise return NO_REGS. */
9617 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9621 if (!TARGET_NEON_FP16)
9622 return GENERAL_REGS;
9623 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9625 return GENERAL_REGS;
9628 /* The neon move patterns handle all legitimate vector and struct addresses. */
9631 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9632 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9633 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9634 || VALID_NEON_STRUCT_MODE (mode)))
9637 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9640 return GENERAL_REGS;
9643 /* Values which must be returned in the most-significant end of the return register. */
9647 arm_return_in_msb (const_tree valtype)
9649 return (TARGET_AAPCS_BASED
9651 && (AGGREGATE_TYPE_P (valtype)
9652 || TREE_CODE (valtype) == COMPLEX_TYPE
9653 || FIXED_POINT_TYPE_P (valtype)));
9656 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9657 Used by the Cirrus Maverick code which has to work around
9658 a hardware bug triggered by such instructions. */
9660 arm_memory_load_p (rtx insn)
9662 rtx body, lhs, rhs;
9664 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9667 body = PATTERN (insn);
9669 if (GET_CODE (body) != SET)
9672 lhs = XEXP (body, 0);
9673 rhs = XEXP (body, 1);
9675 lhs = REG_OR_SUBREG_RTX (lhs);
9677 /* If the destination is not a general purpose
9678 register we do not have to worry. */
9679 if (GET_CODE (lhs) != REG
9680 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9683 /* As well as loads from memory we also have to react
9684 to loads of invalid constants which will be turned
9685 into loads from the minipool. */
9686 return (GET_CODE (rhs) == MEM
9687 || GET_CODE (rhs) == SYMBOL_REF
9688 || note_invalid_constants (insn, -1, false));
9691 /* Return TRUE if INSN is a Cirrus instruction. */
9693 arm_cirrus_insn_p (rtx insn)
9695 enum attr_cirrus attr;
9697 /* get_attr cannot accept USE or CLOBBER. */
9699 || GET_CODE (insn) != INSN
9700 || GET_CODE (PATTERN (insn)) == USE
9701 || GET_CODE (PATTERN (insn)) == CLOBBER)
9704 attr = get_attr_cirrus (insn);
9706 return attr != CIRRUS_NOT;
9709 /* Cirrus reorg for invalid instruction combinations. */
9711 cirrus_reorg (rtx first)
9713 enum attr_cirrus attr;
9714 rtx body = PATTERN (first);
9718 /* Any branch must be followed by 2 non-Cirrus instructions. */
9719 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9722 t = next_nonnote_insn (first);
9724 if (arm_cirrus_insn_p (t))
9727 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9731 emit_insn_after (gen_nop (), first);
9736 /* (float (blah)) is in parallel with a clobber. */
9737 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9738 body = XVECEXP (body, 0, 0);
9740 if (GET_CODE (body) == SET)
9742 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9744 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9745 be followed by a non-Cirrus insn. */
9746 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9748 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9749 emit_insn_after (gen_nop (), first);
9753 else if (arm_memory_load_p (first))
9755 unsigned int arm_regno;
9757 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9758 ldr/cfmv64hr combination where the Rd field is the same
9759 in both instructions must be split with a non-Cirrus
9766 /* Get Arm register number for ldr insn. */
9767 if (GET_CODE (lhs) == REG)
9768 arm_regno = REGNO (lhs);
9771 gcc_assert (GET_CODE (rhs) == REG);
9772 arm_regno = REGNO (rhs);
9776 first = next_nonnote_insn (first);
9778 if (! arm_cirrus_insn_p (first))
9781 body = PATTERN (first);
9783 /* (float (blah)) is in parallel with a clobber. */
9784 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9785 body = XVECEXP (body, 0, 0);
9787 if (GET_CODE (body) == FLOAT)
9788 body = XEXP (body, 0);
9790 if (get_attr_cirrus (first) == CIRRUS_MOVE
9791 && GET_CODE (XEXP (body, 1)) == REG
9792 && arm_regno == REGNO (XEXP (body, 1)))
9793 emit_insn_after (gen_nop (), first);
9799 /* get_attr cannot accept USE or CLOBBER. */
9801 || GET_CODE (first) != INSN
9802 || GET_CODE (PATTERN (first)) == USE
9803 || GET_CODE (PATTERN (first)) == CLOBBER)
9806 attr = get_attr_cirrus (first);
9808 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9809 must be followed by a non-coprocessor instruction. */
9810 if (attr == CIRRUS_COMPARE)
9814 t = next_nonnote_insn (first);
9816 if (arm_cirrus_insn_p (t))
9819 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9823 emit_insn_after (gen_nop (), first);
9829 /* Return TRUE if X references a SYMBOL_REF. */
9831 symbol_mentioned_p (rtx x)
9836 if (GET_CODE (x) == SYMBOL_REF)
9839 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9840 are constant offsets, not symbols. */
9841 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9844 fmt = GET_RTX_FORMAT (GET_CODE (x));
9846 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9852 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9853 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9856 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9863 /* Return TRUE if X references a LABEL_REF. */
9865 label_mentioned_p (rtx x)
9870 if (GET_CODE (x) == LABEL_REF)
9873 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9874 instruction, but they are constant offsets, not symbols. */
9875 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9878 fmt = GET_RTX_FORMAT (GET_CODE (x));
9879 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9885 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9886 if (label_mentioned_p (XVECEXP (x, i, j)))
9889 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9897 tls_mentioned_p (rtx x)
9899 switch (GET_CODE (x))
9902 return tls_mentioned_p (XEXP (x, 0));
9905 if (XINT (x, 1) == UNSPEC_TLS)
9913 /* Must not copy any rtx that uses a pc-relative address. */
9916 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9918 if (GET_CODE (*x) == UNSPEC
9919 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9925 arm_cannot_copy_insn_p (rtx insn)
9927 /* The tls call insn cannot be copied, as it is paired with a data word. */
9929 if (recog_memoized (insn) == CODE_FOR_tlscall)
9932 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9938 enum rtx_code code = GET_CODE (x);
9955 /* Return 1 if memory locations are adjacent. */
9957 adjacent_mem_locations (rtx a, rtx b)
9959 /* We don't guarantee to preserve the order of these memory refs. */
9960 if (volatile_refs_p (a) || volatile_refs_p (b))
9963 if ((GET_CODE (XEXP (a, 0)) == REG
9964 || (GET_CODE (XEXP (a, 0)) == PLUS
9965 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9966 && (GET_CODE (XEXP (b, 0)) == REG
9967 || (GET_CODE (XEXP (b, 0)) == PLUS
9968 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9970 HOST_WIDE_INT val0 = 0, val1 = 0;
9974 if (GET_CODE (XEXP (a, 0)) == PLUS)
9976 reg0 = XEXP (XEXP (a, 0), 0);
9977 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9982 if (GET_CODE (XEXP (b, 0)) == PLUS)
9984 reg1 = XEXP (XEXP (b, 0), 0);
9985 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9990 /* Don't accept any offset that will require multiple
9991 instructions to handle, since this would cause the
9992 arith_adjacentmem pattern to output an overlong sequence. */
9993 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9996 /* Don't allow an eliminable register: register elimination can make
9997 the offset too large. */
9998 if (arm_eliminable_register (reg0))
10001 val_diff = val1 - val0;
10005 /* If the target has load delay slots, then there's no benefit
10006 to using an ldm instruction unless the offset is zero and
10007 we are optimizing for size. */
10008 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10009 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10010 && (val_diff == 4 || val_diff == -4));
10013 return ((REGNO (reg0) == REGNO (reg1))
10014 && (val_diff == 4 || val_diff == -4));
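/* Editorial example: for accesses at [r4, #8] and [r4, #12] the code above
   finds reg0 == reg1 == r4, val0 == 8 and val1 == 12, so val_diff == 4
   and (on cores without load delay slots) the locations count as
   adjacent; swapping the operands gives val_diff == -4, which is equally
   acceptable. */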
10020 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10021 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10022 instruction. ADD_OFFSET is nonzero if the base address register needs
10023 to be modified with an add instruction before we can use it. */
10026 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10027 int nops, HOST_WIDE_INT add_offset)
10029 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10030 if the offset isn't small enough. The reason 2 ldrs are faster
10031 is because these ARMs are able to do more than one cache access
10032 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10033 whilst the ARM8 has a double bandwidth cache. This means that
10034 these cores can do both an instruction fetch and a data fetch in
10035 a single cycle, so the trick of calculating the address into a
10036 scratch register (one of the result regs) and then doing a load
10037 multiple actually becomes slower (and no smaller in code size).
10038 That is the transformation
10040 ldr rd1, [rbase + offset]
10041 ldr rd2, [rbase + offset + 4]
10045 add rd1, rbase, offset
10046 ldmia rd1, {rd1, rd2}
10048 produces worse code -- '3 cycles + any stalls on rd2' instead of
10049 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10050 access per cycle, the first sequence could never complete in less
10051 than 6 cycles, whereas the ldm sequence would only take 5 and
10052 would make better use of sequential accesses if not hitting the
10055 We cheat here and test 'arm_ld_sched' which we currently know to
10056 only be true for the ARM8, ARM9 and StrongARM. If this ever
10057 changes, then the test below needs to be reworked. */
10058 if (nops == 2 && arm_ld_sched && add_offset != 0)
10061 /* XScale has load-store double instructions, but they have stricter
10062 alignment requirements than load-store multiple, so we cannot use them.
10065 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10066 the pipeline until completion.
10074 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
10083 Best case ldr will always win. However, the more ldr instructions
10084 we issue, the less likely we are to be able to schedule them well.
10085 Using ldr instructions also increases code size.
10087 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10088 for counts of 3 or 4 regs. */
10089 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10094 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10095 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10096 an array ORDER which describes the sequence to use when accessing the
10097 offsets that produces an ascending order. In this sequence, each
10098 offset must be larger by exactly 4 than the previous one. ORDER[0]
10099 must have been filled in with the lowest offset by the caller.
10100 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10101 we use to verify that ORDER produces an ascending order of registers.
10102 Return true if it was possible to construct such an order, false if not. */
10106 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10107 int *unsorted_regs)
10110 for (i = 1; i < nops; i++)
10114 order[i] = order[i - 1];
10115 for (j = 0; j < nops; j++)
10116 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10118 /* We must find exactly one offset that is higher than the
10119 previous one by 4. */
10120 if (order[i] != order[i - 1])
10124 if (order[i] == order[i - 1])
10126 /* The register numbers must be ascending. */
10127 if (unsorted_regs != NULL
10128 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
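/* Editorial worked example: with unsorted_offsets == {12, 4, 0, 8} the
   caller seeds order[0] = 2 (the index of the lowest offset); the loop
   above then locates offsets 4, 8 and 12 in turn, yielding
   order == {2, 1, 3, 0}.  If any step of 4 is missing, or appears twice,
   the function returns false. */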
10134 /* Used to determine in a peephole whether a sequence of load
10135 instructions can be changed into a load-multiple instruction.
10136 NOPS is the number of separate load instructions we are examining. The
10137 first NOPS entries in OPERANDS are the destination registers, the
10138 next NOPS entries are memory operands. If this function is
10139 successful, *BASE is set to the common base register of the memory
10140 accesses; *LOAD_OFFSET is set to the first memory location's offset
10141 from that base register.
10142 REGS is an array filled in with the destination register numbers.
10143 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10144 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10145 the sequence of registers in REGS matches the loads from ascending memory
10146 locations, and the function verifies that the register numbers are
10147 themselves ascending. If CHECK_REGS is false, the register numbers
10148 are stored in the order they are found in the operands. */
10150 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10151 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10153 int unsorted_regs[MAX_LDM_STM_OPS];
10154 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10155 int order[MAX_LDM_STM_OPS];
10156 rtx base_reg_rtx = NULL;
10160 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10161 easily extended if required. */
10162 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10164 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10166 /* Loop over the operands and check that the memory references are
10167 suitable (i.e. immediate offsets from the same base register). At
10168 the same time, extract the target register, and the memory offsets. */
10170 for (i = 0; i < nops; i++)
10175 /* Convert a subreg of a mem into the mem itself. */
10176 if (GET_CODE (operands[nops + i]) == SUBREG)
10177 operands[nops + i] = alter_subreg (operands + (nops + i));
10179 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10181 /* Don't reorder volatile memory references; it doesn't seem worth
10182 looking for the case where the order is ok anyway. */
10183 if (MEM_VOLATILE_P (operands[nops + i]))
10186 offset = const0_rtx;
10188 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10189 || (GET_CODE (reg) == SUBREG
10190 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10191 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10192 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10194 || (GET_CODE (reg) == SUBREG
10195 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10196 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10201 base_reg = REGNO (reg);
10202 base_reg_rtx = reg;
10203 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10206 else if (base_reg != (int) REGNO (reg))
10207 /* Not addressed from the same base register. */
10210 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10211 ? REGNO (operands[i])
10212 : REGNO (SUBREG_REG (operands[i])));
10214 /* If it isn't an integer register, or if it overwrites the
10215 base register but isn't the last insn in the list, then
10216 we can't do this. */
10217 if (unsorted_regs[i] < 0
10218 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10219 || unsorted_regs[i] > 14
10220 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10223 unsorted_offsets[i] = INTVAL (offset);
10224 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10228 /* Not a suitable memory address. */
10232 /* All the useful information has now been extracted from the
10233 operands into unsorted_regs and unsorted_offsets; additionally,
10234 order[0] has been set to the lowest offset in the list. Sort
10235 the offsets into order, verifying that they are adjacent, and
10236 check that the register numbers are ascending. */
10237 if (!compute_offset_order (nops, unsorted_offsets, order,
10238 check_regs ? unsorted_regs : NULL))
10242 memcpy (saved_order, order, sizeof order);
10248 for (i = 0; i < nops; i++)
10249 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10251 *load_offset = unsorted_offsets[order[0]];
10255 && !peep2_reg_dead_p (nops, base_reg_rtx))
10258 if (unsorted_offsets[order[0]] == 0)
10259 ldm_case = 1; /* ldmia */
10260 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10261 ldm_case = 2; /* ldmib */
10262 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10263 ldm_case = 3; /* ldmda */
10264 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10265 ldm_case = 4; /* ldmdb */
10266 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10267 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10272 if (!multiple_operation_profitable_p (false, nops,
10274 ? unsorted_offsets[order[0]] : 0))
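/* Editorial examples of the ldm_case classification above: loads at
   offsets {0, 4, 8} from the base give case 1 (ldmia); {4, 8, 12} gives
   case 2 (ldmib, ARM only); {-8, -4, 0} gives case 3 (ldmda, ARM only);
   and {-12, -8, -4} gives case 4 (ldmdb). */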
10280 /* Used to determine in a peephole whether a sequence of store instructions can
10281 be changed into a store-multiple instruction.
10282 NOPS is the number of separate store instructions we are examining.
10283 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
10285 The first NOPS entries in OPERANDS are the source registers, the next
10286 NOPS entries are memory operands. If this function is successful, *BASE is
10287 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10288 to the first memory location's offset from that base register. REGS is an
10289 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10290 likewise filled with the corresponding rtx's.
10291 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10292 numbers to an ascending order of stores.
10293 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10294 from ascending memory locations, and the function verifies that the register
10295 numbers are themselves ascending. If CHECK_REGS is false, the register
10296 numbers are stored in the order they are found in the operands. */
10298 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10299 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10300 HOST_WIDE_INT *load_offset, bool check_regs)
10302 int unsorted_regs[MAX_LDM_STM_OPS];
10303 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10304 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10305 int order[MAX_LDM_STM_OPS];
10307 rtx base_reg_rtx = NULL;
10310 /* Write back of base register is currently only supported for Thumb 1. */
10311 int base_writeback = TARGET_THUMB1;
10313 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10314 easily extended if required. */
10315 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10317 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10319 /* Loop over the operands and check that the memory references are
10320 suitable (i.e. immediate offsets from the same base register). At
10321 the same time, extract the target register, and the memory offsets. */
10323 for (i = 0; i < nops; i++)
10328 /* Convert a subreg of a mem into the mem itself. */
10329 if (GET_CODE (operands[nops + i]) == SUBREG)
10330 operands[nops + i] = alter_subreg (operands + (nops + i));
10332 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10334 /* Don't reorder volatile memory references; it doesn't seem worth
10335 looking for the case where the order is ok anyway. */
10336 if (MEM_VOLATILE_P (operands[nops + i]))
10339 offset = const0_rtx;
10341 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10342 || (GET_CODE (reg) == SUBREG
10343 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10344 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10345 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10347 || (GET_CODE (reg) == SUBREG
10348 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10349 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10352 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10353 ? operands[i] : SUBREG_REG (operands[i]));
10354 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10358 base_reg = REGNO (reg);
10359 base_reg_rtx = reg;
10360 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10363 else if (base_reg != (int) REGNO (reg))
10364 /* Not addressed from the same base register. */
10367 /* If it isn't an integer register, then we can't do this. */
10368 if (unsorted_regs[i] < 0
10369 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10370 /* The effects are unpredictable if the base register is
10371 both updated and stored. */
10372 || (base_writeback && unsorted_regs[i] == base_reg)
10373 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10374 || unsorted_regs[i] > 14)
10377 unsorted_offsets[i] = INTVAL (offset);
10378 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10382 /* Not a suitable memory address. */
10386 /* All the useful information has now been extracted from the
10387 operands into unsorted_regs and unsorted_offsets; additionally,
10388 order[0] has been set to the lowest offset in the list. Sort
10389 the offsets into order, verifying that they are adjacent, and
10390 check that the register numbers are ascending. */
10391 if (!compute_offset_order (nops, unsorted_offsets, order,
10392 check_regs ? unsorted_regs : NULL))
10396 memcpy (saved_order, order, sizeof order);
10402 for (i = 0; i < nops; i++)
10404 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10406 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10409 *load_offset = unsorted_offsets[order[0]];
10413 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10416 if (unsorted_offsets[order[0]] == 0)
10417 stm_case = 1; /* stmia */
10418 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10419 stm_case = 2; /* stmib */
10420 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10421 stm_case = 3; /* stmda */
10422 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10423 stm_case = 4; /* stmdb */
10427 if (!multiple_operation_profitable_p (false, nops, 0))
10433 /* Routines for use in generating RTL. */
10435 /* Generate a load-multiple instruction. COUNT is the number of loads in
10436 the instruction; REGS and MEMS are arrays containing the operands.
10437 BASEREG is the base register to be used in addressing the memory operands.
10438 WBACK_OFFSET is nonzero if the instruction should update the base register. */
10442 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10443 HOST_WIDE_INT wback_offset)
10448 if (!multiple_operation_profitable_p (false, count, 0))
10454 for (i = 0; i < count; i++)
10455 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10457 if (wback_offset != 0)
10458 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10460 seq = get_insns ();
10466 result = gen_rtx_PARALLEL (VOIDmode,
10467 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10468 if (wback_offset != 0)
10470 XVECEXP (result, 0, 0)
10471 = gen_rtx_SET (VOIDmode, basereg,
10472 plus_constant (basereg, wback_offset));
10477 for (j = 0; i < count; i++, j++)
10478 XVECEXP (result, 0, i)
10479 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10484 /* Generate a store-multiple instruction. COUNT is the number of stores in
10485 the instruction; REGS and MEMS are arrays containing the operands.
10486 BASEREG is the base register to be used in addressing the memory operands.
10487 WBACK_OFFSET is nonzero if the instruction should update the base register. */
10491 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10492 HOST_WIDE_INT wback_offset)
10497 if (GET_CODE (basereg) == PLUS)
10498 basereg = XEXP (basereg, 0);
10500 if (!multiple_operation_profitable_p (false, count, 0))
10506 for (i = 0; i < count; i++)
10507 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10509 if (wback_offset != 0)
10510 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10512 seq = get_insns ();
10518 result = gen_rtx_PARALLEL (VOIDmode,
10519 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10520 if (wback_offset != 0)
10522 XVECEXP (result, 0, 0)
10523 = gen_rtx_SET (VOIDmode, basereg,
10524 plus_constant (basereg, wback_offset));
10529 for (j = 0; i < count; i++, j++)
10530 XVECEXP (result, 0, i)
10531 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10536 /* Generate either a load-multiple or a store-multiple instruction. This
10537 function can be used in situations where we can start with a single MEM
10538 rtx and adjust its address upwards.
10539 COUNT is the number of operations in the instruction, not counting a
10540 possible update of the base register. REGS is an array containing the register numbers to be used.
10542 BASEREG is the base register to be used in addressing the memory operands,
10543 which are constructed from BASEMEM.
10544 WRITE_BACK specifies whether the generated instruction should include an
10545 update of the base register.
10546 OFFSETP is used to pass an offset to and from this function; this offset
10547 is not used when constructing the address (instead BASEMEM should have an
10548 appropriate offset in its address); it is used only for setting
10549 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10552 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10553 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10555 rtx mems[MAX_LDM_STM_OPS];
10556 HOST_WIDE_INT offset = *offsetp;
10559 gcc_assert (count <= MAX_LDM_STM_OPS);
10561 if (GET_CODE (basereg) == PLUS)
10562 basereg = XEXP (basereg, 0);
10564 for (i = 0; i < count; i++)
10566 rtx addr = plus_constant (basereg, i * 4);
10567 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10575 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10576 write_back ? 4 * count : 0);
10578 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10579 write_back ? 4 * count : 0);
10583 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10584 rtx basemem, HOST_WIDE_INT *offsetp)
10586 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10591 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10592 rtx basemem, HOST_WIDE_INT *offsetp)
10594 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10598 /* Called from a peephole2 expander to turn a sequence of loads into an
10599 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10600 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10601 is true if we can reorder the registers because their subsequent uses are commutative.
10603 Returns true iff we could generate a new instruction. */
10606 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10608 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10609 rtx mems[MAX_LDM_STM_OPS];
10610 int i, j, base_reg;
10612 HOST_WIDE_INT offset;
10613 int write_back = FALSE;
10617 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10618 &base_reg, &offset, !sort_regs);
10624 for (i = 0; i < nops - 1; i++)
10625 for (j = i + 1; j < nops; j++)
10626 if (regs[i] > regs[j])
10632 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10636 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10637 gcc_assert (ldm_case == 1 || ldm_case == 5);
10643 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10644 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10646 if (!TARGET_THUMB1)
10648 base_reg = regs[0];
10649 base_reg_rtx = newbase;
10653 for (i = 0; i < nops; i++)
10655 addr = plus_constant (base_reg_rtx, offset + i * 4);
10656 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10659 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10660 write_back ? offset + i * 4 : 0));
10664 /* Called from a peephole2 expander to turn a sequence of stores into an
10665 STM instruction. OPERANDS are the operands found by the peephole matcher;
10666 NOPS indicates how many separate stores we are trying to combine.
10667 Returns true iff we could generate a new instruction. */
10670 gen_stm_seq (rtx *operands, int nops)
10673 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10674 rtx mems[MAX_LDM_STM_OPS];
10677 HOST_WIDE_INT offset;
10678 int write_back = FALSE;
10681 bool base_reg_dies;
10683 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10684 mem_order, &base_reg, &offset, true);
10689 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10691 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10694 gcc_assert (base_reg_dies);
10700 gcc_assert (base_reg_dies);
10701 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10705 addr = plus_constant (base_reg_rtx, offset);
10707 for (i = 0; i < nops; i++)
10709 addr = plus_constant (base_reg_rtx, offset + i * 4);
10710 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10713 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10714 write_back ? offset + i * 4 : 0));
10718 /* Called from a peephole2 expander to turn a sequence of stores that are
10719 preceded by constant loads into an STM instruction. OPERANDS are the
10720 operands found by the peephole matcher; NOPS indicates how many
10721 separate stores we are trying to combine; there are 2 * NOPS
10722 instructions in the peephole.
10723 Returns true iff we could generate a new instruction. */
10726 gen_const_stm_seq (rtx *operands, int nops)
10728 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10729 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10730 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10731 rtx mems[MAX_LDM_STM_OPS];
10734 HOST_WIDE_INT offset;
10735 int write_back = FALSE;
10738 bool base_reg_dies;
10740 HARD_REG_SET allocated;
10742 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10743 mem_order, &base_reg, &offset, false);
10748 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10750 /* If the same register is used more than once, try to find a free register. */
10752 CLEAR_HARD_REG_SET (allocated);
10753 for (i = 0; i < nops; i++)
10755 for (j = i + 1; j < nops; j++)
10756 if (regs[i] == regs[j])
10758 rtx t = peep2_find_free_register (0, nops * 2,
10759 TARGET_THUMB1 ? "l" : "r",
10760 SImode, &allocated);
10764 regs[i] = REGNO (t);
10768 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10771 for (i = 0; i < nops; i++)
10772 if (regs[i] < regs[reg_order[0]])
10775 for (i = 1; i < nops; i++)
10777 int this_order = reg_order[i - 1];
10778 for (j = 0; j < nops; j++)
10779 if (regs[j] > regs[reg_order[i - 1]]
10780 && (this_order == reg_order[i - 1]
10781 || regs[j] < regs[this_order]))
10783 reg_order[i] = this_order;
10786 /* Ensure that registers that must be live after the instruction end
10787 up with the correct value. */
10788 for (i = 0; i < nops; i++)
10790 int this_order = reg_order[i];
10791 if ((this_order != mem_order[i]
10792 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10793 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10797 /* Load the constants. */
10798 for (i = 0; i < nops; i++)
10800 rtx op = operands[2 * nops + mem_order[i]];
10801 sorted_regs[i] = regs[reg_order[i]];
10802 emit_move_insn (reg_rtxs[reg_order[i]], op);
10805 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10807 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10810 gcc_assert (base_reg_dies);
10816 gcc_assert (base_reg_dies);
10817 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10821 addr = plus_constant (base_reg_rtx, offset);
10823 for (i = 0; i < nops; i++)
10825 addr = plus_constant (base_reg_rtx, offset + i * 4);
10826 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10829 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10830 write_back ? offset + i * 4 : 0));
10834 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10835 unaligned copies on processors which support unaligned semantics for those
10836 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10837 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10838 An interleave factor of 1 (the minimum) will perform no interleaving.
10839 Load/store multiple are used for aligned addresses where possible. */
10842 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10843 HOST_WIDE_INT length,
10844 unsigned int interleave_factor)
10846 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10847 int *regnos = XALLOCAVEC (int, interleave_factor);
10848 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10849 HOST_WIDE_INT i, j;
10850 HOST_WIDE_INT remaining = length, words;
10851 rtx halfword_tmp = NULL, byte_tmp = NULL;
10853 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10854 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10855 HOST_WIDE_INT srcoffset, dstoffset;
10856 HOST_WIDE_INT src_autoinc, dst_autoinc;
10859 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10861 /* Use hard registers if we have aligned source or destination so we can use
10862 load/store multiple with contiguous registers. */
10863 if (dst_aligned || src_aligned)
10864 for (i = 0; i < interleave_factor; i++)
10865 regs[i] = gen_rtx_REG (SImode, i);
10867 for (i = 0; i < interleave_factor; i++)
10868 regs[i] = gen_reg_rtx (SImode);
10870 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10871 src = copy_addr_to_reg (XEXP (srcbase, 0));
10873 srcoffset = dstoffset = 0;
10875 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10876 For copying the last bytes we want to subtract this offset again. */
10877 src_autoinc = dst_autoinc = 0;
10879 for (i = 0; i < interleave_factor; i++)
10882 /* Copy BLOCK_SIZE_BYTES chunks. */
10884 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10887 if (src_aligned && interleave_factor > 1)
10889 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10890 TRUE, srcbase, &srcoffset));
10891 src_autoinc += UNITS_PER_WORD * interleave_factor;
10895 for (j = 0; j < interleave_factor; j++)
10897 addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
10899 mem = adjust_automodify_address (srcbase, SImode, addr,
10900 srcoffset + j * UNITS_PER_WORD);
10901 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10903 srcoffset += block_size_bytes;
10907 if (dst_aligned && interleave_factor > 1)
10909 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10910 TRUE, dstbase, &dstoffset));
10911 dst_autoinc += UNITS_PER_WORD * interleave_factor;
10915 for (j = 0; j < interleave_factor; j++)
10917 addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
10919 mem = adjust_automodify_address (dstbase, SImode, addr,
10920 dstoffset + j * UNITS_PER_WORD);
10921 emit_insn (gen_unaligned_storesi (mem, regs[j]));
10923 dstoffset += block_size_bytes;
10926 remaining -= block_size_bytes;
10929 /* Copy any whole words left (note these aren't interleaved with any
10930 subsequent halfword/byte load/stores in the interests of simplicity). */
10932 words = remaining / UNITS_PER_WORD;
10934 gcc_assert (words < interleave_factor);
10936 if (src_aligned && words > 1)
10938 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
10940 src_autoinc += UNITS_PER_WORD * words;
10944 for (j = 0; j < words; j++)
10946 addr = plus_constant (src,
10947 srcoffset + j * UNITS_PER_WORD - src_autoinc);
10948 mem = adjust_automodify_address (srcbase, SImode, addr,
10949 srcoffset + j * UNITS_PER_WORD);
10950 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10952 srcoffset += words * UNITS_PER_WORD;
  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
10965 addr = plus_constant (dst,
10966 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
10967 mem = adjust_automodify_address (dstbase, SImode, addr,
10968 dstoffset + j * UNITS_PER_WORD);
	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;
10976 gcc_assert (remaining < 4);
10978 /* Copy a halfword if necessary. */
10980 if (remaining >= 2)
10982 halfword_tmp = gen_reg_rtx (SImode);
10984 addr = plus_constant (src, srcoffset - src_autoinc);
10985 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
10986 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
10988 /* Either write out immediately, or delay until we've loaded the last
10989 byte, depending on interleave factor. */
10990 if (interleave_factor == 1)
10992 addr = plus_constant (dst, dstoffset - dst_autoinc);
10993 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
10994 emit_insn (gen_unaligned_storehi (mem,
10995 gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }
11004 gcc_assert (remaining < 2);
11006 /* Copy last byte. */
11008 if ((remaining & 1) != 0)
11010 byte_tmp = gen_reg_rtx (SImode);
11012 addr = plus_constant (src, srcoffset - src_autoinc);
11013 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11014 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11016 if (interleave_factor == 1)
11018 addr = plus_constant (dst, dstoffset - dst_autoinc);
11019 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }
11029 /* Store last halfword if we haven't done so already. */
  if (halfword_tmp)
    {
      addr = plus_constant (dst, dstoffset - dst_autoinc);
11034 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }
11040 /* Likewise for last byte. */
  if (byte_tmp)
    {
      addr = plus_constant (dst, dstoffset - dst_autoinc);
11045 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }
  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
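/* [Editor's addition] A self-contained sketch of the copy schedule the
   function above emits, written against plain memory rather than RTL.
   It assumes a 4-byte word, as on ARM; names are the editor's own.  */

#include <stdint.h>
#include <string.h>

static void
block_copy_straight_sketch (unsigned char *dst, const unsigned char *src,
			    long length, unsigned int interleave_factor)
{
  long block = (long) interleave_factor * 4, remaining = length, off = 0;
  uint32_t regs[4];
  long i, j, words;

  /* Word-sized chunks, INTERLEAVE_FACTOR words per group, mirroring the
     ldm/stm or unaligned load/store groups emitted above.  */
  for (i = 0; i + block <= length; i += block, remaining -= block)
    {
      for (j = 0; j < interleave_factor; j++)
	memcpy (&regs[j], src + off + j * 4, 4);
      for (j = 0; j < interleave_factor; j++)
	memcpy (dst + off + j * 4, &regs[j], 4);
      off += block;
    }

  /* Whole words left over (always fewer than INTERLEAVE_FACTOR).  */
  words = remaining / 4;
  for (j = 0; j < words; j++)
    memcpy (dst + off + j * 4, src + off + j * 4, 4);
  off += words * 4;
  remaining -= words * 4;

  /* Then at most one halfword and one byte, exactly as above.  */
  if (remaining >= 2)
    {
      memcpy (dst + off, src + off, 2);
      off += 2;
      remaining -= 2;
    }
  if (remaining)
    dst[off] = src[off];
}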
11053 /* From mips_adjust_block_mem:
11055 Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.
11059 Create a new base register for use within the loop and point it to
11060 the start of MEM. Create a new memory reference that uses this
11061 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
11067 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11069 /* Although the new mem does not refer to a known location,
11070 it does keep up to LENGTH bytes of alignment. */
11071 *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
11075 /* From mips_block_move_loop:
11077 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11078 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11079 the memory regions do not overlap. */
static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11083 unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
11086 rtx label, src_reg, dest_reg, final_src, test;
11087 HOST_WIDE_INT leftover;
11089 leftover = length % bytes_per_iter;
11090 length -= leftover;
11092 /* Create registers and memory references for use within the loop. */
11093 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11094 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
11098 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11099 0, 0, OPTAB_WIDEN);
11101 /* Emit the start of the loop. */
11102 label = gen_label_rtx ();
11103 emit_label (label);
11105 /* Emit the loop body. */
11106 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11107 interleave_factor);
11109 /* Move on to the next block. */
11110 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11111 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11113 /* Emit the loop condition. */
11114 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11115 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11117 /* Mop up any left-over bytes. */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
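/* [Editor's addition] The loop decomposition above, as a plain-C sketch
   rather than emitted RTL: BYTES_PER_ITER bytes per iteration, with the
   remainder mopped up straight-line, and the exit test comparing the
   source pointer against its final value just as the emitted cbranch
   does.  Uses block_copy_straight_sketch from the previous note.  */

static void
block_copy_loop_sketch (unsigned char *dst, const unsigned char *src,
			long length, unsigned int interleave_factor,
			long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);

  while (src != final_src)
    {
      block_copy_straight_sketch (dst, src, bytes_per_iter,
				  interleave_factor);
      src += bytes_per_iter;
      dst += bytes_per_iter;
    }

  if (leftover)
    block_copy_straight_sketch (dst, src, leftover, interleave_factor);
}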
11122 /* Emit a block move when either the source or destination is unaligned (not
11123 aligned to a four-byte boundary). This may need further tuning depending on
11124 core type, optimize_size setting, etc. */
static int
arm_movmemqi_unaligned (rtx *operands)
{
11129 HOST_WIDE_INT length = INTVAL (operands[2]);
  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11134 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11135 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11136 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11137 or dst_aligned though: allow more interleaving in those cases since the
11138 resulting code can be smaller. */
11139 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11140 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
11144 interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
11147 interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 awkward.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
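/* [Editor's addition] The tuning above, summarized as a table; this helper
   only illustrates the decision and is not part of arm.c.  */

static void
movmemqi_unaligned_params_sketch (long length, int either_side_aligned,
				  int optimizing_for_size,
				  unsigned int *interleave, long *per_iter,
				  int *use_loop)
{
  if (optimizing_for_size)
    {
      /* Small code: little or no interleaving; loop unless very short.  */
      *interleave = either_side_aligned ? 2 : 1;
      *per_iter = either_side_aligned ? 8 : 4;
      *use_loop = length > 12;
    }
  else
    {
      /* Speed: four words per group; loop only for larger copies.  */
      *interleave = 4;
      *per_iter = 16;
      *use_loop = length > 32;
    }
}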
int
arm_gen_movmemqi (rtx *operands)
{
11166 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11167 HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;
11173 if (GET_CODE (operands[2]) != CONST_INT
11174 || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64)
    return 0;
11178 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11179 return arm_movmemqi_unaligned (operands);
  if (INTVAL (operands[3]) & 3)
    return 0;
11184 dstbase = operands[0];
11185 srcbase = operands[1];
11187 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11188 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11190 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11191 out_words_to_go = INTVAL (operands[2]) / 4;
11192 last_bytes = INTVAL (operands[2]) & 3;
11193 dstoffset = srcoffset = 0;
11195 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11196 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11198 for (i = 0; in_words_to_go >= 2; i+=4)
11200 if (in_words_to_go > 4)
11201 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11202 TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));
11208 if (out_words_to_go)
11210 if (out_words_to_go > 4)
11211 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11212 TRUE, dstbase, &dstoffset));
11213 else if (out_words_to_go != 1)
11214 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       TRUE + (out_words_to_go
						       != in_words_to_go),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11223 if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}
11231 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11232 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11235 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11236 if (out_words_to_go)
11240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11241 sreg = copy_to_reg (mem);
11243 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;
11247 gcc_assert (!in_words_to_go); /* Sanity check */
11250 if (in_words_to_go)
11252 gcc_assert (in_words_to_go > 0);
11254 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11255 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11258 gcc_assert (!last_bytes || part_bytes_reg);
11260 if (BYTES_BIG_ENDIAN && last_bytes)
11262 rtx tmp = gen_reg_rtx (SImode);
11264 /* The bytes we want are in the top end of the word. */
11265 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11266 GEN_INT (8 * (4 - last_bytes))));
11267 part_bytes_reg = tmp;
      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (dst, last_bytes - 1),
11273 dstoffset + last_bytes - 1);
11274 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11280 part_bytes_reg = tmp;
  else
    {
      if (last_bytes > 1)
	{
11289 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
11295 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11296 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}
      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
11312 /* Select a dominance comparison mode if possible for a test of the general
11313 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11314 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11315 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11316 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11317 In all cases OP will be either EQ or NE, but we don't need to know which
11318 here. If we are unable to support a dominance comparison we return
11319 CC mode. This will then fail to match for the RTL expressions that
11320 generate this call. */
static enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;
11327 /* Currently we will probably get the wrong result if the individual
11328 comparisons are not simple. This also ensures that it is safe to
11329 reverse a comparison if necessary. */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;
11336 /* The if_then_else variant of this tests the second condition if the
11337 first passes, but is true if the first fails. Reverse the first
11338 condition to get a true "inclusive-or" expression. */
11339 if (cond_or == DOM_CC_NX_OR_Y)
11340 cond1 = reverse_condition (cond1);
11342 /* If the comparisons are not equal, and one doesn't dominate the other,
11343 then we can't do this. */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT: return CC_DLTmode;
	case LE: return CC_DLEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT: return CC_DGTmode;
	case GE: return CC_DGEmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU: return CC_DLTUmode;
	case LEU: return CC_DLEUmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU: return CC_DGTUmode;
	case GEU: return CC_DGEUmode;
	case NE: return CC_DNEmode;
	default: gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
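/* [Editor's addition] A worked example of the dominance idea: for
   DOM_CC_X_OR_Y, (x > y) || (x >= y) collapses to the single test
   (x >= y), because GE dominates GT -- every value satisfying GT also
   satisfies GE.  The check below demonstrates the property on plain
   integers; it illustrates the idea only, not the CC-mode machinery.  */

#include <assert.h>

static void
dominance_example (int x, int y)
{
  int gt = x > y, ge = x >= y;

  /* GE dominates GT: (GT || GE) == GE and (GT && GE) == GT.  */
  assert ((gt || ge) == ge);
  assert ((gt && ge) == gt);
}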
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
11466 /* All floating point compares return CCFP if it is an equality
11467 comparison, and CCFPE otherwise. */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    switch (op)
      {
      case EQ:
      case NE:
      case UNORDERED:
      case ORDERED:
      case UNLT:
      case UNLE:
      case UNGT:
      case UNGE:
      case UNEQ:
      case LTGT:
	return CCFPmode;

      case LT:
      case LE:
      case GT:
      case GE:
	if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
	  return CCFPmode;
	return CCFPEmode;

      default:
	gcc_unreachable ();
      }
11497 /* A compare with a shifted operand. Because of canonicalization, the
11498 comparison will have to be swapped when we emit the assembler. */
11499 if (GET_MODE (y) == SImode
11500 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11501 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11502 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;
11506 /* This operation is performed swapped, but since we only rely on the Z
11507 flag we don't need an additional mode. */
11508 if (GET_MODE (y) == SImode
11509 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11510 && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;
11514 /* This is a special case that is used by combine to allow a
11515 comparison of a shifted byte load to be split into a zero-extend
11516 followed by a comparison of the shifted integer (only valid for
11517 equalities and unsigned inequalities). */
11518 if (GET_MODE (x) == SImode
11519 && GET_CODE (x) == ASHIFT
11520 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11521 && GET_CODE (XEXP (x, 0)) == SUBREG
11522 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11523 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11524 && (op == EQ || op == NE
11525 || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;
11529 /* A construct for a conditional compare, if the false arm contains
11530 0, then both conditions must be true, otherwise either condition
11531 must be true. Not all conditions are possible, so CCmode is
11532 returned if it can't be done. */
11533 if (GET_CODE (x) == IF_THEN_ELSE
11534 && (XEXP (x, 2) == const0_rtx
11535 || XEXP (x, 2) == const1_rtx)
11536 && COMPARISON_P (XEXP (x, 0))
11537 && COMPARISON_P (XEXP (x, 1)))
11538 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11539 INTVAL (XEXP (x, 2)));
11541 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11542 if (GET_CODE (x) == AND
11543 && (op == EQ || op == NE)
11544 && COMPARISON_P (XEXP (x, 0))
11545 && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);
11549 if (GET_CODE (x) == IOR
11550 && (op == EQ || op == NE)
11551 && COMPARISON_P (XEXP (x, 0))
11552 && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);
11556 /* An operation (on Thumb) where we want to test for a single bit.
11557 This is done by shifting that bit up into the top bit of a
11558 scratch register; we can then branch on the sign bit. */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;
11566 /* An operation that sets the condition codes as a side-effect, the
11567 V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
11570 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
11574 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11575 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11576 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11577 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11578 || GET_CODE (x) == LSHIFTRT
11579 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11580 || GET_CODE (x) == ROTATERT
11581 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11582 return CC_NOOVmode;
  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;
11587 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11588 && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
	return CCmode;

      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  return CCmode;
}
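/* [Editor's addition] The DImode trick described above, in scalar form:
   with the value split into 32-bit halves as the register allocator sees
   it, equality against zero needs only an OR of the halves, which is why
   CC_Zmode suffices there.  Editor's illustration, not part of arm.c.  */

#include <assert.h>
#include <stdint.h>

static void
dimode_zero_test_example (uint64_t v)
{
  uint32_t lo = (uint32_t) v, hi = (uint32_t) (v >> 32);

  /* "orrs tmp, lo, hi" sets Z exactly when the full 64-bit value is 0.  */
  assert ((v == 0) == ((lo | hi) == 0));
}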
11644 /* X and Y are two things to compare using CODE. Emit the compare insn and
11645 return the rtx for register 0 in the proper mode. FP means this is a
11646 floating point compare: I don't think that it is needed on the arm. */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;
11652 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11654 /* We might have X as a constant, Y as a register because of the predicates
11655 used for cmpdi. If so, force X to a register here. */
11656 if (dimode_comparison && !REG_P (x))
11657 x = force_reg (DImode, x);
11659 mode = SELECT_CC_MODE (code, x, y);
11660 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11662 if (dimode_comparison
11663 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
      && mode != CC_CZmode)
    {
      rtx clobber, set;
11668 /* To compare two non-zero values for equality, XOR them and
11669 then compare against zero. Not used for ARM mode; there
11670 CC_CZmode is cheaper. */
11671 if (mode == CC_Zmode && y != const0_rtx)
	{
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}
11676 /* A scratch register is required. */
11677 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11678 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
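/* [Editor's addition] The XOR rewrite used above, in scalar form:
   comparing two values for equality is the same as comparing their XOR
   against zero, which lets the DImode case reuse the compare-with-zero
   path.  Editor's illustration only.  */

#include <assert.h>
#include <stdint.h>

static void
xor_equality_example (uint64_t x, uint64_t y)
{
  assert ((x == y) == ((x ^ y) == 0));
}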
11687 /* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
11702 rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;
11706 if (GET_CODE (ref) == SUBREG)
11708 offset = SUBREG_BYTE (ref);
11709 ref = SUBREG_REG (ref);
11712 if (GET_CODE (ref) == REG)
11714 /* We have a pseudo which has been spilt onto the stack; there
11715 are two cases here: the first where there is a simple
11716 stack-slot replacement and a second where the stack-slot is
11717 out of range, or is used as a subreg. */
11718 if (reg_equiv_mem (REGNO (ref)))
11720 ref = reg_equiv_mem (REGNO (ref));
11721 base = find_replacement (&XEXP (ref, 0));
11724 /* The slot is out of range, or was dressed up in a SUBREG. */
11725 base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));
11730 /* Handle the case where the address is too complex to be offset by 1. */
11731 if (GET_CODE (base) == MINUS
11732 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11734 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
      emit_set_insn (base_plus, base);
      base = base_plus;
    }
11739 else if (GET_CODE (base) == PLUS)
11741 /* The addend must be CONST_INT, or we would have dealt with it above. */
11742 HOST_WIDE_INT hi, lo;
11744 offset += INTVAL (XEXP (base, 1));
11745 base = XEXP (base, 0);
11747 /* Rework the address into a legal sequence of insns. */
11748 /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));
11753 /* Corner case, if lo is the max offset then we would be out of range
11754 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;
11759 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11760 ^ (HOST_WIDE_INT) 0x80000000)
11761 - (HOST_WIDE_INT) 0x80000000);
11763 gcc_assert (hi + lo == offset);
      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11769 /* Get the base address; addsi3 knows how to handle constants
11770 that require more than one insn. */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }
11777 /* Operands[2] may overlap operands[0] (though it won't overlap
11778 operands[1]), that's why we asked for a DImode reg -- so we can
11779 use the bit that does not overlap. */
11780 if (REGNO (operands[2]) == REGNO (operands[0]))
11781 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11783 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11785 emit_insn (gen_zero_extendqisi2 (scratch,
11786 gen_rtx_MEM (QImode,
						    plus_constant (base,
								   offset))));
11789 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11790 gen_rtx_MEM (QImode,
						    plus_constant (base,
								   offset + 1))));
11793 if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
11802 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11803 gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
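/* [Editor's addition] What the emitted little-endian sequence computes,
   plus the offset split used above, both as plain C.  The real code
   additionally canonicalizes HI through a 32-bit sign extension; this
   simplified form assumes no overflow.  Editor's sketch only.  */

#include <assert.h>
#include <stdint.h>

static uint16_t
load_hi_via_bytes_le (const uint8_t *p)
{
  /* Two ldrb's recombined with a shift and an OR.  */
  return (uint16_t) (p[0] | ((uint16_t) p[1] << 8));
}

static void
split_offset_sketch (long offset, long *hi, long *lo)
{
  /* LO keeps the sign of OFFSET and at most 12 bits of magnitude, the
     range a byte load can encode directly (-4095..4095).  */
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  *hi = offset - *lo;	/* Added to the base register first.  */
  assert (*hi + *lo == offset);
}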
11809 /* Handle storing a half-word to memory during reload by synthesizing as two
11810 byte stores. Take care not to clobber the input values until after we
11811 have moved them somewhere safe. This code assumes that if the DImode
11812 scratch in operands[2] overlaps either the input value or output address
11813 in some way, then that value must die in this insn (we absolutely need
11814 two scratch registers for some corner cases). */
void
arm_reload_out_hi (rtx *operands)
{
11818 rtx ref = operands[0];
11819 rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;
11823 if (GET_CODE (ref) == SUBREG)
11825 offset = SUBREG_BYTE (ref);
11826 ref = SUBREG_REG (ref);
11829 if (GET_CODE (ref) == REG)
11831 /* We have a pseudo which has been spilt onto the stack; there
11832 are two cases here: the first where there is a simple
11833 stack-slot replacement and a second where the stack-slot is
11834 out of range, or is used as a subreg. */
11835 if (reg_equiv_mem (REGNO (ref)))
11837 ref = reg_equiv_mem (REGNO (ref));
11838 base = find_replacement (&XEXP (ref, 0));
11841 /* The slot is out of range, or was dressed up in a SUBREG. */
11842 base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));
11847 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11849 /* Handle the case where the address is too complex to be offset by 1. */
11850 if (GET_CODE (base) == MINUS
11851 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11853 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11855 /* Be careful not to destroy OUTVAL. */
11856 if (reg_overlap_mentioned_p (base_plus, outval))
11858 /* Updating base_plus might destroy outval, see if we can
11859 swap the scratch and base_plus. */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    {
	      rtx tmp = scratch;
	      scratch = base_plus;
	      base_plus = tmp;
	    }
	  else
	    {
11868 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11870 /* Be conservative and copy OUTVAL into the scratch now,
11871 this should only be necessary if outval is a subreg
11872 of something larger than a word. */
11873 /* XXX Might this clobber base? I can't see how it can,
11874 since scratch is known to overlap with OUTVAL, and
11875 must be wider than a word. */
11876 emit_insn (gen_movhi (scratch_hi, outval));
11877 outval = scratch_hi;
      emit_set_insn (base_plus, base);
      base = base_plus;
    }
11884 else if (GET_CODE (base) == PLUS)
11886 /* The addend must be CONST_INT, or we would have dealt with it above. */
11887 HOST_WIDE_INT hi, lo;
11889 offset += INTVAL (XEXP (base, 1));
11890 base = XEXP (base, 0);
11892 /* Rework the address into a legal sequence of insns. */
11893 /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));
11898 /* Corner case, if lo is the max offset then we would be out of range
11899 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;
11904 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11905 ^ (HOST_WIDE_INT) 0x80000000)
11906 - (HOST_WIDE_INT) 0x80000000);
11908 gcc_assert (hi + lo == offset);
      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11914 /* Be careful not to destroy OUTVAL. */
11915 if (reg_overlap_mentioned_p (base_plus, outval))
11917 /* Updating base_plus might destroy outval, see if we
11918 can swap the scratch and base_plus. */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		{
		  rtx tmp = scratch;
		  scratch = base_plus;
		  base_plus = tmp;
		}
	      else
		{
11927 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11929 /* Be conservative and copy outval into scratch now,
11930 this should only be necessary if outval is a
11931 subreg of something larger than a word. */
11932 /* XXX Might this clobber base? I can't see how it
		 can, since scratch is known to overlap with
		 outval.  */
11935 emit_insn (gen_movhi (scratch_hi, outval));
11936 outval = scratch_hi;
11940 /* Get the base address; addsi3 knows how to handle constants
11941 that require more than one insn. */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }
11948 if (BYTES_BIG_ENDIAN)
11950 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11951 plus_constant (base, offset + 1)),
11952 gen_lowpart (QImode, outval)));
11953 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
11956 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11962 gen_lowpart (QImode, outval)));
11963 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
11966 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11967 plus_constant (base, offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
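/* [Editor's addition] The outward direction as plain C: a halfword is
   stored as two byte stores, with the byte order chosen exactly as the
   BYTES_BIG_ENDIAN branches above choose it.  Editor's sketch only.  */

#include <stdint.h>

static void
store_hi_via_bytes (uint8_t *p, uint16_t v, int big_endian)
{
  uint8_t lo = (uint8_t) v;		/* gen_lowpart (QImode, outval) */
  uint8_t hi = (uint8_t) (v >> 8);	/* lshrsi3 by 8, then low byte  */

  p[big_endian ? 1 : 0] = lo;
  p[big_endian ? 0 : 1] = hi;
}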
11972 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11973 (padded to the size of a word) should be passed in a register. */
static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
11978 if (TARGET_AAPCS_BASED)
11979 return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
11985 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11986 Return true if an argument passed on the stack should be padded upwards,
11987 i.e. if the least-significant byte has useful data.
11988 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11989 aggregate types are placed in the lowest memory address. */
bool
arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
11994 if (!TARGET_AAPCS_BASED)
11995 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
12004 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12005 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12006 register has useful data, and return the opposite if the most
12007 significant byte does. */
bool
arm_pad_reg_upward (enum machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
12013 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12015 /* For AAPCS, small aggregates, small fixed-point types,
12016 and small complex types are always padded upwards. */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
12020 || TREE_CODE (type) == COMPLEX_TYPE
12021 || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }
12033 /* Otherwise, use default padding. */
  return !BYTES_BIG_ENDIAN;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
12099 /* Routines for manipulation of the constant pool. */
12101 /* Arm instructions cannot load a large constant directly into a
12102 register; they have to come from a pc relative load. The constant
12103 must therefore be placed in the addressable range of the pc
12104 relative load. Depending on the precise pc relative load
12105 instruction the range is somewhere between 256 bytes and 4k. This
12106 means that we often have to dump a constant inside a function, and
12107 generate code to branch around it.
12109 It is important to minimize this, since the branches will slow
12110 things down and make the code larger.
12112 Normally we can hide the table after an existing unconditional
12113 branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...
12132 We fix this by performing a scan after scheduling, which notices
12133 which instructions need to have their operands fetched from the
12134 constant table and builds the table.
12136 The algorithm starts by building a table of all the constants that
12137 need fixing up and all the natural barriers in the function (places
12138 where a constant table can be dropped without breaking the flow).
12139 For each fixup we note how far the pc-relative replacement will be
12140 able to reach and the offset of the instruction into the function.
12142 Having built the table we then group the fixes together to form
12143 tables that are as large as possible (subject to addressing
12144 constraints) and emit each table of constants after the last
12145 barrier that is within range of all the instructions in the group.
12146 If a group does not contain a barrier, then we forcibly create one
12147 by inserting a jump instruction into the flow. Once the table has
12148 been inserted, the insns are then modified to reference the
12149 relevant entry in the pool.
12151 Possible enhancements to the algorithm (not implemented) are:
12153 1) For some processors and object formats, there may be benefit in
12154 aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
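/* [Editor's addition] A highly simplified model of the grouping described
   above, due to the editor and not part of arm.c: each fix can reach at
   most MAX_RANGE bytes forward, so a pool must be emitted no later than
   the first fix's limit, after which a new group starts.  The real pass
   also handles backward ranges, barriers, and alignment.  */

static int
group_fixes_sketch (const long *fix_addr, int n_fixes, long max_range,
		    long *pool_addr /* out: one entry per pool */)
{
  int n_pools = 0, i = 0;

  while (i < n_fixes)
    {
      /* The group is limited by its first (most constrained) fix.  */
      long limit = fix_addr[i] + max_range;

      while (i < n_fixes && fix_addr[i] < limit)
	i++;

      /* Place the pool before LIMIT so every fix in the group reaches
	 it; in the real pass this is the last barrier in range.  */
      pool_addr[n_pools++] = limit;
    }

  return n_pools;
}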
12158 /* These typedefs are located at the start of this file, so that
12159 they can be used in the prototypes there. This comment is to
12160 remind readers of that fact so that the following structures
12161 can be understood more easily.
12163 typedef struct minipool_node Mnode;
12164 typedef struct minipool_fixup Mfix; */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
12171 /* The maximum offset into the code that this entry can be placed. While
12172 pushing fixes for forward references, all entries are sorted in order
12173 of increasing max_address. */
12174 HOST_WIDE_INT max_address;
12175 /* Similarly for an entry inserted for a backwards ref. */
12176 HOST_WIDE_INT min_address;
12177 /* The number of fixes referencing this entry. This can become zero
12178 if we "unpush" an entry. In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
12181 /* The offset from the start of the minipool. */
12182 HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
12185 /* The mode of value. */
12186 enum machine_mode mode;
12187 /* The size of the value. With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};
struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};
12206 /* Fixes less than a word need padding out to a word boundary. */
12207 #define MINIPOOL_FIX_SIZE(mode) \
12208 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
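/* [Editor's note] For example, a 2-byte HImode fix still occupies 4 bytes
   in the pool under this macro, while 8- and 16-byte entries keep their
   own size: MINIPOOL_FIX_SIZE (HImode) == 4, MINIPOOL_FIX_SIZE (DImode)
   == 8.  */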
12210 static Mnode * minipool_vector_head;
12211 static Mnode * minipool_vector_tail;
12212 static rtx minipool_vector_label;
12213 static int minipool_pad;
12215 /* The linked list of all minipool fixes required for this function. */
12216 Mfix * minipool_fix_head;
12217 Mfix * minipool_fix_tail;
12218 /* The fix entry for the current minipool, once it has been placed. */
12219 Mfix * minipool_barrier;
12221 /* Determines if INSN is the start of a jump table. Returns the end
12222 of the TABLE or NULL_RTX. */
static rtx
is_jump_table (rtx insn)
{
  rtx table;
12228 if (jump_to_label_p (insn)
12229 && ((table = next_real_insn (JUMP_LABEL (insn)))
12230 == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
12233 && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
12240 #ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
12244 static HOST_WIDE_INT
12245 get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
12249 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12251 rtx body = PATTERN (insn);
12252 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12253 HOST_WIDE_INT size;
12254 HOST_WIDE_INT modesize;
12256 modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);

      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
12284 static HOST_WIDE_INT
12285 get_label_padding (rtx label)
12287 HOST_WIDE_INT align, min_insn_size;
12289 align = 1 << label_to_alignment (label);
12290 min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
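/* [Editor's note] For instance, a label aligned to 1 << 3 == 8 bytes in
   Thumb code (minimum insn size 2) yields a worst-case padding of
   8 - 2 == 6 bytes from the function above.  */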
12294 /* Move a minipool fix MP from its current location to before MAX_MP.
12295 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12296 constraints may need updating. */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
12301 /* The code below assumes these are different. */
12302 gcc_assert (mp != max_mp);
12304 if (max_mp == NULL)
12306 if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
12311 if (max_address > max_mp->max_address - mp->fix_size)
12312 mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;
12316 /* Unlink MP from its current position. Since max_mp is non-null,
12317 mp->prev must be non-null. */
12318 mp->prev->next = mp->next;
12319 if (mp->next != NULL)
12320 mp->next->prev = mp->prev;
  else
    minipool_vector_tail = mp->prev;
12324 /* Re-insert it before MAX_MP. */
  mp->next = max_mp;
  mp->prev = max_mp->prev;
  max_mp->prev = mp;
12329 if (mp->prev != NULL)
12330 mp->prev->next = mp;
  else
    minipool_vector_head = mp;
  /* Save the new entry.  */
  max_mp = mp;
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
12340 while (mp->prev != NULL
12341 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
12350 /* Add a constant to the minipool for a forward reference. Returns the
12351 node added or NULL if the constant will not fit in this pool. */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
12355 /* If set, max_mp is the first pool_entry that has a lower
12356 constraint than the one we are trying to add. */
12357 Mnode * max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode * mp;
12361 /* If the minipool starts before the end of FIX->INSN then this FIX
12362 can not be placed into the current pool. Furthermore, adding the
12363 new constant pool entry may cause the pool to start FIX_SIZE bytes
12365 if (minipool_vector_head &&
12366 (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;
12370 /* Scan the pool to see if a constant with the same value has
12371 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
12374 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12376 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12377 && fix->mode == mp->mode
12378 && (GET_CODE (fix->value) != CODE_LABEL
12379 || (CODE_LABEL_NUMBER (fix->value)
12380 == CODE_LABEL_NUMBER (mp->value)))
12381 && rtx_equal_p (fix->value, mp->value))
12383 /* More than one fix references this entry. */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}
12388 /* Note the insertion point if necessary. */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;
      /* If we are inserting an 8-byte aligned quantity and
12394 we have not already found an insertion point, then
12395 make sure that all such 8-byte aligned quantities are
12396 placed at the start of the pool. */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }
12407 /* The value is not currently in the minipool, so we need to create
12408 a new entry for it. If MAX_MP is NULL, the entry will be put on
12409 the end of the list since the placement is less constrained than
12410 any existing entry. Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
12414 mp->fix_size = fix->fix_size;
12415 mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
12418 /* Not yet required for a backwards ref. */
12419 mp->min_address = -65536;
12421 if (max_mp == NULL)
      mp->max_address = max_address;
      mp->next = NULL;
12425 mp->prev = minipool_vector_tail;
12427 if (mp->prev == NULL)
12429 minipool_vector_head = mp;
12430 minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;
12435 minipool_vector_tail = mp;
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
12440 mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
12447 if (mp->prev != NULL)
12448 mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }
  /* Save the new entry.  */
  max_mp = mp;
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
12458 while (mp->prev != NULL
12459 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}

static Mnode *
12469 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12470 HOST_WIDE_INT min_address)
12472 HOST_WIDE_INT offset;
12474 /* The code below assumes these are different. */
12475 gcc_assert (mp != min_mp);
12477 if (min_mp == NULL)
12479 if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
12485 mp->min_address = min_address;
12487 /* Unlink MP from its current position. Since min_mp is non-null,
12488 mp->next must be non-null. */
12489 mp->next->prev = mp->prev;
12490 if (mp->prev != NULL)
12491 mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;
12495 /* Reinsert it after MIN_MP. */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
12499 if (mp->next != NULL)
12500 mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }
  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12510 mp->offset = offset;
12511 if (mp->refcount > 0)
12512 offset += mp->fix_size;
12514 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
12521 /* Add a constant to the minipool for a backward reference. Returns the
12522 node added or NULL if the constant will not fit in this pool.
12524 Note that the code for insertion for a backwards reference can be
12525 somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
12531 /* If set, min_mp is the last pool_entry that has a lower constraint
12532 than the one we are trying to add. */
12533 Mnode *min_mp = NULL;
12534 /* This can be negative, since it is only a constraint. */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;
12538 /* If we can't reach the current pool from this insn, or if we can't
12539 insert this entry at the end of the pool without pushing other
12540 fixes out of range, then we don't try. This ensures that we
12541 can't fail later on. */
12542 if (min_address >= minipool_barrier->address
12543 || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;
12547 /* Scan the pool to see if a constant with the same value has
12548 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
12551 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12553 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12554 && fix->mode == mp->mode
12555 && (GET_CODE (fix->value) != CODE_LABEL
12556 || (CODE_LABEL_NUMBER (fix->value)
12557 == CODE_LABEL_NUMBER (mp->value)))
12558 && rtx_equal_p (fix->value, mp->value)
12559 /* Check that there is enough slack to move this entry to the
12560 end of the table (this is conservative). */
12561 && (mp->max_address
12562 > (minipool_barrier->address
12563 + minipool_vector_tail->offset
12564 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}
12570 if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
12574 /* Note the insertion point if necessary. */
12575 if (mp->min_address < min_address)
12577 /* For now, we do not allow the insertion of 8-byte alignment
12578 requiring nodes anywhere but at the start of the pool. */
12579 if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
12585 else if (mp->max_address
12586 < minipool_barrier->address + mp->offset + fix->fix_size)
12588 /* Inserting before this entry would push the fix beyond
12589 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
12592 if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
12601 /* Do not insert a non-8-byte aligned quantity before 8-byte
12602 aligned quantities. */
12603 else if (ARM_DOUBLEWORD_ALIGN
12604 && fix->fix_size < 8
12605 && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }
  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
12615 mp->fix_size = fix->fix_size;
12616 mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
12619 mp->max_address = minipool_barrier->address + 65536;
12621 mp->min_address = min_address;
12623 if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;
12628 if (mp->next == NULL)
12630 minipool_vector_tail = mp;
12631 minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;
12636 minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;
12644 if (mp->next != NULL)
12645 mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }
  /* Save the new entry.  */
  min_mp = mp;
12658 /* Scan over the following entries and adjust their offsets. */
12659 while (mp->next != NULL)
12661 if (mp->next->min_address < mp->min_address + mp->fix_size)
12662 mp->next->min_address = mp->min_address + mp->fix_size;
      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;
12681 minipool_barrier = barrier;
12683 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12685 mp->offset = offset;
12687 if (mp->refcount > 0)
12688 offset += mp->fix_size;
12692 /* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;
12700 if (ARM_DOUBLEWORD_ALIGN)
12701 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
12709 fprintf (dump_file,
12710 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12711 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12713 scan = emit_label_after (gen_label_rtx (), scan);
12714 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12715 scan = emit_label_after (minipool_vector_label, scan);
12717 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12719 if (mp->refcount > 0)
	  if (dump_file)
	    {
	      fprintf (dump_file,
12724 ";; Offset %u, min %ld, max %ld ",
12725 (unsigned) mp->offset, (unsigned long) mp->min_address,
12726 (unsigned long) mp->max_address);
12727 arm_print_value (dump_file, mp->value);
12728 fputc ('\n', dump_file);
12731 switch (mp->fix_size)
12733 #ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
12739 #ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
12745 #ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
12751 #ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
12757 #ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }
12772 minipool_vector_head = minipool_vector_tail = NULL;
12773 scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
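/* [Editor's note] In the assembly output the dumped pool looks roughly
   like this (editor's illustration; exact labels and directives vary):

	b	.L2		@ jump around the pool (or a natural barrier)
	.align	2
   .L1:
	.word	0x12345678	@ consttable_4 entry
	.word	some_symbol	@ another 4-byte entry
   .L2:				@ execution resumes here
*/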
12777 /* Return the cost of forcibly inserting a barrier after INSN. */
static int
arm_barrier_cost (rtx insn)
{
12781 /* Basing the location of the pool on the loop depth is preferable,
12782 but at the moment, the basic block information seems to be
12783 corrupt by this stage of the compilation. */
12784 int base_cost = 50;
12785 rtx next = next_nonnote_insn (insn);
  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;
12790 switch (GET_CODE (insn))
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
12809 /* Find the best place in the insn stream in the range
12810 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
12816 HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
12819 /* The instruction after which we will insert the jump. */
  rtx selected = NULL;
  int selected_cost;
12822 /* The address at which the jump instruction will be placed. */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
12825 HOST_WIDE_INT max_count = max_address - fix->address;
12826 rtx label = gen_label_rtx ();
12828 selected_cost = arm_barrier_cost (from);
12829 selected_address = fix->address;
12831 while (from && count < max_count)
      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);
12840 /* Count the length of this insn. This must stay in sync with the
12841 code that pushes minipool fixes. */
12842 if (LABEL_P (from))
12843 count += get_label_padding (from);
      else
	count += get_attr_length (from);
12847 /* If there is a jump table, add its length. */
12848 tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);
12853 /* Jump tables aren't in a basic block, so base the cost on
12854 the dispatch insn. If we select this location, we will
12855 still put the pool after the table. */
12856 new_cost = arm_barrier_cost (from);
12858 if (count < max_count
12859 && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
12863 selected_address = fix->address + count;
12866 /* Continue after the dispatch table. */
	  from = NEXT_INSN (tmp);
	  continue;
	}
12871 new_cost = arm_barrier_cost (from);
12873 if (count < max_count
12874 && (!selected || new_cost <= selected_cost))
	    {
	      selected = from;
	      selected_cost = new_cost;
12878 selected_address = fix->address + count;
12881 from = NEXT_INSN (from);
12884 /* Make sure that we found a place to insert the jump. */
12885 gcc_assert (selected);
12887 /* Make sure we do not split a call and its corresponding
12888 CALL_ARG_LOCATION note. */
12889 if (CALL_P (selected))
12891 rtx next = NEXT_INSN (selected);
12892 if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }
12897 /* Create a new JUMP_INSN that branches around a barrier. */
12898 from = emit_jump_insn_after (gen_jump (label), selected);
12899 JUMP_LABEL (from) = label;
12900 barrier = emit_barrier_after (from);
12901 emit_label_after (label, barrier);
12903 /* Create a minipool barrier entry for the new barrier. */
12904 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12905 new_fix->insn = barrier;
12906 new_fix->address = selected_address;
12907 new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
12918 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;
  fix->next = NULL;
12924 if (minipool_fix_head != NULL)
12925 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
12929 minipool_fix_tail = fix;
12932 /* Record INSN, which will need fixing up to load a value from the
12933 minipool. ADDRESS is the offset of the insn since the start of the
12934 function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12939 enum machine_mode mode, rtx value)
12941 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
12947 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12948 fix->value = value;
12949 fix->forwards = get_attr_pool_range (insn);
12950 fix->backwards = get_attr_neg_pool_range (insn);
12951 fix->minipool = NULL;
12953 /* If an insn doesn't have a range defined for it, then it isn't
12954 expecting to be reworked by this code. Better to stop now than
12955 to generate duff assembly code. */
12956 gcc_assert (fix->forwards || fix->backwards);
12958 /* If an entry requires 8-byte alignment then assume all constant pools
12959 require 4 bytes of padding. Trying to do this later on a per-pool
12960 basis is awkward because existing pool entries have to be modified. */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
12966 fprintf (dump_file,
12967 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12968 GET_MODE_NAME (mode),
12969 INSN_UID (insn), (unsigned long) address,
12970 -1 * (long)fix->backwards, (long)fix->forwards);
12971 arm_print_value (dump_file, fix->value);
12972 fprintf (dump_file, "\n");
  /* Add it to the chain of fixes.  */
  fix->next = NULL;
12978 if (minipool_fix_head != NULL)
12979 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
12983 minipool_fix_tail = fix;
12986 /* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   generate it.  */
static int
arm_const_double_inline_cost (rtx val)
{
12992 rtx lowpart, highpart;
12993 enum machine_mode mode;
12995 mode = GET_MODE (val);
  if (mode == VOIDmode)
    mode = DImode;
13000 gcc_assert (GET_MODE_SIZE (mode) == 8);
13002 lowpart = gen_lowpart (SImode, val);
13003 highpart = gen_highpart_mode (SImode, mode, val);
13005 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13006 gcc_assert (GET_CODE (highpart) == CONST_INT);
13008 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13009 NULL_RTX, NULL_RTX, 0, 0)
13010 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
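/* [Editor's addition] The lowpart/highpart split being costed above, in
   scalar form: a 64-bit constant is priced as two independent 32-bit
   constants, each synthesizable in 1..4 data-processing insns on ARM.
   Editor's sketch only.  */

#include <stdint.h>

static void
split_di_constant_sketch (uint64_t val, uint32_t *lowpart, uint32_t *highpart)
{
  *lowpart = (uint32_t) val;		/* gen_lowpart (SImode, val)    */
  *highpart = (uint32_t) (val >> 32);	/* gen_highpart_mode (SImode)   */
}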
13014 /* Return true if it is worthwhile to split a 64-bit constant into two
13015 32-bit operations. This is the case if optimizing for size, or
13016 if we have load delay slots, or if one 32-bit part can be done with
13017 a single data operation. */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;
  if (optimize_size || arm_ld_sched)
    return true;
  if (mode == VOIDmode)
    mode = DImode;
13030 part = gen_highpart_mode (SImode, mode, val);
13032 gcc_assert (GET_CODE (part) == CONST_INT);
13034 if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;
13038 part = gen_lowpart (SImode, val);
13040 gcc_assert (GET_CODE (part) == CONST_INT);
13042 if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
13049 /* Return true if it is possible to inline both the high and low parts
13050 of a 64-bit constant into 32-bit data processing instructions. */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;
  if (mode == VOIDmode)
    mode = DImode;
13060 part = gen_highpart_mode (SImode, mode, val);
13062 gcc_assert (GET_CODE (part) == CONST_INT);
  if (!const_ok_for_arm (INTVAL (part)))
    return false;
13067 part = gen_lowpart (SImode, val);
13069 gcc_assert (GET_CODE (part) == CONST_INT);
  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
13077 /* Scan INSN and note any of its operands that need fixing.
13078 If DO_PUSHES is false we do not actually push any of the fixups
13079 needed. The function returns TRUE if any fixups were needed/pushed.
13080 This is used by arm_memory_load_p() which needs to know about loads
13081 of constants that will be converted into minipool loads. */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;
13088 extract_insn (insn);
13090 if (!constrain_operands (1))
13091 fatal_insn_not_found (insn);
  if (recog_data.n_alternatives == 0)
    return false;
  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
13098 preprocess_constraints ();
13100 for (opno = 0; opno < recog_data.n_operands; opno++)
13102 /* Things we need to fix can only occur in inputs. */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;
13106 /* If this alternative is a memory reference, then any mention
13107 of constants in this alternative is really to fool reload
13108 into allowing us to accept one there. We need to fix them up
13109 now so that we output the right code. */
13110 if (recog_op_alt[opno][which_alternative].memory_ok)
13112 rtx op = recog_data.operand[opno];
13114 if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	      result = true;
	    }
13121 else if (GET_CODE (op) == MEM
13122 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
13127 rtx cop = avoid_constant_pool_reference (op);
13129 /* Casting the address of something to a mode narrower
13130 than a word can cause avoid_constant_pool_reference()
13131 to return the pool reference itself. That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));
13137 push_minipool_fix (insn, address,
13138 recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}

	      result = true;
	    }
	}
    }

  return result;
}
13150 /* Convert instructions to their cc-clobbering variant if possible, since
13151 that allows us to use smaller encodings. */
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;
13159 INIT_REG_SET (&live);
13161 /* We are freeing block_for_insn in the toplev to keep compatibility
13162 with old MDEP_REORGS that are not CFG based. Recompute it now. */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB (bb)
    {
      rtx insn;
13170 COPY_REG_SET (&live, DF_LR_OUT (bb));
13171 df_simulate_initialize_backwards (bb, &live);
13172 FOR_BB_INSNS_REVERSE (bb, insn)
13174 if (NONJUMP_INSN_P (insn)
13175 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13177 rtx pat = PATTERN (insn);
13178 if (GET_CODE (pat) == SET
13179 && low_register_operand (XEXP (pat, 0), SImode)
13180 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13181 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13182 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13184 rtx dst = XEXP (pat, 0);
13185 rtx src = XEXP (pat, 1);
13186 rtx op0 = XEXP (src, 0);
13187 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13188 ? XEXP (src, 1) : NULL);
13190 if (rtx_equal_p (dst, op0)
13191 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13193 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13194 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13195 rtvec vec = gen_rtvec (2, pat, clobber);
13197 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13198 INSN_CODE (insn) = -1;
13200 /* We can also handle a commutative operation where the
13201 second operand matches the destination. */
13202 else if (op1 && rtx_equal_p (dst, op1))
13204 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13205 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13208 src = copy_rtx (src);
13209 XEXP (src, 0) = op1;
13210 XEXP (src, 1) = op0;
13211 pat = gen_rtx_SET (VOIDmode, dst, src);
13212 vec = gen_rtvec (2, pat, clobber);
13213 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13214 INSN_CODE (insn) = -1;
13219 if (NONDEBUG_INSN_P (insn))
13220 df_simulate_one_insn_backwards (bb, insn, &live);
13224 CLEAR_REG_SET (&live);
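/* As a hand-written illustration (not compiler output) of the
   transformation above: when the flags are dead, the pattern

     (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

   is rewrapped as

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
                (clobber (reg:CC CC_REGNUM))])

   which allows the 16-bit flag-setting "adds r0, r0, r1" encoding to
   be selected instead of the 32-bit flag-preserving form.  */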
13227 /* GCC puts the pool in the wrong place for ARM, since we can only
13228 load addresses a limited distance around the pc. We do some
13229 special munging to move the constant pool values to the correct
13230 point in the code. */
13235 HOST_WIDE_INT address = 0;
13241 minipool_fix_head = minipool_fix_tail = NULL;
13243 /* The first insn must always be a note, or the code below won't
13244 scan it properly. */
13245 insn = get_insns ();
13246 gcc_assert (GET_CODE (insn) == NOTE);
13249 /* Scan all the insns and record the operands that will need fixing. */
13250 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13252 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13253 && (arm_cirrus_insn_p (insn)
13254 || GET_CODE (insn) == JUMP_INSN
13255 || arm_memory_load_p (insn)))
13256 cirrus_reorg (insn);
13258 if (GET_CODE (insn) == BARRIER)
13259 push_minipool_barrier (insn, address);
13260 else if (INSN_P (insn))
13264 note_invalid_constants (insn, address, true);
13265 address += get_attr_length (insn);
13267 /* If the insn is a vector jump, add the size of the table
13268 and skip the table. */
13269 if ((table = is_jump_table (insn)) != NULL)
13271 address += get_jump_table_size (table);
13275 else if (LABEL_P (insn))
13276 /* Add the worst-case padding due to alignment. We don't add
13277 the _current_ padding because the minipool insertions
13278 themselves might change it. */
13279 address += get_label_padding (insn);
13282 fix = minipool_fix_head;
13284 /* Now scan the fixups and perform the required changes. */
13289 Mfix * last_added_fix;
13290 Mfix * last_barrier = NULL;
13293 /* Skip any further barriers before the next fix. */
13294 while (fix && GET_CODE (fix->insn) == BARRIER)
13297 /* No more fixes. */
13301 last_added_fix = NULL;
13303 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13305 if (GET_CODE (ftmp->insn) == BARRIER)
13307 if (ftmp->address >= minipool_vector_head->max_address)
13310 last_barrier = ftmp;
13312 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13315 last_added_fix = ftmp; /* Keep track of the last fix added. */
13318 /* If we found a barrier, drop back to that; any fixes that we
13319 could have reached but come after the barrier will now go in
13320 the next mini-pool. */
13321 if (last_barrier != NULL)
13323 /* Reduce the refcount for those fixes that won't go into this
13325 for (fdel = last_barrier->next;
13326 fdel && fdel != ftmp;
13329 fdel->minipool->refcount--;
13330 fdel->minipool = NULL;
13333 ftmp = last_barrier;
13337 /* ftmp is the first fix that we can't fit into this pool and
13338 there are no natural barriers that we could use. Insert a
13339 new barrier in the code somewhere between the previous
13340 fix and this one, and arrange to jump around it. */
13341 HOST_WIDE_INT max_address;
13343 /* The last item on the list of fixes must be a barrier, so
13344 we can never run off the end of the list of fixes without
13345 last_barrier being set. */
13348 max_address = minipool_vector_head->max_address;
13349 /* Check that there isn't another fix that is in range that
13350 we couldn't fit into this pool because the pool was
13351 already too large: we need to put the pool before such an
13352 instruction. The pool itself may come just after the
13353 fix because create_fix_barrier also allows space for a
13354 jump instruction. */
13355 if (ftmp->address < max_address)
13356 max_address = ftmp->address + 1;
13358 last_barrier = create_fix_barrier (last_added_fix, max_address);
13361 assign_minipool_offsets (last_barrier);
13365 if (GET_CODE (ftmp->insn) != BARRIER
13366 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13373 /* Scan over the fixes we have identified for this pool, fixing them
13374 up and adding the constants to the pool itself. */
13375 for (this_fix = fix; this_fix && ftmp != this_fix;
13376 this_fix = this_fix->next)
13377 if (GET_CODE (this_fix->insn) != BARRIER)
13380 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13381 minipool_vector_label),
13382 this_fix->minipool->offset);
13383 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13386 dump_minipool (last_barrier->insn);
13390 /* From now on we must synthesize any constants that we can't handle
13391 directly. This can happen if the RTL gets split during final
13392 instruction generation. */
13393 after_arm_reorg = 1;
13395 /* Free the minipool memory. */
13396 obstack_free (&minipool_obstack, minipool_startobj);
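/* Sketch of the overall effect (a hand-written example, not actual
   compiler output): a constant that cannot be encoded as an ARM
   immediate is turned into a pc-relative load from a minipool that
   is dumped at a barrier within the load's addressing range, e.g.

       ldr     r0, .LC0        @ replaces an unencodable "mov r0, #..."
       ...
       b       .Lskip          @ barrier; execution jumps around the pool
   .LC0:
       .word   0x12345679
   .Lskip:

   create_fix_barrier inserts the branch when no natural barrier is
   close enough.  */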
13399 /* Routines to output assembly language. */
13401 /* If the rtx is the correct value then return the string of the number.
13402 In this way we can ensure that valid double constants are generated even
13403 when cross compiling. */
13405 fp_immediate_constant (rtx x)
13410 if (!fp_consts_inited)
13413 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13414 for (i = 0; i < 8; i++)
13415 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13416 return strings_fp[i];
13418 gcc_unreachable ();
13421 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13422 static const char *
13423 fp_const_from_val (REAL_VALUE_TYPE *r)
13427 if (!fp_consts_inited)
13430 for (i = 0; i < 8; i++)
13431 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13432 return strings_fp[i];
13434 gcc_unreachable ();
13437 /* Output the operands of a LDM/STM instruction to STREAM.
13438 MASK is the ARM register set mask of which only bits 0-15 are important.
13439 REG is the base register, either the frame pointer or the stack pointer.
13440 INSTR is the possibly suffixed load or store instruction.
13441 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13444 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13445 unsigned long mask, int rfe)
13448 bool not_first = FALSE;
13450 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13451 fputc ('\t', stream);
13452 asm_fprintf (stream, instr, reg);
13453 fputc ('{', stream);
13455 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13456 if (mask & (1 << i))
13459 fprintf (stream, ", ");
13461 asm_fprintf (stream, "%r", i);
13466 fprintf (stream, "}^\n");
13468 fprintf (stream, "}\n");
13472 /* Output a FLDMD instruction to STREAM.
13473 BASE is the register containing the address.
13474 REG and COUNT specify the register range.
13475 Extra registers may be added to avoid hardware bugs.
13477 We output FLDMD even for ARMv5 VFP implementations. Although
13478 FLDMD is technically not supported until ARMv6, it is believed
13479 that all VFP implementations support its use in this context. */
13482 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13486 /* Work around the ARM10 VFPr1 bug. */
13487 if (count == 2 && !arm_arch6)
13494 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13495 load into multiple parts if we have to handle more than 16 registers. */
13498 vfp_output_fldmd (stream, base, reg, 16);
13499 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13503 fputc ('\t', stream);
13504 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13506 for (i = reg; i < reg + count; i++)
13509 fputs (", ", stream);
13510 asm_fprintf (stream, "d%d", i);
13512 fputs ("}\n", stream);
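/* For example (an illustrative call, not taken from a real build),
   vfp_output_fldmd (stream, SP_REGNUM, 4, 3) would emit

       fldmfdd sp!, {d4, d5, d6}

   popping three double registers and writing the updated address
   back to the base register.  */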
13517 /* Output the assembly for a store multiple. */
13520 vfp_output_fstmd (rtx * operands)
13527 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13528 p = strlen (pattern);
13530 gcc_assert (GET_CODE (operands[1]) == REG);
13532 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13533 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13535 p += sprintf (&pattern[p], ", d%d", base + i);
13537 strcpy (&pattern[p], "}");
13539 output_asm_insn (pattern, operands);
13544 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
13545 number of bytes pushed. */
13548 vfp_emit_fstmd (int base_reg, int count)
13555 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13556 register pairs are stored by a store multiple insn. We avoid this
13557 by pushing an extra pair. */
13558 if (count == 2 && !arm_arch6)
13560 if (base_reg == LAST_VFP_REGNUM - 3)
13565 /* FSTMD may not store more than 16 doubleword registers at once. Split
13566 larger stores into multiple parts (up to a maximum of two, in
13571 /* NOTE: base_reg is an internal register number, so each D register
13573 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13574 saved += vfp_emit_fstmd (base_reg, 16);
13578 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13579 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13581 reg = gen_rtx_REG (DFmode, base_reg);
13584 XVECEXP (par, 0, 0)
13585 = gen_rtx_SET (VOIDmode,
13588 gen_rtx_PRE_MODIFY (Pmode,
13591 (stack_pointer_rtx,
13594 gen_rtx_UNSPEC (BLKmode,
13595 gen_rtvec (1, reg),
13596 UNSPEC_PUSH_MULT));
13598 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13599 plus_constant (stack_pointer_rtx, -(count * 8)));
13600 RTX_FRAME_RELATED_P (tmp) = 1;
13601 XVECEXP (dwarf, 0, 0) = tmp;
13603 tmp = gen_rtx_SET (VOIDmode,
13604 gen_frame_mem (DFmode, stack_pointer_rtx),
13606 RTX_FRAME_RELATED_P (tmp) = 1;
13607 XVECEXP (dwarf, 0, 1) = tmp;
13609 for (i = 1; i < count; i++)
13611 reg = gen_rtx_REG (DFmode, base_reg);
13613 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13615 tmp = gen_rtx_SET (VOIDmode,
13616 gen_frame_mem (DFmode,
13617 plus_constant (stack_pointer_rtx,
13620 RTX_FRAME_RELATED_P (tmp) = 1;
13621 XVECEXP (dwarf, 0, i + 1) = tmp;
13624 par = emit_insn (par);
13625 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13626 RTX_FRAME_RELATED_P (par) = 1;
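/* Net effect, as a hedged sketch: on an ARMv6 core,
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 2) emits RTL equivalent to

       fstmfdd sp!, {d8, d9}

   and returns 16, the number of bytes pushed (D8 is internal register
   number FIRST_VFP_REGNUM + 16 because each D register occupies two
   VFP register numbers).  On a pre-ARMv6 core the same call would
   push an extra pair because of the ARM10 VFPr1 workaround above, so
   the return value would grow accordingly.  */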
13631 /* Emit a call instruction with pattern PAT. ADDR is the address of
13632 the call target. */
13635 arm_emit_call_insn (rtx pat, rtx addr)
13639 insn = emit_call_insn (pat);
13641 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13642 If the call might use such an entry, add a use of the PIC register
13643 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13644 if (TARGET_VXWORKS_RTP
13646 && GET_CODE (addr) == SYMBOL_REF
13647 && (SYMBOL_REF_DECL (addr)
13648 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13649 : !SYMBOL_REF_LOCAL_P (addr)))
13651 require_pic_register ();
13652 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13656 /* Output a 'call' insn. */
13658 output_call (rtx *operands)
13660 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13662 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13663 if (REGNO (operands[0]) == LR_REGNUM)
13665 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13666 output_asm_insn ("mov%?\t%0, %|lr", operands);
13669 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13671 if (TARGET_INTERWORK || arm_arch4t)
13672 output_asm_insn ("bx%?\t%0", operands);
13674 output_asm_insn ("mov%?\t%|pc, %0", operands);
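/* E.g. (hand-written illustration) for a pre-v5 indirect call
   through r2 the sequence above is

       mov     lr, pc
       bx      r2              @ or "mov pc, r2" without interworking

   the mov capturing the return address because the pc reads ahead of
   the current instruction.  */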
13679 /* Output a 'call' insn that is a reference in memory. This is
13680 disabled for ARMv5 because we prefer to use blx instead; otherwise
13681 there's a significant performance overhead. */
13683 output_call_mem (rtx *operands)
13685 gcc_assert (!arm_arch5);
13686 if (TARGET_INTERWORK)
13688 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13689 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13690 output_asm_insn ("bx%?\t%|ip", operands);
13692 else if (regno_use_in (LR_REGNUM, operands[0]))
13694 /* LR is used in the memory address. We load the address in the
13695 first instruction. It's safe to use IP as the target of the
13696 load since the call will kill it anyway. */
13697 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13698 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13700 output_asm_insn ("bx%?\t%|ip", operands);
13702 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13706 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13707 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13714 /* Output a move from ARM registers to an FPA register.
13715 OPERANDS[0] is an FPA register.
13716 OPERANDS[1] is the first register of an ARM register pair. */
13718 output_mov_long_double_fpa_from_arm (rtx *operands)
13720 int arm_reg0 = REGNO (operands[1]);
13723 gcc_assert (arm_reg0 != IP_REGNUM);
13725 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13726 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13727 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13729 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13730 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13735 /* Output a move from an FPA register to ARM registers.
13736 OPERANDS[0] is the first register of an ARM register pair.
13737 OPERANDS[1] is an FPA register. */
13739 output_mov_long_double_arm_from_fpa (rtx *operands)
13741 int arm_reg0 = REGNO (operands[0]);
13744 gcc_assert (arm_reg0 != IP_REGNUM);
13746 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13747 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13748 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13750 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13751 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13755 /* Output a move of a long double from ARM registers to ARM registers.
13756 OPERANDS[0] is the destination.
13757 OPERANDS[1] is the source. */
13759 output_mov_long_double_arm_from_arm (rtx *operands)
13761 /* We have to be careful here because the two might overlap. */
13762 int dest_start = REGNO (operands[0]);
13763 int src_start = REGNO (operands[1]);
13767 if (dest_start < src_start)
13769 for (i = 0; i < 3; i++)
13771 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13772 ops[1] = gen_rtx_REG (SImode, src_start + i);
13773 output_asm_insn ("mov%?\t%0, %1", ops);
13778 for (i = 2; i >= 0; i--)
13780 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13781 ops[1] = gen_rtx_REG (SImode, src_start + i);
13782 output_asm_insn ("mov%?\t%0, %1", ops);
13790 arm_emit_movpair (rtx dest, rtx src)
13792 /* If the src is an immediate, simplify it. */
13793 if (CONST_INT_P (src))
13795 HOST_WIDE_INT val = INTVAL (src);
13796 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13797 if ((val >> 16) & 0x0000ffff)
13798 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13800 GEN_INT ((val >> 16) & 0x0000ffff));
13803 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13804 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
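/* Worked example (hand computed): arm_emit_movpair (dest,
   GEN_INT (0x12345678)) emits a SET of the low half followed by a
   ZERO_EXTRACT store to the top half, which on a movw/movt-capable
   target assembles to something like

       movw    rD, #0x5678
       movt    rD, #0x1234

   When the top 16 bits are zero the second insn is omitted
   entirely.  */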
13807 /* Output a move from ARM registers to an FPA register.
13808 OPERANDS[0] is an FPA register.
13809 OPERANDS[1] is the first register of an ARM register pair. */
13811 output_mov_double_fpa_from_arm (rtx *operands)
13813 int arm_reg0 = REGNO (operands[1]);
13816 gcc_assert (arm_reg0 != IP_REGNUM);
13818 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13819 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13820 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13821 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13825 /* Output a move from an FPA register to ARM registers.
13826 OPERANDS[0] is the first register of an ARM register pair.
13827 OPERANDS[1] is an FPA register. */
13829 output_mov_double_arm_from_fpa (rtx *operands)
13831 int arm_reg0 = REGNO (operands[0]);
13834 gcc_assert (arm_reg0 != IP_REGNUM);
13836 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13837 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13838 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13839 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13843 /* Output a move between double words. It must be REG<-MEM
13846 output_move_double (rtx *operands, bool emit, int *count)
13848 enum rtx_code code0 = GET_CODE (operands[0]);
13849 enum rtx_code code1 = GET_CODE (operands[1]);
13854 /* The only case when this might happen is when
13855 you are looking at the length of a DImode instruction
13856 that has an invalid constant in it. */
13857 if (code0 == REG && code1 != MEM)
13859 gcc_assert (!emit);
13866 unsigned int reg0 = REGNO (operands[0]);
13868 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13870 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13872 switch (GET_CODE (XEXP (operands[1], 0)))
13879 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13880 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13882 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13887 gcc_assert (TARGET_LDRD);
13889 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13896 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13898 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13906 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13908 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13913 gcc_assert (TARGET_LDRD);
13915 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13920 /* Autoincrement addressing modes should never have overlapping
13921 base and destination registers, and overlapping index registers
13922 are already prohibited, so this doesn't need to worry about
13924 otherops[0] = operands[0];
13925 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13926 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13928 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13930 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13932 /* Registers overlap so split out the increment. */
13935 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13936 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13943 /* Use a single insn if we can.
13944 FIXME: IWMMXT allows offsets larger than ldrd can
13945 handle, fix these up with a pair of ldr. */
13947 || GET_CODE (otherops[2]) != CONST_INT
13948 || (INTVAL (otherops[2]) > -256
13949 && INTVAL (otherops[2]) < 256))
13952 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13958 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13959 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13969 /* Use a single insn if we can.
13970 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13971 fix these up with a pair of ldr. */
13973 || GET_CODE (otherops[2]) != CONST_INT
13974 || (INTVAL (otherops[2]) > -256
13975 && INTVAL (otherops[2]) < 256))
13978 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13984 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13985 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13995 /* We might be able to use ldrd %0, %1 here. However the range is
13996 different from that of ldr/adr, and it is broken on some ARMv7-M
13997 implementations. */
13998 /* Use the second register of the pair to avoid problematic
14000 otherops[1] = operands[1];
14002 output_asm_insn ("adr%?\t%0, %1", otherops);
14003 operands[1] = otherops[0];
14007 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14009 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14016 /* ??? This needs checking for thumb2. */
14018 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14019 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14021 otherops[0] = operands[0];
14022 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14023 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14025 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14027 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14029 switch ((int) INTVAL (otherops[2]))
14033 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14039 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14045 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14049 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14050 operands[1] = otherops[0];
14052 && (GET_CODE (otherops[2]) == REG
14054 || (GET_CODE (otherops[2]) == CONST_INT
14055 && INTVAL (otherops[2]) > -256
14056 && INTVAL (otherops[2]) < 256)))
14058 if (reg_overlap_mentioned_p (operands[0],
14062 /* Swap base and index registers over to
14063 avoid a conflict. */
14065 otherops[1] = otherops[2];
14068 /* If both registers conflict, it will usually
14069 have been fixed by a splitter. */
14070 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14071 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14075 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14076 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14083 otherops[0] = operands[0];
14085 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14090 if (GET_CODE (otherops[2]) == CONST_INT)
14094 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14095 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14097 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14103 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14109 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14113 return "ldr%(d%)\t%0, [%1]";
14115 return "ldm%(ia%)\t%1, %M0";
14119 otherops[1] = adjust_address (operands[1], SImode, 4);
14120 /* Take care of overlapping base/data reg. */
14121 if (reg_mentioned_p (operands[0], operands[1]))
14125 output_asm_insn ("ldr%?\t%0, %1", otherops);
14126 output_asm_insn ("ldr%?\t%0, %1", operands);
14136 output_asm_insn ("ldr%?\t%0, %1", operands);
14137 output_asm_insn ("ldr%?\t%0, %1", otherops);
14147 /* Constraints should ensure this. */
14148 gcc_assert (code0 == MEM && code1 == REG);
14149 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14151 switch (GET_CODE (XEXP (operands[0], 0)))
14157 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14159 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14164 gcc_assert (TARGET_LDRD);
14166 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14173 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14175 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14183 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14185 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14190 gcc_assert (TARGET_LDRD);
14192 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14197 otherops[0] = operands[1];
14198 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14199 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14201 /* IWMMXT allows offsets larger than strd can handle,
14202 fix these up with a pair of str. */
14204 && GET_CODE (otherops[2]) == CONST_INT
14205 && (INTVAL(otherops[2]) <= -256
14206 || INTVAL(otherops[2]) >= 256))
14208 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14212 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14213 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14222 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14223 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14229 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14232 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14237 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14242 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14243 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14245 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14249 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14256 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14263 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14268 && (GET_CODE (otherops[2]) == REG
14270 || (GET_CODE (otherops[2]) == CONST_INT
14271 && INTVAL (otherops[2]) > -256
14272 && INTVAL (otherops[2]) < 256)))
14274 otherops[0] = operands[1];
14275 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14277 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14283 otherops[0] = adjust_address (operands[0], SImode, 4);
14284 otherops[1] = operands[1];
14287 output_asm_insn ("str%?\t%1, %0", operands);
14288 output_asm_insn ("str%?\t%H1, %0", otherops);
14298 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14299 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14302 output_move_quad (rtx *operands)
14304 if (REG_P (operands[0]))
14306 /* Load, or reg->reg move. */
14308 if (MEM_P (operands[1]))
14310 switch (GET_CODE (XEXP (operands[1], 0)))
14313 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14318 output_asm_insn ("adr%?\t%0, %1", operands);
14319 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14323 gcc_unreachable ();
14331 gcc_assert (REG_P (operands[1]));
14333 dest = REGNO (operands[0]);
14334 src = REGNO (operands[1]);
14336 /* This seems pretty dumb, but hopefully GCC won't try to do it
14339 for (i = 0; i < 4; i++)
14341 ops[0] = gen_rtx_REG (SImode, dest + i);
14342 ops[1] = gen_rtx_REG (SImode, src + i);
14343 output_asm_insn ("mov%?\t%0, %1", ops);
14346 for (i = 3; i >= 0; i--)
14348 ops[0] = gen_rtx_REG (SImode, dest + i);
14349 ops[1] = gen_rtx_REG (SImode, src + i);
14350 output_asm_insn ("mov%?\t%0, %1", ops);
14356 gcc_assert (MEM_P (operands[0]));
14357 gcc_assert (REG_P (operands[1]));
14358 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14360 switch (GET_CODE (XEXP (operands[0], 0)))
14363 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14367 gcc_unreachable ();
14374 /* Output a VFP load or store instruction. */
14377 output_move_vfp (rtx *operands)
14379 rtx reg, mem, addr, ops[2];
14380 int load = REG_P (operands[0]);
14381 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14382 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14385 enum machine_mode mode;
14387 reg = operands[!load];
14388 mem = operands[load];
14390 mode = GET_MODE (reg);
14392 gcc_assert (REG_P (reg));
14393 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14394 gcc_assert (mode == SFmode
14398 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14399 gcc_assert (MEM_P (mem));
14401 addr = XEXP (mem, 0);
14403 switch (GET_CODE (addr))
14406 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14407 ops[0] = XEXP (addr, 0);
14412 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14413 ops[0] = XEXP (addr, 0);
14418 templ = "f%s%c%%?\t%%%s0, %%1%s";
14424 sprintf (buff, templ,
14425 load ? "ld" : "st",
14428 integer_p ? "\t%@ int" : "");
14429 output_asm_insn (buff, ops);
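/* Illustrative outputs (hand written, not dumped from a build): a
   DFmode load from a base-plus-offset address comes out through the
   default case as

       fldd    d8, [r0, #8]

   whereas a POST_INC address selects the load-multiple form with
   writeback, e.g. "fldmiad r0!, {d8}".  */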
14434 /* Output a Neon quad-word load or store, or a load or store for
14435 larger structure modes.
14437 WARNING: The ordering of elements is weird in big-endian mode,
14438 because we use VSTM, as required by the EABI. GCC RTL defines
14439 element ordering based on in-memory order. This can differ
14440 from the architectural ordering of elements within a NEON register.
14441 The intrinsics defined in arm_neon.h use the NEON register element
14442 ordering, not the GCC RTL element ordering.
14444 For example, the in-memory ordering of a big-endian quadword
14445 vector with 16-bit elements when stored from register pair {d0,d1}
14446 will be (lowest address first, d0[N] is NEON register element N):
14448 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14450 When necessary, quadword registers (dN, dN+1) are moved to ARM
14451 registers from rN in the order:
14453 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14455 So that STM/LDM can be used on vectors in ARM registers, and the
14456 same memory layout will result as if VSTM/VLDM were used. */
14459 output_move_neon (rtx *operands)
14461 rtx reg, mem, addr, ops[2];
14462 int regno, load = REG_P (operands[0]);
14465 enum machine_mode mode;
14467 reg = operands[!load];
14468 mem = operands[load];
14470 mode = GET_MODE (reg);
14472 gcc_assert (REG_P (reg));
14473 regno = REGNO (reg);
14474 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14475 || NEON_REGNO_OK_FOR_QUAD (regno));
14476 gcc_assert (VALID_NEON_DREG_MODE (mode)
14477 || VALID_NEON_QREG_MODE (mode)
14478 || VALID_NEON_STRUCT_MODE (mode));
14479 gcc_assert (MEM_P (mem));
14481 addr = XEXP (mem, 0);
14483 /* Strip off const from addresses like (const (plus (...))). */
14484 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14485 addr = XEXP (addr, 0);
14487 switch (GET_CODE (addr))
14490 templ = "v%smia%%?\t%%0!, %%h1";
14491 ops[0] = XEXP (addr, 0);
14496 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14497 templ = "v%smdb%%?\t%%0!, %%h1";
14498 ops[0] = XEXP (addr, 0);
14503 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14504 gcc_unreachable ();
14509 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14512 for (i = 0; i < nregs; i++)
14514 /* We're only using DImode here because it's a convenient size. */
14515 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14516 ops[1] = adjust_address (mem, DImode, 8 * i);
14517 if (reg_overlap_mentioned_p (ops[0], mem))
14519 gcc_assert (overlap == -1);
14524 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14525 output_asm_insn (buff, ops);
14530 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14531 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14532 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14533 output_asm_insn (buff, ops);
14540 templ = "v%smia%%?\t%%m0, %%h1";
14545 sprintf (buff, templ, load ? "ld" : "st");
14546 output_asm_insn (buff, ops);
14551 /* Compute and return the length of neon_mov<mode>, where <mode> is
14552 one of VSTRUCT modes: EI, OI, CI or XI. */
14554 arm_attr_length_move_neon (rtx insn)
14556 rtx reg, mem, addr;
14558 enum machine_mode mode;
14560 extract_insn_cached (insn);
14562 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14564 mode = GET_MODE (recog_data.operand[0]);
14575 gcc_unreachable ();
14579 load = REG_P (recog_data.operand[0]);
14580 reg = recog_data.operand[!load];
14581 mem = recog_data.operand[load];
14583 gcc_assert (MEM_P (mem));
14585 mode = GET_MODE (reg);
14586 addr = XEXP (mem, 0);
14588 /* Strip off const from addresses like (const (plus (...))). */
14589 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14590 addr = XEXP (addr, 0);
14592 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14594 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14601 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14605 arm_address_offset_is_imm (rtx insn)
14609 extract_insn_cached (insn);
14611 if (REG_P (recog_data.operand[0]))
14614 mem = recog_data.operand[0];
14616 gcc_assert (MEM_P (mem));
14618 addr = XEXP (mem, 0);
14620 if (GET_CODE (addr) == REG
14621 || (GET_CODE (addr) == PLUS
14622 && GET_CODE (XEXP (addr, 0)) == REG
14623 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14629 /* Output an ADD r, s, #n where n may be too big for one instruction.
14630 If adding zero to one register, output nothing. */
14632 output_add_immediate (rtx *operands)
14634 HOST_WIDE_INT n = INTVAL (operands[2]);
14636 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14639 output_multi_immediate (operands,
14640 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14643 output_multi_immediate (operands,
14644 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14651 /* Output a multiple immediate operation.
14652 OPERANDS is the vector of operands referred to in the output patterns.
14653 INSTR1 is the output pattern to use for the first constant.
14654 INSTR2 is the output pattern to use for subsequent constants.
14655 IMMED_OP is the index of the constant slot in OPERANDS.
14656 N is the constant value. */
14657 static const char *
14658 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14659 int immed_op, HOST_WIDE_INT n)
14661 #if HOST_BITS_PER_WIDE_INT > 32
14667 /* Quick and easy output. */
14668 operands[immed_op] = const0_rtx;
14669 output_asm_insn (instr1, operands);
14674 const char * instr = instr1;
14676 /* Note that n is never zero here (which would give no output). */
14677 for (i = 0; i < 32; i += 2)
14681 operands[immed_op] = GEN_INT (n & (255 << i));
14682 output_asm_insn (instr, operands);
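/* Worked example (hand computed): with n = 0x00ff00ff and the add
   templates from output_add_immediate, the loop above emits

       add     r0, r1, #255            @ chunk 0x000000ff
       add     r0, r0, #16711680       @ chunk 0x00ff0000

   each chunk being a valid ARM immediate: 8 bits rotated right by an
   even amount.  */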
14692 /* Return the name of a shifter operation. */
14693 static const char *
14694 arm_shift_nmem (enum rtx_code code)
14699 return ARM_LSL_NAME;
14715 /* Return the appropriate ARM instruction for the operation code.
14716 The returned result should not be overwritten. OP is the rtx of the
14717 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14720 arithmetic_instr (rtx op, int shift_first_arg)
14722 switch (GET_CODE (op))
14728 return shift_first_arg ? "rsb" : "sub";
14743 return arm_shift_nmem (GET_CODE (op));
14746 gcc_unreachable ();
14750 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14751 for the operation code. The returned result should not be overwritten.
14752 OP is the rtx code of the shift.
14753 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
14755 static const char *
14756 shift_op (rtx op, HOST_WIDE_INT *amountp)
14759 enum rtx_code code = GET_CODE (op);
14761 switch (GET_CODE (XEXP (op, 1)))
14769 *amountp = INTVAL (XEXP (op, 1));
14773 gcc_unreachable ();
14779 gcc_assert (*amountp != -1);
14780 *amountp = 32 - *amountp;
14783 /* Fall through. */
14789 mnem = arm_shift_nmem (code);
14793 /* We never have to worry about the amount being other than a
14794 power of 2, since this case can never be reloaded from a reg. */
14795 gcc_assert (*amountp != -1);
14796 *amountp = int_log2 (*amountp);
14797 return ARM_LSL_NAME;
14800 gcc_unreachable ();
14803 if (*amountp != -1)
14805 /* This is not 100% correct, but follows from the desire to merge
14806 multiplication by a power of 2 with the recognizer for a
14807 shift. >=32 is not a valid shift for "lsl", so we must try and
14808 output a shift that produces the correct arithmetical result.
14809 Using lsr #32 is identical except for the fact that the carry bit
14810 is not set correctly if we set the flags; but we never use the
14811 carry bit from such an operation, so we can ignore that. */
14812 if (code == ROTATERT)
14813 /* Rotate is just modulo 32. */
14815 else if (*amountp != (*amountp & 31))
14817 if (code == ASHIFT)
14822 /* Shifts of 0 are no-ops. */
14830 /* Obtain the shift count from the POWER of two. */
14832 static HOST_WIDE_INT
14833 int_log2 (HOST_WIDE_INT power)
14835 HOST_WIDE_INT shift = 0;
14837 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14839 gcc_assert (shift <= 31);
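/* For instance, int_log2 (8) == 3; shift_op relies on this above to
   rewrite a multiplication by a power of two as an ARM_LSL_NAME
   shift (example hand picked, not from the sources).  */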
14846 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14847 because /bin/as is horribly restrictive. The judgement about
14848 whether or not each character is 'printable' (and can be output as
14849 is) or not (and must be printed with an octal escape) must be made
14850 with reference to the *host* character set -- the situation is
14851 similar to that discussed in the comments above pp_c_char in
14852 c-pretty-print.c. */
14854 #define MAX_ASCII_LEN 51
14857 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14860 int len_so_far = 0;
14862 fputs ("\t.ascii\t\"", stream);
14864 for (i = 0; i < len; i++)
14868 if (len_so_far >= MAX_ASCII_LEN)
14870 fputs ("\"\n\t.ascii\t\"", stream);
14876 if (c == '\\' || c == '\"')
14878 putc ('\\', stream);
14886 fprintf (stream, "\\%03o", c);
14891 fputs ("\"\n", stream);
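/* Example (illustrative): output_ascii_pseudo_op (stream, "a\"b", 4)
   would emit, escaping the quote and printing the trailing NUL in
   octal,

       .ascii "a\"b\000"

   starting a fresh .ascii directive whenever MAX_ASCII_LEN characters
   have already been emitted on the current one.  */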
14894 /* Compute the register save mask for registers 0 through 12
14895 inclusive. This code is used by arm_compute_save_reg_mask. */
14897 static unsigned long
14898 arm_compute_save_reg0_reg12_mask (void)
14900 unsigned long func_type = arm_current_func_type ();
14901 unsigned long save_reg_mask = 0;
14904 if (IS_INTERRUPT (func_type))
14906 unsigned int max_reg;
14907 /* Interrupt functions must not corrupt any registers,
14908 even call clobbered ones. If this is a leaf function
14909 we can just examine the registers used by the RTL, but
14910 otherwise we have to assume that whatever function is
14911 called might clobber anything, and so we have to save
14912 all the call-clobbered registers as well. */
14913 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14914 /* FIQ handlers have registers r8 - r12 banked, so
14915 we only need to check r0 - r7. Normal ISRs only
14916 bank r14 and r15, so we must check up to r12.
14917 r13 is the stack pointer which is always preserved,
14918 so we do not need to consider it here. */
14923 for (reg = 0; reg <= max_reg; reg++)
14924 if (df_regs_ever_live_p (reg)
14925 || (! current_function_is_leaf && call_used_regs[reg]))
14926 save_reg_mask |= (1 << reg);
14928 /* Also save the pic base register if necessary. */
14930 && !TARGET_SINGLE_PIC_BASE
14931 && arm_pic_register != INVALID_REGNUM
14932 && crtl->uses_pic_offset_table)
14933 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14935 else if (IS_VOLATILE(func_type))
14937 /* For noreturn functions we historically omitted register saves
14938 altogether. However this really messes up debugging. As a
14939 compromise save just the frame pointers. Combined with the link
14940 register saved elsewhere this should be sufficient to get
14942 if (frame_pointer_needed)
14943 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14944 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14945 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14946 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14947 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14951 /* In the normal case we only need to save those registers
14952 which are call saved and which are used by this function. */
14953 for (reg = 0; reg <= 11; reg++)
14954 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14955 save_reg_mask |= (1 << reg);
14957 /* Handle the frame pointer as a special case. */
14958 if (frame_pointer_needed)
14959 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14961 /* If we aren't loading the PIC register,
14962 don't stack it even though it may be live. */
14964 && !TARGET_SINGLE_PIC_BASE
14965 && arm_pic_register != INVALID_REGNUM
14966 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14967 || crtl->uses_pic_offset_table))
14968 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14970 /* The prologue will copy SP into R0, so save it. */
14971 if (IS_STACKALIGN (func_type))
14972 save_reg_mask |= 1;
14975 /* Save registers so the exception handler can modify them. */
14976 if (crtl->calls_eh_return)
14982 reg = EH_RETURN_DATA_REGNO (i);
14983 if (reg == INVALID_REGNUM)
14985 save_reg_mask |= 1 << reg;
14989 return save_reg_mask;
14993 /* Compute the number of bytes used to store the static chain register on the
14994 stack, above the stack frame. We need to know this accurately to get the
14995 alignment of the rest of the stack frame correct. */
14997 static int arm_compute_static_chain_stack_bytes (void)
14999 unsigned long func_type = arm_current_func_type ();
15000 int static_chain_stack_bytes = 0;
15002 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15003 && IS_NESTED (func_type)
15004 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15005 static_chain_stack_bytes = 4;
15007 return static_chain_stack_bytes;
15011 /* Compute a bit mask of which registers need to be
15012 saved on the stack for the current function.
15013 This is used by arm_get_frame_offsets, which may add extra registers. */
15015 static unsigned long
15016 arm_compute_save_reg_mask (void)
15018 unsigned int save_reg_mask = 0;
15019 unsigned long func_type = arm_current_func_type ();
15022 if (IS_NAKED (func_type))
15023 /* This should never really happen. */
15026 /* If we are creating a stack frame, then we must save the frame pointer,
15027 IP (which will hold the old stack pointer), LR and the PC. */
15028 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15030 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15033 | (1 << PC_REGNUM);
15035 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15037 /* Decide if we need to save the link register.
15038 Interrupt routines have their own banked link register,
15039 so they never need to save it.
15040 Otherwise if we do not use the link register we do not need to save
15041 it. If we are pushing other registers onto the stack however, we
15042 can save an instruction in the epilogue by pushing the link register
15043 now and then popping it back into the PC. This incurs extra memory
15044 accesses though, so we only do it when optimizing for size, and only
15045 if we know that we will not need a fancy return sequence. */
15046 if (df_regs_ever_live_p (LR_REGNUM)
15049 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15050 && !crtl->calls_eh_return))
15051 save_reg_mask |= 1 << LR_REGNUM;
15053 if (cfun->machine->lr_save_eliminated)
15054 save_reg_mask &= ~ (1 << LR_REGNUM);
15056 if (TARGET_REALLY_IWMMXT
15057 && ((bit_count (save_reg_mask)
15058 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15059 arm_compute_static_chain_stack_bytes())
15062 /* The total number of registers that are going to be pushed
15063 onto the stack is odd. We need to ensure that the stack
15064 is 64-bit aligned before we start to save iWMMXt registers,
15065 and also before we start to create locals. (A local variable
15066 might be a double or long long which we will load/store using
15067 an iWMMXt instruction). Therefore we need to push another
15068 ARM register, so that the stack will be 64-bit aligned. We
15069 try to avoid using the arg registers (r0 - r3) as they might be
15070 used to pass values in a tail call. */
15071 for (reg = 4; reg <= 12; reg++)
15072 if ((save_reg_mask & (1 << reg)) == 0)
15076 save_reg_mask |= (1 << reg);
15079 cfun->machine->sibcall_blocked = 1;
15080 save_reg_mask |= (1 << 3);
15084 /* We may need to push an additional register for use initializing the
15085 PIC base register. */
15086 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15087 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15089 reg = thumb_find_work_register (1 << 4);
15090 if (!call_used_regs[reg])
15091 save_reg_mask |= (1 << reg);
15094 return save_reg_mask;
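/* By way of example (hand worked, not from a testcase): a normal ARM
   function that uses r4 and r7 and makes calls ends up with
   (1 << 4) | (1 << 7) | (1 << LR_REGNUM) set in the mask, matching a
   "push {r4, r7, lr}" prologue.  */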
15098 /* Compute a bit mask of which registers need to be
15099 saved on the stack for the current function. */
15100 static unsigned long
15101 thumb1_compute_save_reg_mask (void)
15103 unsigned long mask;
15107 for (reg = 0; reg < 12; reg ++)
15108 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15112 && !TARGET_SINGLE_PIC_BASE
15113 && arm_pic_register != INVALID_REGNUM
15114 && crtl->uses_pic_offset_table)
15115 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15117 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15118 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15119 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15121 /* LR will also be pushed if any lo regs are pushed. */
15122 if (mask & 0xff || thumb_force_lr_save ())
15123 mask |= (1 << LR_REGNUM);
15125 /* Make sure we have a low work register if we need one.
15126 We will need one if we are going to push a high register,
15127 but we are not currently intending to push a low register. */
15128 if ((mask & 0xff) == 0
15129 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15131 /* Use thumb_find_work_register to choose which register
15132 we will use. If the register is live then we will
15133 have to push it. Use LAST_LO_REGNUM as our fallback
15134 choice for the register to select. */
15135 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15136 /* Make sure the register returned by thumb_find_work_register is
15137 not part of the return value. */
15138 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15139 reg = LAST_LO_REGNUM;
15141 if (! call_used_regs[reg])
15145 /* The 504 below is 8 bytes less than 512 because there are two possible
15146 alignment words. We can't tell here if they will be present or not, so we
15147 have to play it safe and assume that they are. */
15148 if ((CALLER_INTERWORKING_SLOT_SIZE +
15149 ROUND_UP_WORD (get_frame_size ()) +
15150 crtl->outgoing_args_size) >= 504)
15152 /* This is the same as the code in thumb1_expand_prologue() which
15153 determines which register to use for stack decrement. */
15154 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15155 if (mask & (1 << reg))
15158 if (reg > LAST_LO_REGNUM)
15160 /* Make sure we have a register available for stack decrement. */
15161 mask |= 1 << LAST_LO_REGNUM;
15169 /* Return the number of bytes required to save VFP registers. */
15171 arm_get_vfp_saved_size (void)
15173 unsigned int regno;
15178 /* Space for saved VFP registers. */
15179 if (TARGET_HARD_FLOAT && TARGET_VFP)
15182 for (regno = FIRST_VFP_REGNUM;
15183 regno < LAST_VFP_REGNUM;
15186 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15187 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15191 /* Work around the ARM10 VFPr1 bug. */
15192 if (count == 2 && !arm_arch6)
15194 saved += count * 8;
15203 if (count == 2 && !arm_arch6)
15205 saved += count * 8;
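/* Worked example (hand computed): if d8-d11 are the only call-saved
   VFP registers live, the scan above finds one contiguous run with
   count == 4 and returns 32.  On a pre-ARMv6 core a run of exactly
   two D registers is padded with one more for the ARM10 VFPr1
   workaround, so two live registers still cost 24 bytes.  */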
15212 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15213 everything bar the final return instruction. */
15215 output_return_instruction (rtx operand, int really_return, int reverse)
15217 char conditional[10];
15220 unsigned long live_regs_mask;
15221 unsigned long func_type;
15222 arm_stack_offsets *offsets;
15224 func_type = arm_current_func_type ();
15226 if (IS_NAKED (func_type))
15229 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15231 /* If this function was declared non-returning, and we have
15232 found a tail call, then we have to trust that the called
15233 function won't return. */
15238 /* Otherwise, trap an attempted return by aborting. */
15240 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15242 assemble_external_libcall (ops[1]);
15243 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15249 gcc_assert (!cfun->calls_alloca || really_return);
15251 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15253 cfun->machine->return_used_this_function = 1;
15255 offsets = arm_get_frame_offsets ();
15256 live_regs_mask = offsets->saved_regs_mask;
15258 if (live_regs_mask)
15260 const char * return_reg;
15262 /* If we do not have any special requirements for function exit
15263 (e.g. interworking) then we can load the return address
15264 directly into the PC. Otherwise we must load it into LR. */
15266 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15267 return_reg = reg_names[PC_REGNUM];
15269 return_reg = reg_names[LR_REGNUM];
15271 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15273 /* There are three possible reasons for the IP register
15274 being saved. 1) a stack frame was created, in which case
15275 IP contains the old stack pointer, or 2) an ISR routine
15276 corrupted it, or 3) it was saved to align the stack on
15277 iWMMXt. In case 1, restore IP into SP; otherwise just
15279 if (frame_pointer_needed)
15281 live_regs_mask &= ~ (1 << IP_REGNUM);
15282 live_regs_mask |= (1 << SP_REGNUM);
15285 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15288 /* On some ARM architectures it is faster to use LDR rather than
15289 LDM to load a single register. On other architectures, the
15290 cost is the same. In 26 bit mode, or for exception handlers,
15291 we have to use LDM to load the PC so that the CPSR is also
15293 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15294 if (live_regs_mask == (1U << reg))
15297 if (reg <= LAST_ARM_REGNUM
15298 && (reg != LR_REGNUM
15300 || ! IS_INTERRUPT (func_type)))
15302 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15303 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15310 /* Generate the load multiple instruction to restore the
15311 registers. Note we can get here, even if
15312 frame_pointer_needed is true, but only if sp already
15313 points to the base of the saved core registers. */
15314 if (live_regs_mask & (1 << SP_REGNUM))
15316 unsigned HOST_WIDE_INT stack_adjust;
15318 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15319 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15321 if (stack_adjust && arm_arch5 && TARGET_ARM)
15322 if (TARGET_UNIFIED_ASM)
15323 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15325 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15328 /* If we can't use ldmib (SA110 bug),
15329 then try to pop r3 instead. */
15331 live_regs_mask |= 1 << 3;
15333 if (TARGET_UNIFIED_ASM)
15334 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15336 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15340 if (TARGET_UNIFIED_ASM)
15341 sprintf (instr, "pop%s\t{", conditional);
15343 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15345 p = instr + strlen (instr);
15347 for (reg = 0; reg <= SP_REGNUM; reg++)
15348 if (live_regs_mask & (1 << reg))
15350 int l = strlen (reg_names[reg]);
15356 memcpy (p, ", ", 2);
15360 memcpy (p, "%|", 2);
15361 memcpy (p + 2, reg_names[reg], l);
15365 if (live_regs_mask & (1 << LR_REGNUM))
15367 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15368 /* If returning from an interrupt, restore the CPSR. */
15369 if (IS_INTERRUPT (func_type))
15376 output_asm_insn (instr, & operand);
15378 /* See if we need to generate an extra instruction to
15379 perform the actual function return. */
15381 && func_type != ARM_FT_INTERWORKED
15382 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15384 /* The return has already been handled
15385 by loading the LR into the PC. */
15392 switch ((int) ARM_FUNC_TYPE (func_type))
15396 /* ??? This is wrong for unified assembly syntax. */
15397 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15400 case ARM_FT_INTERWORKED:
15401 sprintf (instr, "bx%s\t%%|lr", conditional);
15404 case ARM_FT_EXCEPTION:
15405 /* ??? This is wrong for unified assembly syntax. */
15406 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15410 /* Use bx if it's available. */
15411 if (arm_arch5 || arm_arch4t)
15412 sprintf (instr, "bx%s\t%%|lr", conditional);
15414 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15418 output_asm_insn (instr, & operand);
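/* For instance (hand-written illustration): a function that saved
   r4, r7 and lr and needs no special exit sequence returns through a
   single

       ldmfd   sp!, {r4, r7, pc}

   (or "pop {r4, r7, pc}" in unified syntax), the saved lr being
   loaded straight into the pc.  */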
15424 /* Write the function name into the code section, directly preceding
15425 the function prologue.
15427 Code will be output similar to this:
15429 .ascii "arm_poke_function_name", 0
15432 .word 0xff000000 + (t1 - t0)
15433 arm_poke_function_name
15435 stmfd sp!, {fp, ip, lr, pc}
15438 When performing a stack backtrace, code can inspect the value
15439 of 'pc' stored at 'fp' + 0. If the trace function then looks
15440 at location pc - 12 and the top 8 bits are set, then we know
15441 that there is a function name embedded immediately preceding this
15442 location, whose length is given by (pc[-3] & 0x00ffffff).
15444 We assume that pc is declared as a pointer to an unsigned long.
15446 It is of no benefit to output the function name if we are assembling
15447 a leaf function. These function types will not contain a stack
15448 backtrace structure, therefore it is not possible to determine the
15451 arm_poke_function_name (FILE *stream, const char *name)
15453 unsigned long alignlength;
15454 unsigned long length;
15457 length = strlen (name) + 1;
15458 alignlength = ROUND_UP_WORD (length);
15460 ASM_OUTPUT_ASCII (stream, name, length);
15461 ASM_OUTPUT_ALIGN (stream, 2);
15462 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15463 assemble_aligned_integer (UNITS_PER_WORD, x);
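/* A sketch (hypothetical, not part of GCC) of how a backtrace routine
   could recover the embedded name, with "pc" declared as a pointer to
   unsigned long as assumed above:  */
#if 0
  if ((pc[-3] & 0xff000000) == 0xff000000)
    {
      unsigned long len = pc[-3] & 0x00ffffff;	/* padded name length */
      const char *name = (const char *) (pc - 3) - len;
      /* NAME now points at the NUL-terminated, word-padded name.  */
    }
#endif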
15466 /* Place some comments into the assembler stream
15467 describing the current function. */
15469 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15471 unsigned long func_type;
15473 /* ??? Do we want to print some of the below anyway? */
15477 /* Sanity check. */
15478 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15480 func_type = arm_current_func_type ();
15482 switch ((int) ARM_FUNC_TYPE (func_type))
15485 case ARM_FT_NORMAL:
15487 case ARM_FT_INTERWORKED:
15488 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15491 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15494 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15496 case ARM_FT_EXCEPTION:
15497 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15501 if (IS_NAKED (func_type))
15502 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15504 if (IS_VOLATILE (func_type))
15505 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15507 if (IS_NESTED (func_type))
15508 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15509 if (IS_STACKALIGN (func_type))
15510 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15512 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15514 crtl->args.pretend_args_size, frame_size);
15516 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15517 frame_pointer_needed,
15518 cfun->machine->uses_anonymous_args);
15520 if (cfun->machine->lr_save_eliminated)
15521 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15523 if (crtl->calls_eh_return)
15524 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15529 arm_output_epilogue (rtx sibling)
15532 unsigned long saved_regs_mask;
15533 unsigned long func_type;
15534 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15535 frame that is $fp + 4 for a non-variadic function. */
15536 int floats_offset = 0;
15538 FILE * f = asm_out_file;
15539 unsigned int lrm_count = 0;
15540 int really_return = (sibling == NULL);
15542 arm_stack_offsets *offsets;
15544 /* If we have already generated the return instruction
15545 then it is futile to generate anything else. */
15546 if (use_return_insn (FALSE, sibling)
15547 && (cfun->machine->return_used_this_function != 0))
15550 func_type = arm_current_func_type ();
15552 if (IS_NAKED (func_type))
15553 /* Naked functions don't have epilogues. */
15556 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15560 /* A volatile function should never return. Call abort. */
15561 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15562 assemble_external_libcall (op);
15563 output_asm_insn ("bl\t%a0", &op);
15568 /* If we are throwing an exception, then we really must be doing a
15569 return, so we can't tail-call. */
15570 gcc_assert (!crtl->calls_eh_return || really_return);
15572 offsets = arm_get_frame_offsets ();
15573 saved_regs_mask = offsets->saved_regs_mask;
15576 lrm_count = bit_count (saved_regs_mask);
15578 floats_offset = offsets->saved_args;
15579 /* Compute how far away the floats will be. */
15580 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15581 if (saved_regs_mask & (1 << reg))
15582 floats_offset += 4;
15584 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15586 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15587 int vfp_offset = offsets->frame;
15589 if (TARGET_FPA_EMU2)
15591 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15592 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15594 floats_offset += 12;
15595 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15596 reg, FP_REGNUM, floats_offset - vfp_offset);
15601 start_reg = LAST_FPA_REGNUM;
15603 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15605 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15607 floats_offset += 12;
15609 /* We can't unstack more than four registers at once. */
15610 if (start_reg - reg == 3)
15612 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15613 reg, FP_REGNUM, floats_offset - vfp_offset);
15614 start_reg = reg - 1;
15619 if (reg != start_reg)
15620 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15621 reg + 1, start_reg - reg,
15622 FP_REGNUM, floats_offset - vfp_offset);
15623 start_reg = reg - 1;
15627 /* Just in case the last register checked also needs unstacking. */
15628 if (reg != start_reg)
15629 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15630 reg + 1, start_reg - reg,
15631 FP_REGNUM, floats_offset - vfp_offset);
15634 if (TARGET_HARD_FLOAT && TARGET_VFP)
15638 /* The fldmd insns do not have base+offset addressing
15639 modes, so we use IP to hold the address. */
15640 saved_size = arm_get_vfp_saved_size ();
15642 if (saved_size > 0)
15644 floats_offset += saved_size;
15645 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15646 FP_REGNUM, floats_offset - vfp_offset);
15648 start_reg = FIRST_VFP_REGNUM;
15649 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15651 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15652 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15654 if (start_reg != reg)
15655 vfp_output_fldmd (f, IP_REGNUM,
15656 (start_reg - FIRST_VFP_REGNUM) / 2,
15657 (reg - start_reg) / 2);
15658 start_reg = reg + 2;
15661 if (start_reg != reg)
15662 vfp_output_fldmd (f, IP_REGNUM,
15663 (start_reg - FIRST_VFP_REGNUM) / 2,
15664 (reg - start_reg) / 2);
15669 /* The frame pointer is guaranteed to be non-double-word aligned.
15670 This is because it is set to (old_stack_pointer - 4) and the
15671 old_stack_pointer was double word aligned. Thus the offset to
15672 the iWMMXt registers to be loaded must also be non-double-word
15673 sized, so that the resultant address *is* double-word aligned.
15674 We can ignore floats_offset since that was already included in
15675 the live_regs_mask. */
15676 lrm_count += (lrm_count % 2 ? 2 : 1);
15678 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15679 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15681 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15682 reg, FP_REGNUM, lrm_count * 4);
15687 /* saved_regs_mask should contain the IP, which at the time of stack
15688 frame generation actually contains the old stack pointer. So a
15689 quick way to unwind the stack is just pop the IP register directly
15690 into the stack pointer. */
15691 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15692 saved_regs_mask &= ~ (1 << IP_REGNUM);
15693 saved_regs_mask |= (1 << SP_REGNUM);
15695 /* There are two registers left in saved_regs_mask - LR and PC. We
15696 only need to restore the LR register (the return address), but to
15697 save time we can load it directly into the PC, unless we need a
15698 special function exit sequence, or we are not really returning. */
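/* Illustrative only: with LR folded into the mask, the epilogue can
   restore straight to PC, e.g.
       ldmfd sp!, {r4-r7, pc}
   instead of
       ldmfd sp!, {r4-r7, lr}
       bx lr  */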
15700 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15701 && !crtl->calls_eh_return)
15702 /* Delete the LR from the register mask, so that the LR on
15703 the stack is loaded into the PC in the register mask. */
15704 saved_regs_mask &= ~ (1 << LR_REGNUM);
15706 saved_regs_mask &= ~ (1 << PC_REGNUM);
15708 /* We must use SP as the base register, because SP is one of the
15709 registers being restored. If an interrupt or page fault
15710 happens in the ldm instruction, the SP might or might not
15711 have been restored. That would be bad, as then SP will no
15712 longer indicate the safe area of stack, and we can get stack
15713 corruption. Using SP as the base register means that it will
15714 be reset correctly to the original value, should an interrupt
15715 occur. If the stack pointer already points at the right
15716 place, then omit the subtraction. */
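/* A sketch of the resulting sequence, assuming five registers
   (20 bytes) were saved:
       sub   sp, fp, #20        @ re-derive SP from the frame pointer
       ldmfd sp, {r4-r7, pc}    @ no writeback
   If an interrupt arrives between the two instructions, SP is simply
   recomputed and still marks the safe area of stack.  */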
15717 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15718 || cfun->calls_alloca)
15719 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15720 4 * bit_count (saved_regs_mask));
15721 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15723 if (IS_INTERRUPT (func_type))
15724 /* Interrupt handlers will have pushed the
15725 IP onto the stack, so restore it now. */
15726 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15730 /* This branch is executed for ARM mode (non-apcs frames) and
15731 Thumb-2 mode. Frame layout is essentially the same for those
15732 cases, except that in ARM mode the frame pointer points to the
15733 first saved register, while in Thumb-2 mode the frame pointer points
15734 to the last saved register.
15736 It is possible to make the frame pointer point to the last saved
15737 register in both cases, and remove some conditionals below.
15738 That means that the fp setup in the prologue would be just "mov fp, sp"
15739 and the sp restore in the epilogue would be just "mov sp, fp", whereas
15740 now we have to use add/sub in those cases. However, the value
15741 of that would be marginal, as both mov and add/sub are 32-bit
15742 in ARM mode, and it would require extra conditionals
15743 in arm_expand_prologue to distinguish the ARM-apcs-frame case
15744 (where the frame pointer is required to point at the first register)
15745 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
15746 until a real need arises. */
15747 unsigned HOST_WIDE_INT amount;
15749 /* Restore stack pointer if necessary. */
15750 if (TARGET_ARM && frame_pointer_needed)
15752 operands[0] = stack_pointer_rtx;
15753 operands[1] = hard_frame_pointer_rtx;
15755 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15756 output_add_immediate (operands);
15760 if (frame_pointer_needed)
15762 /* For Thumb-2 restore sp from the frame pointer.
15763 Operand restrictions mean we have to increment FP, then copy it to SP. */
15765 amount = offsets->locals_base - offsets->saved_regs;
15766 operands[0] = hard_frame_pointer_rtx;
15770 unsigned long count;
15771 operands[0] = stack_pointer_rtx;
15772 amount = offsets->outgoing_args - offsets->saved_regs;
15773 /* Pop call-clobbered registers if it avoids a
15774 separate stack adjustment. */
15775 count = offsets->saved_regs - offsets->saved_args;
15778 && !crtl->calls_eh_return
15779 && bit_count(saved_regs_mask) * 4 == count
15780 && !IS_INTERRUPT (func_type)
15781 && !IS_STACKALIGN (func_type)
15782 && !crtl->tail_call_emit)
15784 unsigned long mask;
15785 /* Preserve return values, of any size. */
15786 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15788 mask &= ~saved_regs_mask;
15790 while (bit_count (mask) * 4 > amount)
15792 while ((mask & (1 << reg)) == 0)
15794 mask &= ~(1 << reg);
15796 if (bit_count (mask) * 4 == amount) {
15798 saved_regs_mask |= mask;
15805 operands[1] = operands[0];
15806 operands[2] = GEN_INT (amount);
15807 output_add_immediate (operands);
15809 if (frame_pointer_needed)
15810 asm_fprintf (f, "\tmov\t%r, %r\n",
15811 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15814 if (TARGET_FPA_EMU2)
15816 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15817 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15818 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15823 start_reg = FIRST_FPA_REGNUM;
15825 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15827 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15829 if (reg - start_reg == 3)
15831 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15832 start_reg, SP_REGNUM);
15833 start_reg = reg + 1;
15838 if (reg != start_reg)
15839 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15840 start_reg, reg - start_reg,
15843 start_reg = reg + 1;
15847 /* Just in case the last register checked also needs unstacking. */
15848 if (reg != start_reg)
15849 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15850 start_reg, reg - start_reg, SP_REGNUM);
15853 if (TARGET_HARD_FLOAT && TARGET_VFP)
15855 int end_reg = LAST_VFP_REGNUM + 1;
15857 /* Scan the registers in reverse order. We need to match
15858 any groupings made in the prologue and generate matching pops. */
15860 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15862 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15863 && (!df_regs_ever_live_p (reg + 1)
15864 || call_used_regs[reg + 1]))
15866 if (end_reg > reg + 2)
15867 vfp_output_fldmd (f, SP_REGNUM,
15868 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15869 (end_reg - (reg + 2)) / 2);
15873 if (end_reg > reg + 2)
15874 vfp_output_fldmd (f, SP_REGNUM, 0,
15875 (end_reg - (reg + 2)) / 2);
15879 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15880 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15881 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15883 /* If we can, restore the LR into the PC. */
15884 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15885 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15886 && !IS_STACKALIGN (func_type)
15888 && crtl->args.pretend_args_size == 0
15889 && saved_regs_mask & (1 << LR_REGNUM)
15890 && !crtl->calls_eh_return)
15892 saved_regs_mask &= ~ (1 << LR_REGNUM);
15893 saved_regs_mask |= (1 << PC_REGNUM);
15894 rfe = IS_INTERRUPT (func_type);
15899 /* Load the registers off the stack. If we only have one register
15900 to load use the LDR instruction - it is faster. For Thumb-2
15901 always use pop and the assembler will pick the best instruction. */
15902 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15903 && !IS_INTERRUPT(func_type))
15905 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15907 else if (saved_regs_mask)
15909 if (saved_regs_mask & (1 << SP_REGNUM))
15910 /* Note - write back to the stack register is not enabled
15911 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15912 in the list of registers and if we add writeback the
15913 instruction becomes UNPREDICTABLE. */
15914 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
15916 else if (TARGET_ARM)
15917 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
15920 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15923 if (crtl->args.pretend_args_size)
15925 /* Unwind the pre-pushed regs. */
15926 operands[0] = operands[1] = stack_pointer_rtx;
15927 operands[2] = GEN_INT (crtl->args.pretend_args_size);
15928 output_add_immediate (operands);
15932 /* We may have already restored PC directly from the stack. */
15933 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
15936 /* Stack adjustment for exception handler. */
15937 if (crtl->calls_eh_return)
15938 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
15939 ARM_EH_STACKADJ_REGNUM);
15941 /* Generate the return instruction. */
15942 switch ((int) ARM_FUNC_TYPE (func_type))
15946 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
15949 case ARM_FT_EXCEPTION:
15950 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15953 case ARM_FT_INTERWORKED:
15954 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15958 if (IS_STACKALIGN (func_type))
15960 /* See comment in arm_expand_prologue. */
15961 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
15963 if (arm_arch5 || arm_arch4t)
15964 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15966 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15974 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15975 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15977 arm_stack_offsets *offsets;
15983 /* Emit any call-via-reg trampolines that are needed for v4t support
15984 of call_reg and call_value_reg type insns. */
15985 for (regno = 0; regno < LR_REGNUM; regno++)
15987 rtx label = cfun->machine->call_via[regno];
15991 switch_to_section (function_section (current_function_decl));
15992 targetm.asm_out.internal_label (asm_out_file, "L",
15993 CODE_LABEL_NUMBER (label));
15994 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15998 /* ??? Probably not safe to set this here, since it assumes that a
15999 function will be emitted as assembly immediately after we generate
16000 RTL for it. This does not happen for inline functions. */
16001 cfun->machine->return_used_this_function = 0;
16003 else /* TARGET_32BIT */
16005 /* We need to take into account any stack-frame rounding. */
16006 offsets = arm_get_frame_offsets ();
16008 gcc_assert (!use_return_insn (FALSE, NULL)
16009 || (cfun->machine->return_used_this_function != 0)
16010 || offsets->saved_regs == offsets->outgoing_args
16011 || frame_pointer_needed);
16013 /* Reset the ARM-specific per-function variables. */
16014 after_arm_reorg = 0;
16018 /* Generate and emit an insn that we will recognize as a push_multi.
16019 Unfortunately, since this insn does not reflect very well the actual
16020 semantics of the operation, we need to annotate the insn for the benefit
16021 of DWARF2 frame unwind information. */
16023 emit_multi_reg_push (unsigned long mask)
16026 int num_dwarf_regs;
16030 int dwarf_par_index;
16033 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16034 if (mask & (1 << i))
16037 gcc_assert (num_regs && num_regs <= 16);
16039 /* We don't record the PC in the dwarf frame information. */
16040 num_dwarf_regs = num_regs;
16041 if (mask & (1 << PC_REGNUM))
16044 /* For the body of the insn we are going to generate an UNSPEC in
16045 parallel with several USEs. This allows the insn to be recognized
16046 by the push_multi pattern in the arm.md file.
16048 The body of the insn looks something like this:
16051 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16052 (const_int:SI <num>)))
16053 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16059 For the frame note however, we try to be more explicit and actually
16060 show each register being stored into the stack frame, plus a (single)
16061 decrement of the stack pointer. We do it this way in order to be
16062 friendly to the stack unwinding code, which only wants to see a single
16063 stack decrement per instruction. The RTL we generate for the note looks
16064 something like this:
16067 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16068 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16069 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16070 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16074 FIXME: In an ideal world the PRE_MODIFY would not exist and
16075 instead we'd have a parallel expression detailing all
16076 the stores to the various memory addresses so that debug
16077 information is more up-to-date. Remember, however, while writing
16078 this to take care of the constraints with the push instruction.
16080 Note also that this has to be taken care of for the VFP registers.
16082 For more see PR43399. */
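/* A hypothetical call: emit_multi_reg_push (0x40f0) selects
   {r4, r5, r6, r7, lr} and the insn prints as something like
       stmfd sp!, {r4, r5, r6, r7, lr}
   while the attached note describes one decrement (sp = sp - 20)
   followed by five individual word stores.  */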
16084 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16085 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16086 dwarf_par_index = 1;
16088 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16090 if (mask & (1 << i))
16092 reg = gen_rtx_REG (SImode, i);
16094 XVECEXP (par, 0, 0)
16095 = gen_rtx_SET (VOIDmode,
16098 gen_rtx_PRE_MODIFY (Pmode,
16101 (stack_pointer_rtx,
16104 gen_rtx_UNSPEC (BLKmode,
16105 gen_rtvec (1, reg),
16106 UNSPEC_PUSH_MULT));
16108 if (i != PC_REGNUM)
16110 tmp = gen_rtx_SET (VOIDmode,
16111 gen_frame_mem (SImode, stack_pointer_rtx),
16113 RTX_FRAME_RELATED_P (tmp) = 1;
16114 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16122 for (j = 1, i++; j < num_regs; i++)
16124 if (mask & (1 << i))
16126 reg = gen_rtx_REG (SImode, i);
16128 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16130 if (i != PC_REGNUM)
16133 = gen_rtx_SET (VOIDmode,
16136 plus_constant (stack_pointer_rtx,
16139 RTX_FRAME_RELATED_P (tmp) = 1;
16140 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16147 par = emit_insn (par);
16149 tmp = gen_rtx_SET (VOIDmode,
16151 plus_constant (stack_pointer_rtx, -4 * num_regs));
16152 RTX_FRAME_RELATED_P (tmp) = 1;
16153 XVECEXP (dwarf, 0, 0) = tmp;
16155 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16160 /* Calculate the size of the return value that is passed in registers. */
16162 arm_size_return_regs (void)
16164 enum machine_mode mode;
16166 if (crtl->return_rtx != 0)
16167 mode = GET_MODE (crtl->return_rtx);
16169 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16171 return GET_MODE_SIZE (mode);
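/* For example (illustrative): a function returning long long has a
   DImode return rtx, so this reports 8 (r0 and r1); a plain int
   return reports 4 (r0 alone).  */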
16175 emit_sfm (int base_reg, int count)
16182 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16183 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16185 reg = gen_rtx_REG (XFmode, base_reg++);
16187 XVECEXP (par, 0, 0)
16188 = gen_rtx_SET (VOIDmode,
16191 gen_rtx_PRE_MODIFY (Pmode,
16194 (stack_pointer_rtx,
16197 gen_rtx_UNSPEC (BLKmode,
16198 gen_rtvec (1, reg),
16199 UNSPEC_PUSH_MULT));
16200 tmp = gen_rtx_SET (VOIDmode,
16201 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16202 RTX_FRAME_RELATED_P (tmp) = 1;
16203 XVECEXP (dwarf, 0, 1) = tmp;
16205 for (i = 1; i < count; i++)
16207 reg = gen_rtx_REG (XFmode, base_reg++);
16208 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16210 tmp = gen_rtx_SET (VOIDmode,
16211 gen_frame_mem (XFmode,
16212 plus_constant (stack_pointer_rtx,
16215 RTX_FRAME_RELATED_P (tmp) = 1;
16216 XVECEXP (dwarf, 0, i + 1) = tmp;
16219 tmp = gen_rtx_SET (VOIDmode,
16221 plus_constant (stack_pointer_rtx, -12 * count));
16223 RTX_FRAME_RELATED_P (tmp) = 1;
16224 XVECEXP (dwarf, 0, 0) = tmp;
16226 par = emit_insn (par);
16227 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16233 /* Return true if the current function needs to save/restore LR. */
16236 thumb_force_lr_save (void)
16238 return !cfun->machine->lr_save_eliminated
16239 && (!leaf_function_p ()
16240 || thumb_far_jump_used_p ()
16241 || df_regs_ever_live_p (LR_REGNUM));
16245 /* Return true if r3 is used by any of the tail call insns in the
16246 current function. */
16249 any_sibcall_uses_r3 (void)
16254 if (!crtl->tail_call_emit)
16256 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16257 if (e->flags & EDGE_SIBCALL)
16259 rtx call = BB_END (e->src);
16260 if (!CALL_P (call))
16261 call = prev_nonnote_nondebug_insn (call);
16262 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16263 if (find_regno_fusage (call, USE, 3))
16270 /* Compute the distance from register FROM to register TO.
16271 These can be the arg pointer (26), the soft frame pointer (25),
16272 the stack pointer (13) or the hard frame pointer (11).
16273 In thumb mode r7 is used as the soft frame pointer, if needed.
16274 Typical stack layout looks like this:
16276 old stack pointer -> |    |
16279                      |    |  saved arguments for
16280                      |    |  vararg functions
16283 hard FP & arg pointer -> |    |  stack frame
                           |    |  call saved registers
16291 soft frame pointer -> |    |  local variables
16296 locals base pointer -> |    |  outgoing arguments
16301 current stack pointer -> |    |
16304 For a given function some or all of these stack components
16305 may not be needed, giving rise to the possibility of
16306 eliminating some of the registers.
16308 The values returned by this function must reflect the behavior
16309 of arm_expand_prologue() and arm_compute_save_reg_mask().
16311 The sign of the number returned reflects the direction of stack
16312 growth, so the values are positive for all eliminations except
16313 from the soft frame pointer to the hard frame pointer.
16315 SFP may point just inside the local variables block to ensure correct alignment. */
16319 /* Calculate stack offsets. These are used to calculate register elimination
16320 offsets and in prologue/epilogue code. Also calculates which registers
16321 should be saved. */
16323 static arm_stack_offsets *
16324 arm_get_frame_offsets (void)
16326 struct arm_stack_offsets *offsets;
16327 unsigned long func_type;
16331 HOST_WIDE_INT frame_size;
16334 offsets = &cfun->machine->stack_offsets;
16336 /* We need to know if we are a leaf function. Unfortunately, it
16337 is possible to be called after start_sequence has been called,
16338 which causes get_insns to return the insns for the sequence,
16339 not the function, which will cause leaf_function_p to return
16340 the incorrect result.
16342 However, we only need to know about leaf functions once reload has completed, and the
16343 frame size cannot be changed after that time, so we can safely
16344 use the cached value. */
16346 if (reload_completed)
16349 /* Initially this is the size of the local variables. It will be translated
16350 into an offset once we have determined the size of preceding data. */
16351 frame_size = ROUND_UP_WORD (get_frame_size ());
16353 leaf = leaf_function_p ();
16355 /* Space for variadic functions. */
16356 offsets->saved_args = crtl->args.pretend_args_size;
16358 /* In Thumb mode this is incorrect, but never used. */
16359 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16360 arm_compute_static_chain_stack_bytes();
16364 unsigned int regno;
16366 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16367 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16368 saved = core_saved;
16370 /* We know that SP will be doubleword aligned on entry, and we must
16371 preserve that condition at any subroutine call. We also require the
16372 soft frame pointer to be doubleword aligned. */
16374 if (TARGET_REALLY_IWMMXT)
16376 /* Check for the call-saved iWMMXt registers. */
16377 for (regno = FIRST_IWMMXT_REGNUM;
16378 regno <= LAST_IWMMXT_REGNUM;
16380 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16384 func_type = arm_current_func_type ();
16385 if (! IS_VOLATILE (func_type))
16387 /* Space for saved FPA registers. */
16388 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16389 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16392 /* Space for saved VFP registers. */
16393 if (TARGET_HARD_FLOAT && TARGET_VFP)
16394 saved += arm_get_vfp_saved_size ();
16397 else /* TARGET_THUMB1 */
16399 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16400 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16401 saved = core_saved;
16402 if (TARGET_BACKTRACE)
16406 /* Saved registers include the stack frame. */
16407 offsets->saved_regs = offsets->saved_args + saved +
16408 arm_compute_static_chain_stack_bytes();
16409 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16410 /* A leaf function does not need any stack alignment if it has nothing on the stack. */
16412 if (leaf && frame_size == 0
16413 /* However if it calls alloca(), we have a dynamically allocated
16414 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16415 && ! cfun->calls_alloca)
16417 offsets->outgoing_args = offsets->soft_frame;
16418 offsets->locals_base = offsets->soft_frame;
16422 /* Ensure SFP has the correct alignment. */
16423 if (ARM_DOUBLEWORD_ALIGN
16424 && (offsets->soft_frame & 7))
16426 offsets->soft_frame += 4;
16427 /* Try to align stack by pushing an extra reg. Don't bother doing this
16428 when there is a stack frame as the alignment will be rolled into
16429 the normal stack adjustment. */
16430 if (frame_size + crtl->outgoing_args_size == 0)
16434 /* If it is safe to use r3, then do so. This sometimes
16435 generates better code on Thumb-2 by avoiding the need to
16436 use 32-bit push/pop instructions. */
16437 if (! any_sibcall_uses_r3 ()
16438 && arm_size_return_regs () <= 12
16439 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16444 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16446 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16455 offsets->saved_regs += 4;
16456 offsets->saved_regs_mask |= (1 << reg);
16461 offsets->locals_base = offsets->soft_frame + frame_size;
16462 offsets->outgoing_args = (offsets->locals_base
16463 + crtl->outgoing_args_size);
16465 if (ARM_DOUBLEWORD_ALIGN)
16467 /* Ensure SP remains doubleword aligned. */
16468 if (offsets->outgoing_args & 7)
16469 offsets->outgoing_args += 4;
16470 gcc_assert (!(offsets->outgoing_args & 7));
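/* A worked example, assuming no pretend args, a zero-size
   CALLER_INTERWORKING_SLOT_SIZE, six core registers saved, 12 bytes
   of locals and no outgoing arguments:
       saved_args    = 0
       saved_regs    = 6 * 4 = 24
       soft_frame    = 24 (already doubleword aligned)
       locals_base   = 24 + 12 = 36
       outgoing_args = 36, padded up to 40 to keep SP aligned.  */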
16477 /* Calculate the relative offsets for the different stack pointers. Positive
16478 offsets are in the direction of stack growth. */
16481 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16483 arm_stack_offsets *offsets;
16485 offsets = arm_get_frame_offsets ();
16487 /* OK, now we have enough information to compute the distances.
16488 There must be an entry in these switch tables for each pair
16489 of registers in ELIMINABLE_REGS, even if some of the entries
16490 seem to be redundant or useless. */
16493 case ARG_POINTER_REGNUM:
16496 case THUMB_HARD_FRAME_POINTER_REGNUM:
16499 case FRAME_POINTER_REGNUM:
16500 /* This is the reverse of the soft frame pointer
16501 to hard frame pointer elimination below. */
16502 return offsets->soft_frame - offsets->saved_args;
16504 case ARM_HARD_FRAME_POINTER_REGNUM:
16505 /* This is only non-zero in the case where the static chain register
16506 is stored above the frame. */
16507 return offsets->frame - offsets->saved_args - 4;
16509 case STACK_POINTER_REGNUM:
16510 /* If nothing has been pushed on the stack at all
16511 then this will return -4. This *is* correct! */
16512 return offsets->outgoing_args - (offsets->saved_args + 4);
16515 gcc_unreachable ();
16517 gcc_unreachable ();
16519 case FRAME_POINTER_REGNUM:
16522 case THUMB_HARD_FRAME_POINTER_REGNUM:
16525 case ARM_HARD_FRAME_POINTER_REGNUM:
16526 /* The hard frame pointer points to the top entry in the
16527 stack frame. The soft frame pointer to the bottom entry
16528 in the stack frame. If there is no stack frame at all,
16529 then they are identical. */
16531 return offsets->frame - offsets->soft_frame;
16533 case STACK_POINTER_REGNUM:
16534 return offsets->outgoing_args - offsets->soft_frame;
16537 gcc_unreachable ();
16539 gcc_unreachable ();
16542 /* You cannot eliminate from the stack pointer.
16543 In theory you could eliminate from the hard frame
16544 pointer to the stack pointer, but this will never
16545 happen, since if a stack frame is not needed the
16546 hard frame pointer will never be used. */
16547 gcc_unreachable ();
16551 /* Given FROM and TO register numbers, say whether this elimination is
16552 allowed. Frame pointer elimination is automatically handled.
16554 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16555 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16556 pointer, we must eliminate FRAME_POINTER_REGNUM into
16557 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16558 ARG_POINTER_REGNUM. */
16561 arm_can_eliminate (const int from, const int to)
16563 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16564 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16565 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16566 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
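/* For instance, when frame_pointer_needed is set, eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM is refused above,
   so reload must use the hard frame pointer elimination instead.  */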
16570 /* Emit RTL to save coprocessor registers on function entry. Returns the
16571 number of bytes pushed. */
16574 arm_save_coproc_regs(void)
16576 int saved_size = 0;
16578 unsigned start_reg;
16581 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16582 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16584 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16585 insn = gen_rtx_MEM (V2SImode, insn);
16586 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16587 RTX_FRAME_RELATED_P (insn) = 1;
16591 /* Save any floating point call-saved registers used by this function. */
16593 if (TARGET_FPA_EMU2)
16595 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16596 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16598 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16599 insn = gen_rtx_MEM (XFmode, insn);
16600 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16601 RTX_FRAME_RELATED_P (insn) = 1;
16607 start_reg = LAST_FPA_REGNUM;
16609 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16611 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16613 if (start_reg - reg == 3)
16615 insn = emit_sfm (reg, 4);
16616 RTX_FRAME_RELATED_P (insn) = 1;
16618 start_reg = reg - 1;
16623 if (start_reg != reg)
16625 insn = emit_sfm (reg + 1, start_reg - reg);
16626 RTX_FRAME_RELATED_P (insn) = 1;
16627 saved_size += (start_reg - reg) * 12;
16629 start_reg = reg - 1;
16633 if (start_reg != reg)
16635 insn = emit_sfm (reg + 1, start_reg - reg);
16636 saved_size += (start_reg - reg) * 12;
16637 RTX_FRAME_RELATED_P (insn) = 1;
16640 if (TARGET_HARD_FLOAT && TARGET_VFP)
16642 start_reg = FIRST_VFP_REGNUM;
16644 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16646 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16647 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16649 if (start_reg != reg)
16650 saved_size += vfp_emit_fstmd (start_reg,
16651 (reg - start_reg) / 2);
16652 start_reg = reg + 2;
16655 if (start_reg != reg)
16656 saved_size += vfp_emit_fstmd (start_reg,
16657 (reg - start_reg) / 2);
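/* Illustrative only: if d8-d11 are the live call-saved VFP registers,
   the loop finds one contiguous block and vfp_emit_fstmd emits a
   single store-multiple, e.g. "fstmfdd sp!, {d8-d11}" (which is
   "vpush {d8-d11}" in unified syntax).  */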
16663 /* Set the Thumb frame pointer from the stack pointer. */
16666 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16668 HOST_WIDE_INT amount;
16671 amount = offsets->outgoing_args - offsets->locals_base;
16673 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16674 stack_pointer_rtx, GEN_INT (amount)));
16677 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16678 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16679 expects the first two operands to be the same. */
16682 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16684 hard_frame_pointer_rtx));
16688 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16689 hard_frame_pointer_rtx,
16690 stack_pointer_rtx));
16692 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16693 plus_constant (stack_pointer_rtx, amount));
16694 RTX_FRAME_RELATED_P (dwarf) = 1;
16695 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16698 RTX_FRAME_RELATED_P (insn) = 1;
16701 /* Generate the prologue instructions for entry into an ARM or Thumb-2 function. */
16704 arm_expand_prologue (void)
16709 unsigned long live_regs_mask;
16710 unsigned long func_type;
16712 int saved_pretend_args = 0;
16713 int saved_regs = 0;
16714 unsigned HOST_WIDE_INT args_to_push;
16715 arm_stack_offsets *offsets;
16717 func_type = arm_current_func_type ();
16719 /* Naked functions don't have prologues. */
16720 if (IS_NAKED (func_type))
16723 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
16724 args_to_push = crtl->args.pretend_args_size;
16726 /* Compute which registers we will have to save onto the stack. */
16727 offsets = arm_get_frame_offsets ();
16728 live_regs_mask = offsets->saved_regs_mask;
16730 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16732 if (IS_STACKALIGN (func_type))
16736 /* Handle a word-aligned stack pointer. We generate the following:

    mov r0, sp
    bic r1, r0, #7
    mov sp, r1
16741 <save and restore r0 in normal prologue/epilogue>
    mov sp, r0
    bx lr

16745 The unwinder doesn't need to know about the stack realignment.
16746 Just tell it we saved SP in r0. */
16747 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16749 r0 = gen_rtx_REG (SImode, 0);
16750 r1 = gen_rtx_REG (SImode, 1);
16752 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16753 RTX_FRAME_RELATED_P (insn) = 1;
16754 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16756 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16758 /* ??? The CFA changes here, which may cause GDB to conclude that it
16759 has entered a different function. That said, the unwind info is
16760 correct, individually, before and after this instruction because
16761 we've described the save of SP, which will override the default
16762 handling of SP as restoring from the CFA. */
16763 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16766 /* For APCS frames, if the IP register is clobbered when
16767 creating the frame, save that register in a special way. */
16769 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16771 if (IS_INTERRUPT (func_type))
16773 /* Interrupt functions must not corrupt any registers.
16774 Creating a frame pointer however, corrupts the IP
16775 register, so we must push it first. */
16776 emit_multi_reg_push (1 << IP_REGNUM);
16778 /* Do not set RTX_FRAME_RELATED_P on this insn.
16779 The dwarf stack unwinding code only wants to see one
16780 stack decrement per function, and this is not it. If
16781 this instruction is labeled as being part of the frame
16782 creation sequence then dwarf2out_frame_debug_expr will
16783 die when it encounters the assignment of IP to FP
16784 later on, since the use of SP here establishes SP as
16785 the CFA register and not IP.
16787 Anyway this instruction is not really part of the stack
16788 frame creation although it is part of the prologue. */
16790 else if (IS_NESTED (func_type))
16792 /* The static chain register is the same as the IP register,
16793 which is used as a scratch register during stack frame creation.
16794 To get around this we need to find somewhere to store IP
16795 whilst the frame is being created. We try the following places
    in order:
16798 1. The last argument register.
16799 2. A slot on the stack above the frame. (This only
16800 works if the function is not a varargs function).
16801 3. Register r3, after pushing the argument registers onto the stack.
16804 Note - we only need to tell the dwarf2 backend about the SP
16805 adjustment in the second variant; the static chain register
16806 doesn't need to be unwound, as it doesn't contain a value
16807 inherited from the caller. */
16809 if (df_regs_ever_live_p (3) == false)
16810 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16811 else if (args_to_push == 0)
16815 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16818 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16819 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16822 /* Just tell the dwarf backend that we adjusted SP. */
16823 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16824 plus_constant (stack_pointer_rtx,
16826 RTX_FRAME_RELATED_P (insn) = 1;
16827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16831 /* Store the args on the stack. */
16832 if (cfun->machine->uses_anonymous_args)
16833 insn = emit_multi_reg_push
16834 ((0xf0 >> (args_to_push / 4)) & 0xf);
16837 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16838 GEN_INT (- args_to_push)));
16840 RTX_FRAME_RELATED_P (insn) = 1;
16842 saved_pretend_args = 1;
16843 fp_offset = args_to_push;
16846 /* Now reuse r3 to preserve IP. */
16847 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16851 insn = emit_set_insn (ip_rtx,
16852 plus_constant (stack_pointer_rtx, fp_offset));
16853 RTX_FRAME_RELATED_P (insn) = 1;
16858 /* Push the argument registers, or reserve space for them. */
16859 if (cfun->machine->uses_anonymous_args)
16860 insn = emit_multi_reg_push
16861 ((0xf0 >> (args_to_push / 4)) & 0xf);
16864 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16865 GEN_INT (- args_to_push)));
16866 RTX_FRAME_RELATED_P (insn) = 1;
16869 /* If this is an interrupt service routine, and the link register
16870 is going to be pushed, and we're not generating an extra
16871 push of IP (needed when a frame is needed and the frame layout is APCS),
16872 subtracting four from LR now will mean that the function return
16873 can be done with a single instruction. */
16874 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16875 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16876 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16879 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16881 emit_set_insn (lr, plus_constant (lr, -4));
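/* Illustrative: with LR pre-decremented here, an ISR that pushed
   {r0-r3, lr} can return with the single instruction
       ldmfd sp!, {r0-r3, pc}^
   rather than popping LR and then issuing "subs pc, lr, #4".  */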
16884 if (live_regs_mask)
16886 saved_regs += bit_count (live_regs_mask) * 4;
16887 if (optimize_size && !frame_pointer_needed
16888 && saved_regs == offsets->saved_regs - offsets->saved_args)
16890 /* If no coprocessor registers are being pushed and we don't have
16891 to worry about a frame pointer then push extra registers to
16892 create the stack frame. This is done in a way that does not
16893 alter the frame layout, so is independent of the epilogue. */
16897 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16899 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16900 if (frame && n * 4 >= frame)
16903 live_regs_mask |= (1 << n) - 1;
16904 saved_regs += frame;
16907 insn = emit_multi_reg_push (live_regs_mask);
16908 RTX_FRAME_RELATED_P (insn) = 1;
16911 if (! IS_VOLATILE (func_type))
16912 saved_regs += arm_save_coproc_regs ();
16914 if (frame_pointer_needed && TARGET_ARM)
16916 /* Create the new frame pointer. */
16917 if (TARGET_APCS_FRAME)
16919 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16920 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16921 RTX_FRAME_RELATED_P (insn) = 1;
16923 if (IS_NESTED (func_type))
16925 /* Recover the static chain register. */
16926 if (!df_regs_ever_live_p (3)
16927 || saved_pretend_args)
16928 insn = gen_rtx_REG (SImode, 3);
16929 else /* if (crtl->args.pretend_args_size == 0) */
16931 insn = plus_constant (hard_frame_pointer_rtx, 4);
16932 insn = gen_frame_mem (SImode, insn);
16934 emit_set_insn (ip_rtx, insn);
16935 /* Add a USE to stop propagate_one_insn() from barfing. */
16936 emit_insn (gen_prologue_use (ip_rtx));
16941 insn = GEN_INT (saved_regs - 4);
16942 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16943 stack_pointer_rtx, insn));
16944 RTX_FRAME_RELATED_P (insn) = 1;
16948 if (flag_stack_usage_info)
16949 current_function_static_stack_size
16950 = offsets->outgoing_args - offsets->saved_args;
16952 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16954 /* This add can produce multiple insns for a large constant, so we
16955 need to get tricky. */
16956 rtx last = get_last_insn ();
16958 amount = GEN_INT (offsets->saved_args + saved_regs
16959 - offsets->outgoing_args);
16961 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16965 last = last ? NEXT_INSN (last) : get_insns ();
16966 RTX_FRAME_RELATED_P (last) = 1;
16968 while (last != insn);
16970 /* If the frame pointer is needed, emit a special barrier that
16971 will prevent the scheduler from moving stores to the frame
16972 before the stack adjustment. */
16973 if (frame_pointer_needed)
16974 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16975 hard_frame_pointer_rtx));
16979 if (frame_pointer_needed && TARGET_THUMB2)
16980 thumb_set_frame_pointer (offsets);
16982 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16984 unsigned long mask;
16986 mask = live_regs_mask;
16987 mask &= THUMB2_WORK_REGS;
16988 if (!IS_NESTED (func_type))
16989 mask |= (1 << IP_REGNUM);
16990 arm_load_pic_register (mask);
16993 /* If we are profiling, make sure no instructions are scheduled before
16994 the call to mcount. Similarly if the user has requested no
16995 scheduling in the prolog. Similarly if we want non-call exceptions
16996 using the EABI unwinder, to prevent faulting instructions from being
16997 swapped with a stack adjustment. */
16998 if (crtl->profile || !TARGET_SCHED_PROLOG
16999 || (arm_except_unwind_info (&global_options) == UI_TARGET
17000 && cfun->can_throw_non_call_exceptions))
17001 emit_insn (gen_blockage ());
17003 /* If the link register is being kept alive, with the return address in it,
17004 then make sure that it does not get reused by the ce2 pass. */
17005 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17006 cfun->machine->lr_save_eliminated = 1;
17009 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17011 arm_print_condition (FILE *stream)
17013 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17015 /* Branch conversion is not implemented for Thumb-2. */
17018 output_operand_lossage ("predicated Thumb instruction");
17021 if (current_insn_predicate != NULL)
17023 output_operand_lossage
17024 ("predicated instruction in conditional sequence");
17028 fputs (arm_condition_codes[arm_current_cc], stream);
17030 else if (current_insn_predicate)
17032 enum arm_cond_code code;
17036 output_operand_lossage ("predicated Thumb instruction");
17040 code = get_arm_condition_code (current_insn_predicate);
17041 fputs (arm_condition_codes[code], stream);
17046 /* If CODE is 'd', then X is a condition operand and the instruction
17047 should only be executed if the condition is true.
17048 If CODE is 'D', then X is a condition operand and the instruction
17049 should only be executed if the condition is false: however, if the mode
17050 of the comparison is CCFPEmode, then always execute the instruction -- we
17051 do this because in these circumstances !GE does not necessarily imply LT;
17052 in these cases the instruction pattern will take care to make sure that
17053 an instruction containing %d will follow, thereby undoing the effects of
17054 doing this instruction unconditionally.
17055 If CODE is 'N' then X is a floating point operand that must be negated
    before output.
17057 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17058 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
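/* Two small examples of the codes above: if X is the condition (ne),
   "%d" prints "ne" and "%D" prints "eq"; if X is the constant 10,
   "%B" prints the bitwise inverse, -11.  */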
17060 arm_print_operand (FILE *stream, rtx x, int code)
17065 fputs (ASM_COMMENT_START, stream);
17069 fputs (user_label_prefix, stream);
17073 fputs (REGISTER_PREFIX, stream);
17077 arm_print_condition (stream);
17081 /* Nothing in unified syntax, otherwise the current condition code. */
17082 if (!TARGET_UNIFIED_ASM)
17083 arm_print_condition (stream);
17087 /* The current condition code in unified syntax, otherwise nothing. */
17088 if (TARGET_UNIFIED_ASM)
17089 arm_print_condition (stream);
17093 /* The current condition code for a condition code setting instruction.
17094 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17095 if (TARGET_UNIFIED_ASM)
17097 fputc('s', stream);
17098 arm_print_condition (stream);
17102 arm_print_condition (stream);
17103 fputc('s', stream);
17108 /* If the instruction is conditionally executed then print
17109 the current condition code, otherwise print 's'. */
17110 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17111 if (current_insn_predicate)
17112 arm_print_condition (stream);
17114 fputc('s', stream);
17117 /* %# is a "break" sequence. It doesn't output anything, but is used to
17118 separate e.g. operand numbers from following text, if that text consists
17119 of further digits which we don't want to be part of the operand number. */
17127 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17128 r = real_value_negate (&r);
17129 fprintf (stream, "%s", fp_const_from_val (&r));
17133 /* An integer or symbol address without a preceding # sign. */
17135 switch (GET_CODE (x))
17138 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17142 output_addr_const (stream, x);
17146 if (GET_CODE (XEXP (x, 0)) == PLUS
17147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17149 output_addr_const (stream, x);
17152 /* Fall through. */
17155 output_operand_lossage ("Unsupported operand for code '%c'", code);
17160 if (GET_CODE (x) == CONST_INT)
17163 val = ARM_SIGN_EXTEND (~INTVAL (x));
17164 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17168 putc ('~', stream);
17169 output_addr_const (stream, x);
17174 /* The low 16 bits of an immediate constant. */
17175 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17179 fprintf (stream, "%s", arithmetic_instr (x, 1));
17182 /* Truncate Cirrus shift counts. */
17184 if (GET_CODE (x) == CONST_INT)
17186 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17189 arm_print_operand (stream, x, 0);
17193 fprintf (stream, "%s", arithmetic_instr (x, 0));
17201 if (!shift_operator (x, SImode))
17203 output_operand_lossage ("invalid shift operand");
17207 shift = shift_op (x, &val);
17211 fprintf (stream, ", %s ", shift);
17213 arm_print_operand (stream, XEXP (x, 1), 0);
17215 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17220 /* An explanation of the 'Q', 'R' and 'H' register operands:
17222 In a pair of registers containing a DI or DF value the 'Q'
17223 operand returns the register number of the register containing
17224 the least significant part of the value. The 'R' operand returns
17225 the register number of the register containing the most
17226 significant part of the value.
17228 The 'H' operand returns the higher of the two register numbers.
17229 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17230 same as the 'Q' operand, since the most significant part of the
17231 value is held in the lower number register. The reverse is true
17232 on systems where WORDS_BIG_ENDIAN is false.
17234 The purpose of these operands is to distinguish between cases
17235 where the endian-ness of the values is important (for example
17236 when they are added together), and cases where the endian-ness
17237 is irrelevant, but the order of register operations is important.
17238 For example when loading a value from memory into a register
17239 pair, the endian-ness does not matter. Provided that the value
17240 from the lower memory address is put into the lower numbered
17241 register, and the value from the higher address is put into the
17242 higher numbered register, the load will work regardless of whether
17243 the value being loaded is big-wordian or little-wordian. The
17244 order of the two register loads can matter however, if the address
17245 of the memory location is actually held in one of the registers
17246 being overwritten by the load.
17248 The 'Q' and 'R' constraints are also available for 64-bit constants. */
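/* Example (little-endian): for a DImode value in r0/r1 the least
   significant word is in r0, so "%Q" prints r0, "%R" prints r1 and
   "%H" (the higher register number) also prints r1.  */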
17251 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17253 rtx part = gen_lowpart (SImode, x);
17254 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17258 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17260 output_operand_lossage ("invalid operand for code '%c'", code);
17264 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17268 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17270 enum machine_mode mode = GET_MODE (x);
17273 if (mode == VOIDmode)
17275 part = gen_highpart_mode (SImode, mode, x);
17276 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17280 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17282 output_operand_lossage ("invalid operand for code '%c'", code);
17286 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17290 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17292 output_operand_lossage ("invalid operand for code '%c'", code);
17296 asm_fprintf (stream, "%r", REGNO (x) + 1);
17300 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17302 output_operand_lossage ("invalid operand for code '%c'", code);
17306 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17310 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17312 output_operand_lossage ("invalid operand for code '%c'", code);
17316 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17320 asm_fprintf (stream, "%r",
17321 GET_CODE (XEXP (x, 0)) == REG
17322 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17326 asm_fprintf (stream, "{%r-%r}",
17328 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17331 /* Like 'M', but writing doubleword vector registers, for use by Neon
17335 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17336 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17338 asm_fprintf (stream, "{d%d}", regno);
17340 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17345 /* CONST_TRUE_RTX means always -- that's the default. */
17346 if (x == const_true_rtx)
17349 if (!COMPARISON_P (x))
17351 output_operand_lossage ("invalid operand for code '%c'", code);
17355 fputs (arm_condition_codes[get_arm_condition_code (x)],
17360 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17361 want to do that. */
17362 if (x == const_true_rtx)
17364 output_operand_lossage ("instruction never executed");
17367 if (!COMPARISON_P (x))
17369 output_operand_lossage ("invalid operand for code '%c'", code);
17373 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17374 (get_arm_condition_code (x))],
17378 /* Cirrus registers can be accessed in a variety of ways:
17379 single floating point (f)
17380 double floating point (d)
17382 64bit integer (dx). */
17383 case 'W': /* Cirrus register in F mode. */
17384 case 'X': /* Cirrus register in D mode. */
17385 case 'Y': /* Cirrus register in FX mode. */
17386 case 'Z': /* Cirrus register in DX mode. */
17387 gcc_assert (GET_CODE (x) == REG
17388 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17390 fprintf (stream, "mv%s%s",
17392 : code == 'X' ? "d"
17393 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17397 /* Print cirrus register in the mode specified by the register's mode. */
17400 int mode = GET_MODE (x);
17402 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17404 output_operand_lossage ("invalid operand for code '%c'", code);
17408 fprintf (stream, "mv%s%s",
17409 mode == DFmode ? "d"
17410 : mode == SImode ? "fx"
17411 : mode == DImode ? "dx"
17412 : "f", reg_names[REGNO (x)] + 2);
17418 if (GET_CODE (x) != REG
17419 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17420 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17421 /* Bad value for wCG register number. */
17423 output_operand_lossage ("invalid operand for code '%c'", code);
17428 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17431 /* Print an iWMMXt control register name. */
17433 if (GET_CODE (x) != CONST_INT
17435 || INTVAL (x) >= 16)
17436 /* Bad value for wC register number. */
17438 output_operand_lossage ("invalid operand for code '%c'", code);
17444 static const char * wc_reg_names [16] =
17446 "wCID", "wCon", "wCSSF", "wCASF",
17447 "wC4", "wC5", "wC6", "wC7",
17448 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17449 "wC12", "wC13", "wC14", "wC15"
17452 fprintf (stream, wc_reg_names [INTVAL (x)]);
17456 /* Print the high single-precision register of a VFP double-precision
17460 int mode = GET_MODE (x);
17463 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17465 output_operand_lossage ("invalid operand for code '%c'", code);
17470 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17472 output_operand_lossage ("invalid operand for code '%c'", code);
17476 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17480 /* Print a VFP/Neon double precision or quad precision register name. */
17484 int mode = GET_MODE (x);
17485 int is_quad = (code == 'q');
17488 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17490 output_operand_lossage ("invalid operand for code '%c'", code);
17494 if (GET_CODE (x) != REG
17495 || !IS_VFP_REGNUM (REGNO (x)))
17497 output_operand_lossage ("invalid operand for code '%c'", code);
17502 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17503 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17505 output_operand_lossage ("invalid operand for code '%c'", code);
17509 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17510 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17514 /* These two codes print the low/high doubleword register of a Neon quad
17515 register, respectively. For pair-structure types, can also print
17516 low/high quadword registers. */
17520 int mode = GET_MODE (x);
17523 if ((GET_MODE_SIZE (mode) != 16
17524 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17526 output_operand_lossage ("invalid operand for code '%c'", code);
17531 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17533 output_operand_lossage ("invalid operand for code '%c'", code);
17537 if (GET_MODE_SIZE (mode) == 16)
17538 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17539 + (code == 'f' ? 1 : 0));
17541 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17542 + (code == 'f' ? 1 : 0));
17546 /* Print a VFPv3 floating-point constant, represented as an integer
17550 int index = vfp3_const_double_index (x);
17551 gcc_assert (index != -1);
17552 fprintf (stream, "%d", index);
17556 /* Print bits representing opcode features for Neon.
17558 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17559 and polynomials as unsigned.
17561 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17563 Bit 2 is 1 for rounding functions, 0 otherwise. */
17565 /* Identify the type as 's', 'u', 'p' or 'f'. */
17568 HOST_WIDE_INT bits = INTVAL (x);
17569 fputc ("uspf"[bits & 3], stream);
17573 /* Likewise, but signed and unsigned integers are both 'i'. */
17576 HOST_WIDE_INT bits = INTVAL (x);
17577 fputc ("iipf"[bits & 3], stream);
17581 /* As for 'T', but emit 'u' instead of 'p'. */
17584 HOST_WIDE_INT bits = INTVAL (x);
17585 fputc ("usuf"[bits & 3], stream);
17589 /* Bit 2: rounding (vs none). */
17592 HOST_WIDE_INT bits = INTVAL (x);
17593 fputs ((bits & 4) != 0 ? "r" : "", stream);
17597 /* Memory operand for vld1/vst1 instruction. */
17601 bool postinc = FALSE;
17602 unsigned align, memsize, align_bits;
17604 gcc_assert (GET_CODE (x) == MEM);
17605 addr = XEXP (x, 0);
17606 if (GET_CODE (addr) == POST_INC)
17609 addr = XEXP (addr, 0);
17611 asm_fprintf (stream, "[%r", REGNO (addr));
17613 /* We know the alignment of this access, so we can emit a hint in the
17614 instruction (for some alignments) as an aid to the memory subsystem of the target. */
17616 align = MEM_ALIGN (x) >> 3;
17617 memsize = MEM_SIZE (x);
17619 /* Only certain alignment specifiers are supported by the hardware. */
17620 if (memsize == 16 && (align % 32) == 0)
17622 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
17624 else if ((align % 8) == 0)
17629 if (align_bits != 0)
17630 asm_fprintf (stream, ":%d", align_bits);
17632 asm_fprintf (stream, "]");
17635 fputs("!", stream);
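/* Illustrative output, assuming the elided branch above sets
   align_bits to 256: a 16-byte access through r0 whose pointer is
   known to be 32-byte aligned, with post-increment, prints as
   "[r0:256]!".  */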
17643 gcc_assert (GET_CODE (x) == MEM);
17644 addr = XEXP (x, 0);
17645 gcc_assert (GET_CODE (addr) == REG);
17646 asm_fprintf (stream, "[%r]", REGNO (addr));
17650 /* Translate an S register number into a D register number and element index. */
17653 int mode = GET_MODE (x);
17656 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17658 output_operand_lossage ("invalid operand for code '%c'", code);
17663 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17665 output_operand_lossage ("invalid operand for code '%c'", code);
17669 regno = regno - FIRST_VFP_REGNUM;
17670 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17674 /* Register specifier for vld1.16/vst1.16. Translate the S register
17675 number into a D register number and element index. */
17678 int mode = GET_MODE (x);
17681 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17683 output_operand_lossage ("invalid operand for code '%c'", code);
17688 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17690 output_operand_lossage ("invalid operand for code '%c'", code);
17694 regno = regno - FIRST_VFP_REGNUM;
17695 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17702 output_operand_lossage ("missing operand");
17706 switch (GET_CODE (x))
17709 asm_fprintf (stream, "%r", REGNO (x));
17713 output_memory_reference_mode = GET_MODE (x);
17714 output_address (XEXP (x, 0));
17721 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17722 sizeof (fpstr), 0, 1);
17723 fprintf (stream, "#%s", fpstr);
17726 fprintf (stream, "#%s", fp_immediate_constant (x));
17730 gcc_assert (GET_CODE (x) != NEG);
17731 fputc ('#', stream);
17732 if (GET_CODE (x) == HIGH)
17734 fputs (":lower16:", stream);
17738 output_addr_const (stream, x);
17744 /* Target hook for printing a memory address. */
17746 arm_print_operand_address (FILE *stream, rtx x)
17750 int is_minus = GET_CODE (x) == MINUS;
17752 if (GET_CODE (x) == REG)
17753 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17754 else if (GET_CODE (x) == PLUS || is_minus)
17756 rtx base = XEXP (x, 0);
17757 rtx index = XEXP (x, 1);
17758 HOST_WIDE_INT offset = 0;
17759 if (GET_CODE (base) != REG
17760 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17762 /* Ensure that BASE is a register. */
17763 /* (one of them must be). */
17764 /* Also ensure the SP is not used as an index register. */
17769 switch (GET_CODE (index))
17772 offset = INTVAL (index);
17775 asm_fprintf (stream, "[%r, #%wd]",
17776 REGNO (base), offset);
17780 asm_fprintf (stream, "[%r, %s%r]",
17781 REGNO (base), is_minus ? "-" : "",
17791 asm_fprintf (stream, "[%r, %s%r",
17792 REGNO (base), is_minus ? "-" : "",
17793 REGNO (XEXP (index, 0)));
17794 arm_print_operand (stream, index, 'S');
17795 fputs ("]", stream);
17800 gcc_unreachable ();
17803 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17804 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17806 extern enum machine_mode output_memory_reference_mode;
17808 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17810 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17811 asm_fprintf (stream, "[%r, #%s%d]!",
17812 REGNO (XEXP (x, 0)),
17813 GET_CODE (x) == PRE_DEC ? "-" : "",
17814 GET_MODE_SIZE (output_memory_reference_mode));
17816 asm_fprintf (stream, "[%r], #%s%d",
17817 REGNO (XEXP (x, 0)),
17818 GET_CODE (x) == POST_DEC ? "-" : "",
17819 GET_MODE_SIZE (output_memory_reference_mode));
17821 else if (GET_CODE (x) == PRE_MODIFY)
17823 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17824 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17825 asm_fprintf (stream, "#%wd]!",
17826 INTVAL (XEXP (XEXP (x, 1), 1)));
17828 asm_fprintf (stream, "%r]!",
17829 REGNO (XEXP (XEXP (x, 1), 1)));
17831 else if (GET_CODE (x) == POST_MODIFY)
17833 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17834 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17835 asm_fprintf (stream, "#%wd",
17836 INTVAL (XEXP (XEXP (x, 1), 1)));
17838 asm_fprintf (stream, "%r",
17839 REGNO (XEXP (XEXP (x, 1), 1)));
17841 else output_addr_const (stream, x);
17845 if (GET_CODE (x) == REG)
17846 asm_fprintf (stream, "[%r]", REGNO (x));
17847 else if (GET_CODE (x) == POST_INC)
17848 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17849 else if (GET_CODE (x) == PLUS)
17851 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17852 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17853 asm_fprintf (stream, "[%r, #%wd]",
17854 REGNO (XEXP (x, 0)),
17855 INTVAL (XEXP (x, 1)));
17857 asm_fprintf (stream, "[%r, %r]",
17858 REGNO (XEXP (x, 0)),
17859 REGNO (XEXP (x, 1)));
17862 output_addr_const (stream, x);
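/* A few illustrative renderings from the cases above: (reg r3)
   prints "[r3, #0]" on ARM, (plus (reg r3) (const_int 8)) prints
   "[r3, #8]", and a post-increment word access through r5 prints
   "[r5], #4".  */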
17866 /* Target hook for indicating whether a punctuation character for
17867 TARGET_PRINT_OPERAND is valid. */
17869 arm_print_operand_punct_valid_p (unsigned char code)
17871 return (code == '@' || code == '|' || code == '.'
17872 || code == '(' || code == ')' || code == '#'
17873 || (TARGET_32BIT && (code == '?'))
17874 || (TARGET_THUMB2 && (code == '!'))
17875 || (TARGET_THUMB && (code == '_')));
17878 /* Target hook for assembling integer objects. The ARM version needs to
17879 handle word-sized values specially. */
17881 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17883 enum machine_mode mode;
17885 if (size == UNITS_PER_WORD && aligned_p)
17887 fputs ("\t.word\t", asm_out_file);
17888 output_addr_const (asm_out_file, x);
17890 /* Mark symbols as position independent. We only do this in the
17891 .text segment, not in the .data segment. */
17892 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17893 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17895 /* See legitimize_pic_address for an explanation of the
17896 TARGET_VXWORKS_RTP check. */
17897 if (TARGET_VXWORKS_RTP
17898 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17899 fputs ("(GOT)", asm_out_file);
17901 fputs ("(GOTOFF)", asm_out_file);
17903 fputc ('\n', asm_out_file);
17907 mode = GET_MODE (x);
17909 if (arm_vector_mode_supported_p (mode))
17913 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17915 units = CONST_VECTOR_NUNITS (x);
17916 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17918 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17919 for (i = 0; i < units; i++)
17921 rtx elt = CONST_VECTOR_ELT (x, i);
17923 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17926 for (i = 0; i < units; i++)
17928 rtx elt = CONST_VECTOR_ELT (x, i);
17929 REAL_VALUE_TYPE rval;
17931 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17934 (rval, GET_MODE_INNER (mode),
17935 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17941 return default_assemble_integer (x, size, aligned_p);
17945 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17949 if (!TARGET_AAPCS_BASED)
17952 default_named_section_asm_out_constructor
17953 : default_named_section_asm_out_destructor) (symbol, priority);
17957 /* Put these in the .init_array section, using a special relocation. */
17958 if (priority != DEFAULT_INIT_PRIORITY)
17961 sprintf (buf, "%s.%.5u",
17962 is_ctor ? ".init_array" : ".fini_array",
17964 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17971 switch_to_section (s);
17972 assemble_align (POINTER_SIZE);
17973 fputs ("\t.word\t", asm_out_file);
17974 output_addr_const (asm_out_file, symbol);
17975 fputs ("(target1)\n", asm_out_file);
17978 /* Add a function to the list of static constructors. */
17981 arm_elf_asm_constructor (rtx symbol, int priority)
17983 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17986 /* Add a function to the list of static destructors. */
17989 arm_elf_asm_destructor (rtx symbol, int priority)
17991 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17994 /* A finite state machine takes care of noticing whether or not instructions
17995 can be conditionally executed, and thus decreasing execution time and code
17996 size by deleting branch instructions. The fsm is controlled by
17997 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17999 /* The states of the fsm controlling condition codes are:
18000 0: normal, do nothing special
18001 1: make ASM_OUTPUT_OPCODE not output this instruction
18002 2: make ASM_OUTPUT_OPCODE not output this instruction
18003 3: make instructions conditional
18004 4: make instructions conditional
18006 State transitions (state->state by whom under condition):
18007 0 -> 1 final_prescan_insn if the `target' is a label
18008 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18009 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18010 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18011 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18012 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18013 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18014 (the target insn is arm_target_insn).
18016 If the jump clobbers the conditions then we use states 2 and 4.
18018 A similar thing can be done with conditional return insns.
18020 XXX In case the `target' is an unconditional branch, this conditionalising
18021 of the instructions always reduces code size, but not always execution
18022 time. But then, I want to reduce the code size to somewhere near what
18023 /bin/cc produces. */
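/* As an illustration (a sketch only; register choice and label names
   are made up), a conditional branch around one instruction such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this fsm as

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch entirely.  */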
18025 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18026 instructions. When a COND_EXEC instruction is seen the subsequent
18027 instructions are scanned so that multiple conditional instructions can be
18028 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18029 specify the length and true/false mask for the IT block. These will be
18030 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
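/* For example (illustrative values): with arm_condexec_masklen == 3,
   arm_condexec_mask == 0x5 and arm_current_cc == ARM_EQ,
   thumb2_asm_output_opcode emits "itet eq", i.e. an IT block in which
   the first and third instructions execute when EQ holds and the
   second when it does not.  */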
18032 /* Returns the index of the ARM condition code string in
18033 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18034 COMPARISON should be an rtx like `(eq (...) (...))'. */
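/* For example, `(eq (reg:CC CC_REGNUM) (const_int 0))' yields ARM_EQ,
   and `(geu ...)' on a plain CCmode comparison yields ARM_CS.  */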
18036 static enum arm_cond_code
18037 maybe_get_arm_condition_code (rtx comparison)
18039 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18040 enum arm_cond_code code;
18041 enum rtx_code comp_code = GET_CODE (comparison);
18043 if (GET_MODE_CLASS (mode) != MODE_CC)
18044 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18045 XEXP (comparison, 1));
18047 switch (mode)
18048 {
18049 case CC_DNEmode: code = ARM_NE; goto dominance;
18050 case CC_DEQmode: code = ARM_EQ; goto dominance;
18051 case CC_DGEmode: code = ARM_GE; goto dominance;
18052 case CC_DGTmode: code = ARM_GT; goto dominance;
18053 case CC_DLEmode: code = ARM_LE; goto dominance;
18054 case CC_DLTmode: code = ARM_LT; goto dominance;
18055 case CC_DGEUmode: code = ARM_CS; goto dominance;
18056 case CC_DGTUmode: code = ARM_HI; goto dominance;
18057 case CC_DLEUmode: code = ARM_LS; goto dominance;
18058 case CC_DLTUmode: code = ARM_CC;
18061 if (comp_code == EQ)
18062 return ARM_INVERSE_CONDITION_CODE (code);
18063 if (comp_code == NE)
18070 case NE: return ARM_NE;
18071 case EQ: return ARM_EQ;
18072 case GE: return ARM_PL;
18073 case LT: return ARM_MI;
18074 default: return ARM_NV;
18080 case NE: return ARM_NE;
18081 case EQ: return ARM_EQ;
18082 default: return ARM_NV;
18088 case NE: return ARM_MI;
18089 case EQ: return ARM_PL;
18090 default: return ARM_NV;
18095 /* These encodings assume that AC=1 in the FPA system control
18096 byte. This allows us to handle all cases except UNEQ and
18097 LTGT. */
18098 switch (comp_code)
18099 {
18100 case GE: return ARM_GE;
18101 case GT: return ARM_GT;
18102 case LE: return ARM_LS;
18103 case LT: return ARM_MI;
18104 case NE: return ARM_NE;
18105 case EQ: return ARM_EQ;
18106 case ORDERED: return ARM_VC;
18107 case UNORDERED: return ARM_VS;
18108 case UNLT: return ARM_LT;
18109 case UNLE: return ARM_LE;
18110 case UNGT: return ARM_HI;
18111 case UNGE: return ARM_PL;
18112 /* UNEQ and LTGT do not have a representation. */
18113 case UNEQ: /* Fall through. */
18114 case LTGT: /* Fall through. */
18115 default: return ARM_NV;
18121 case NE: return ARM_NE;
18122 case EQ: return ARM_EQ;
18123 case GE: return ARM_LE;
18124 case GT: return ARM_LT;
18125 case LE: return ARM_GE;
18126 case LT: return ARM_GT;
18127 case GEU: return ARM_LS;
18128 case GTU: return ARM_CC;
18129 case LEU: return ARM_CS;
18130 case LTU: return ARM_HI;
18131 default: return ARM_NV;
18137 case LTU: return ARM_CS;
18138 case GEU: return ARM_CC;
18139 default: return ARM_NV;
18145 case NE: return ARM_NE;
18146 case EQ: return ARM_EQ;
18147 case GEU: return ARM_CS;
18148 case GTU: return ARM_HI;
18149 case LEU: return ARM_LS;
18150 case LTU: return ARM_CC;
18151 default: return ARM_NV;
18157 case GE: return ARM_GE;
18158 case LT: return ARM_LT;
18159 case GEU: return ARM_CS;
18160 case LTU: return ARM_CC;
18161 default: return ARM_NV;
18167 case NE: return ARM_NE;
18168 case EQ: return ARM_EQ;
18169 case GE: return ARM_GE;
18170 case GT: return ARM_GT;
18171 case LE: return ARM_LE;
18172 case LT: return ARM_LT;
18173 case GEU: return ARM_CS;
18174 case GTU: return ARM_HI;
18175 case LEU: return ARM_LS;
18176 case LTU: return ARM_CC;
18177 default: return ARM_NV;
18180 default: gcc_unreachable ();
18184 /* Like maybe_get_arm_condition_code, but never returns ARM_NV. */
18185 static enum arm_cond_code
18186 get_arm_condition_code (rtx comparison)
18188 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18189 gcc_assert (code != ARM_NV);
18193 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18194 instructions. */
18195 void
18196 thumb2_final_prescan_insn (rtx insn)
18198 rtx first_insn = insn;
18199 rtx body = PATTERN (insn);
18201 enum arm_cond_code code;
18205 /* Remove the previous insn from the count of insns to be output. */
18206 if (arm_condexec_count)
18207 arm_condexec_count--;
18209 /* Nothing to do if we are already inside a conditional block. */
18210 if (arm_condexec_count)
18213 if (GET_CODE (body) != COND_EXEC)
18216 /* Conditional jumps are implemented directly. */
18217 if (GET_CODE (insn) == JUMP_INSN)
18220 predicate = COND_EXEC_TEST (body);
18221 arm_current_cc = get_arm_condition_code (predicate);
18223 n = get_attr_ce_count (insn);
18224 arm_condexec_count = 1;
18225 arm_condexec_mask = (1 << n) - 1;
18226 arm_condexec_masklen = n;
18227 /* See if subsequent instructions can be combined into the same block. */
18230 insn = next_nonnote_insn (insn);
18232 /* Jumping into the middle of an IT block is illegal, so a label or
18233 barrier terminates the block. */
18234 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
18237 body = PATTERN (insn);
18238 /* USE and CLOBBER aren't really insns, so just skip them. */
18239 if (GET_CODE (body) == USE
18240 || GET_CODE (body) == CLOBBER)
18243 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18244 if (GET_CODE (body) != COND_EXEC)
18246 /* Allow up to 4 conditionally executed instructions in a block. */
18247 n = get_attr_ce_count (insn);
18248 if (arm_condexec_masklen + n > 4)
18251 predicate = COND_EXEC_TEST (body);
18252 code = get_arm_condition_code (predicate);
18253 mask = (1 << n) - 1;
18254 if (arm_current_cc == code)
18255 arm_condexec_mask |= (mask << arm_condexec_masklen);
18256 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18259 arm_condexec_count++;
18260 arm_condexec_masklen += n;
18262 /* A jump must be the last instruction in a conditional block. */
18263 if (GET_CODE(insn) == JUMP_INSN)
18266 /* Restore recog_data (getting the attributes of other insns can
18267 destroy this array, but final.c assumes that it remains intact
18268 across this call). */
18269 extract_constrain_insn_cached (first_insn);
18273 arm_final_prescan_insn (rtx insn)
18275 /* BODY will hold the body of INSN. */
18276 rtx body = PATTERN (insn);
18278 /* This will be 1 if trying to repeat the trick, and things need to be
18279 reversed if it appears to fail. */
18282 /* If we start with a return insn, we only succeed if we find another one. */
18283 int seeking_return = 0;
18284 enum rtx_code return_code = UNKNOWN;
18286 /* START_INSN will hold the insn from where we start looking. This is the
18287 first insn after the following code_label if REVERSE is true. */
18288 rtx start_insn = insn;
18290 /* If in state 4, check if the target branch is reached, in order to
18291 change back to state 0. */
18292 if (arm_ccfsm_state == 4)
18294 if (insn == arm_target_insn)
18296 arm_target_insn = NULL;
18297 arm_ccfsm_state = 0;
18302 /* If in state 3, it is possible to repeat the trick, if this insn is an
18303 unconditional branch to a label, and immediately following this branch
18304 is the previous target label which is only used once, and the label this
18305 branch jumps to is not too far off. */
18306 if (arm_ccfsm_state == 3)
18308 if (simplejump_p (insn))
18310 start_insn = next_nonnote_insn (start_insn);
18311 if (GET_CODE (start_insn) == BARRIER)
18313 /* XXX Isn't this always a barrier? */
18314 start_insn = next_nonnote_insn (start_insn);
18316 if (GET_CODE (start_insn) == CODE_LABEL
18317 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18318 && LABEL_NUSES (start_insn) == 1)
18323 else if (ANY_RETURN_P (body))
18325 start_insn = next_nonnote_insn (start_insn);
18326 if (GET_CODE (start_insn) == BARRIER)
18327 start_insn = next_nonnote_insn (start_insn);
18328 if (GET_CODE (start_insn) == CODE_LABEL
18329 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18330 && LABEL_NUSES (start_insn) == 1)
18333 seeking_return = 1;
18334 return_code = GET_CODE (body);
18343 gcc_assert (!arm_ccfsm_state || reverse);
18344 if (GET_CODE (insn) != JUMP_INSN)
18347 /* This jump might be paralleled with a clobber of the condition codes;
18348 the jump should always come first. */
18349 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18350 body = XVECEXP (body, 0, 0);
18352 if (reverse
18353 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18354 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18357 int fail = FALSE, succeed = FALSE;
18358 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18359 int then_not_else = TRUE;
18360 rtx this_insn = start_insn, label = 0;
18362 /* Register the insn jumped to. */
18365 if (!seeking_return)
18366 label = XEXP (SET_SRC (body), 0);
18368 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18369 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18370 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18372 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18373 then_not_else = FALSE;
18375 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18377 seeking_return = 1;
18378 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18380 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18382 seeking_return = 1;
18383 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18384 then_not_else = FALSE;
18387 gcc_unreachable ();
18389 /* See how many insns this branch skips, and what kind of insns. If all
18390 insns are okay, and the label or unconditional branch to the same
18391 label is not too far away, succeed. */
18392 for (insns_skipped = 0;
18393 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18397 this_insn = next_nonnote_insn (this_insn);
18401 switch (GET_CODE (this_insn))
18404 /* Succeed if it is the target label, otherwise fail since
18405 control falls in from somewhere else. */
18406 if (this_insn == label)
18408 arm_ccfsm_state = 1;
18416 /* Succeed if the following insn is the target label.
18418 If return insns are used then the last insn in a function
18419 will be a barrier. */
18420 this_insn = next_nonnote_insn (this_insn);
18421 if (this_insn && this_insn == label)
18423 arm_ccfsm_state = 1;
18431 /* The AAPCS says that conditional calls should not be
18432 used since they make interworking inefficient (the
18433 linker can't transform BL<cond> into BLX). That's
18434 only a problem if the machine has BLX. */
18441 /* Succeed if the following insn is the target label, or
18442 if the following two insns are a barrier and the
18443 target label. */
18444 this_insn = next_nonnote_insn (this_insn);
18445 if (this_insn && GET_CODE (this_insn) == BARRIER)
18446 this_insn = next_nonnote_insn (this_insn);
18448 if (this_insn && this_insn == label
18449 && insns_skipped < max_insns_skipped)
18451 arm_ccfsm_state = 1;
18459 /* If this is an unconditional branch to the same label, succeed.
18460 If it is to another label, do nothing. If it is conditional,
18461 fail. */
18462 /* XXX Probably, the tests for SET and the PC are
18463 unnecessary. */
18465 scanbody = PATTERN (this_insn);
18466 if (GET_CODE (scanbody) == SET
18467 && GET_CODE (SET_DEST (scanbody)) == PC)
18469 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18470 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18472 arm_ccfsm_state = 2;
18475 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18478 /* Fail if a conditional return is undesirable (e.g. on a
18479 StrongARM), but still allow this if optimizing for size. */
18480 else if (GET_CODE (scanbody) == return_code
18481 && !use_return_insn (TRUE, NULL)
18484 else if (GET_CODE (scanbody) == return_code)
18486 arm_ccfsm_state = 2;
18489 else if (GET_CODE (scanbody) == PARALLEL)
18491 switch (get_attr_conds (this_insn))
18501 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18506 /* Instructions using or affecting the condition codes make it
18507 fail. */
18508 scanbody = PATTERN (this_insn);
18509 if (!(GET_CODE (scanbody) == SET
18510 || GET_CODE (scanbody) == PARALLEL)
18511 || get_attr_conds (this_insn) != CONDS_NOCOND)
18514 /* A conditional cirrus instruction must be followed by
18515 a non-Cirrus instruction. However, since we
18516 conditionalize instructions in this function and by
18517 the time we get here we can't add instructions
18518 (nops), because shorten_branches() has already been
18519 called, we will disable conditionalizing Cirrus
18520 instructions to be safe. */
18521 if (GET_CODE (scanbody) != USE
18522 && GET_CODE (scanbody) != CLOBBER
18523 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18533 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18534 arm_target_label = CODE_LABEL_NUMBER (label);
18537 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18539 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18541 this_insn = next_nonnote_insn (this_insn);
18542 gcc_assert (!this_insn
18543 || (GET_CODE (this_insn) != BARRIER
18544 && GET_CODE (this_insn) != CODE_LABEL));
18548 /* Oh dear! We ran off the end; give up. */
18549 extract_constrain_insn_cached (insn);
18550 arm_ccfsm_state = 0;
18551 arm_target_insn = NULL;
18554 arm_target_insn = this_insn;
18557 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18558 what it was. */
18559 if (!reverse)
18560 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18562 if (reverse || then_not_else)
18563 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18566 /* Restore recog_data (getting the attributes of other insns can
18567 destroy this array, but final.c assumes that it remains intact
18568 across this call). */
18569 extract_constrain_insn_cached (insn);
18573 /* Output IT instructions. */
18575 thumb2_asm_output_opcode (FILE * stream)
18576 {
18577 char buff[5];
18578 int n;
18580 if (arm_condexec_mask)
18582 for (n = 0; n < arm_condexec_masklen; n++)
18583 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18584 buff[n] = 0;
18585 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18586 arm_condition_codes[arm_current_cc]);
18587 arm_condexec_mask = 0;
18591 /* Returns true if REGNO is a valid register
18592 for holding a quantity of type MODE. */
18594 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18596 if (GET_MODE_CLASS (mode) == MODE_CC)
18597 return (regno == CC_REGNUM
18598 || (TARGET_HARD_FLOAT && TARGET_VFP
18599 && regno == VFPCC_REGNUM));
18602 /* For the Thumb we only allow values bigger than SImode in
18603 registers 0 - 6, so that there is always a second low
18604 register available to hold the upper part of the value.
18605 We probably ought to ensure that the register is the
18606 start of an even numbered register pair. */
18607 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
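/* For illustration of the TARGET_THUMB rule above: ARM_NUM_REGS (DImode)
   is 2, so a DImode value may start in r0-r6 (its upper word landing in
   the next low register) but may not start in r7.  */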
18609 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18610 && IS_CIRRUS_REGNUM (regno))
18611 /* We have outlawed SI values in Cirrus registers because they
18612 reside in the lower 32 bits, but SF values reside in the
18613 upper 32 bits. This causes gcc all sorts of grief. We can't
18614 even split the registers into pairs because Cirrus SI values
18615 get sign extended to 64 bits. -- aldyh. */
18616 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18618 if (TARGET_HARD_FLOAT && TARGET_VFP
18619 && IS_VFP_REGNUM (regno))
18621 if (mode == SFmode || mode == SImode)
18622 return VFP_REGNO_OK_FOR_SINGLE (regno);
18624 if (mode == DFmode)
18625 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18627 /* VFP registers can hold HFmode values, but there is no point in
18628 putting them there unless we have hardware conversion insns. */
18629 if (mode == HFmode)
18630 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18633 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18634 || (VALID_NEON_QREG_MODE (mode)
18635 && NEON_REGNO_OK_FOR_QUAD (regno))
18636 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18637 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18638 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18639 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18640 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18645 if (TARGET_REALLY_IWMMXT)
18647 if (IS_IWMMXT_GR_REGNUM (regno))
18648 return mode == SImode;
18650 if (IS_IWMMXT_REGNUM (regno))
18651 return VALID_IWMMXT_REG_MODE (mode);
18654 /* We allow almost any value to be stored in the general registers.
18655 Restrict doubleword quantities to even register pairs so that we can
18656 use ldrd. Do not allow very large Neon structure opaque modes in
18657 general registers; they would use too many. */
18658 if (regno <= LAST_ARM_REGNUM)
18659 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18660 && ARM_NUM_REGS (mode) <= 4;
18662 if (regno == FRAME_POINTER_REGNUM
18663 || regno == ARG_POINTER_REGNUM)
18664 /* We only allow integers in the fake hard registers. */
18665 return GET_MODE_CLASS (mode) == MODE_INT;
18667 /* The only registers left are the FPA registers
18668 which we only allow to hold FP values. */
18669 return (TARGET_HARD_FLOAT && TARGET_FPA
18670 && GET_MODE_CLASS (mode) == MODE_FLOAT
18671 && regno >= FIRST_FPA_REGNUM
18672 && regno <= LAST_FPA_REGNUM);
18675 /* Implement MODES_TIEABLE_P. */
18678 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18680 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18683 /* We specifically want to allow elements of "structure" modes to
18684 be tieable to the structure. This more general condition allows
18685 other rarer situations too. */
18687 && (VALID_NEON_DREG_MODE (mode1)
18688 || VALID_NEON_QREG_MODE (mode1)
18689 || VALID_NEON_STRUCT_MODE (mode1))
18690 && (VALID_NEON_DREG_MODE (mode2)
18691 || VALID_NEON_QREG_MODE (mode2)
18692 || VALID_NEON_STRUCT_MODE (mode2)))
18698 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18699 not used in arm mode. */
18702 arm_regno_class (int regno)
18706 if (regno == STACK_POINTER_REGNUM)
18708 if (regno == CC_REGNUM)
18715 if (TARGET_THUMB2 && regno < 8)
18718 if ( regno <= LAST_ARM_REGNUM
18719 || regno == FRAME_POINTER_REGNUM
18720 || regno == ARG_POINTER_REGNUM)
18721 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18723 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18724 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18726 if (IS_CIRRUS_REGNUM (regno))
18727 return CIRRUS_REGS;
18729 if (IS_VFP_REGNUM (regno))
18731 if (regno <= D7_VFP_REGNUM)
18732 return VFP_D0_D7_REGS;
18733 else if (regno <= LAST_LO_VFP_REGNUM)
18734 return VFP_LO_REGS;
18736 return VFP_HI_REGS;
18739 if (IS_IWMMXT_REGNUM (regno))
18740 return IWMMXT_REGS;
18742 if (IS_IWMMXT_GR_REGNUM (regno))
18743 return IWMMXT_GR_REGS;
18748 /* Handle a special case when computing the offset
18749 of an argument from the frame pointer. */
18751 arm_debugger_arg_offset (int value, rtx addr)
18755 /* We are only interested if dbxout_parms() failed to compute the offset. */
18759 /* We can only cope with the case where the address is held in a register. */
18760 if (GET_CODE (addr) != REG)
18763 /* If we are using the frame pointer to point at the argument, then
18764 an offset of 0 is correct. */
18765 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18768 /* If we are using the stack pointer to point at the
18769 argument, then an offset of 0 is correct. */
18770 /* ??? Check this is consistent with thumb2 frame layout. */
18771 if ((TARGET_THUMB || !frame_pointer_needed)
18772 && REGNO (addr) == SP_REGNUM)
18775 /* Oh dear. The argument is pointed to by a register rather
18776 than being held in a register, or being stored at a known
18777 offset from the frame pointer. Since GDB only understands
18778 those two kinds of argument we must translate the address
18779 held in the register into an offset from the frame pointer.
18780 We do this by searching through the insns for the function
18781 looking to see where this register gets its value. If the
18782 register is initialized from the frame pointer plus an offset
18783 then we are in luck and we can continue, otherwise we give up.
18785 This code is exercised by producing debugging information
18786 for a function with arguments like this:
18788 double func (double a, double b, int c, double d) {return d;}
18790 Without this code the stab for parameter 'd' will be set to
18791 an offset of 0 from the frame pointer, rather than 8. */
18793 /* The if() statement says:
18795 If the insn is a normal instruction
18796 and if the insn is setting the value in a register
18797 and if the register being set is the register holding the address of the argument
18798 and if the address is computed by an addition
18799 that involves adding to a register
18800 which is the frame pointer
18805 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18807 if ( GET_CODE (insn) == INSN
18808 && GET_CODE (PATTERN (insn)) == SET
18809 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18810 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18811 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18812 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18813 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18816 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18825 warning (0, "unable to compute real location of stacked parameter");
18826 value = 8; /* XXX magic hack */
18846 T_MAX /* Size of enum. Keep last. */
18847 } neon_builtin_type_mode;
18849 #define TYPE_MODE_BIT(X) (1 << (X))
18851 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18852 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18853 | TYPE_MODE_BIT (T_DI))
18854 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18855 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18856 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
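/* So TB_DREG collects the five 64-bit ("doubleword") key modes and
   TB_QREG the six 128-bit ("quadword") ones; for example,
   (TYPE_MODE_BIT (T_V8QI) & TB_DREG) is nonzero while
   (TYPE_MODE_BIT (T_V8QI) & TB_QREG) is zero.  */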
18858 #define v8qi_UP T_V8QI
18859 #define v4hi_UP T_V4HI
18860 #define v2si_UP T_V2SI
18861 #define v2sf_UP T_V2SF
18863 #define v16qi_UP T_V16QI
18864 #define v8hi_UP T_V8HI
18865 #define v4si_UP T_V4SI
18866 #define v4sf_UP T_V4SF
18867 #define v2di_UP T_V2DI
18872 #define UP(X) X##_UP
18905 NEON_LOADSTRUCTLANE,
18907 NEON_STORESTRUCTLANE,
18916 const neon_itype itype;
18917 const neon_builtin_type_mode mode;
18918 const enum insn_code code;
18919 unsigned int fcode;
18920 } neon_builtin_datum;
18922 #define CF(N,X) CODE_FOR_neon_##N##X
18924 #define VAR1(T, N, A) \
18925 {#N, NEON_##T, UP (A), CF (N, A), 0}
18926 #define VAR2(T, N, A, B) \
18928 {#N, NEON_##T, UP (B), CF (N, B), 0}
18929 #define VAR3(T, N, A, B, C) \
18930 VAR2 (T, N, A, B), \
18931 {#N, NEON_##T, UP (C), CF (N, C), 0}
18932 #define VAR4(T, N, A, B, C, D) \
18933 VAR3 (T, N, A, B, C), \
18934 {#N, NEON_##T, UP (D), CF (N, D), 0}
18935 #define VAR5(T, N, A, B, C, D, E) \
18936 VAR4 (T, N, A, B, C, D), \
18937 {#N, NEON_##T, UP (E), CF (N, E), 0}
18938 #define VAR6(T, N, A, B, C, D, E, F) \
18939 VAR5 (T, N, A, B, C, D, E), \
18940 {#N, NEON_##T, UP (F), CF (N, F), 0}
18941 #define VAR7(T, N, A, B, C, D, E, F, G) \
18942 VAR6 (T, N, A, B, C, D, E, F), \
18943 {#N, NEON_##T, UP (G), CF (N, G), 0}
18944 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18945 VAR7 (T, N, A, B, C, D, E, F, G), \
18946 {#N, NEON_##T, UP (H), CF (N, H), 0}
18947 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18948 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18949 {#N, NEON_##T, UP (I), CF (N, I), 0}
18950 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18951 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18952 {#N, NEON_##T, UP (J), CF (N, J), 0}
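/* To make the expansion concrete: VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)
   produces the three initializers

     {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
     {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
     {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},

   one neon_builtin_datum per supported "key" mode.  */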
18954 /* The mode entries in the following table correspond to the "key" type of the
18955 instruction variant, i.e. equivalent to that which would be specified after
18956 the assembler mnemonic, which usually refers to the last vector operand.
18957 (Signed/unsigned/polynomial types are not differentiated, though, and
18958 are all mapped onto the same mode for a given element size.) The modes
18959 listed per instruction should be the same as those defined for that
18960 instruction's pattern in neon.md. */
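/* For example, the v2si entry for vadd corresponds to the variant
   written "vadd.i32 d0, d1, d2" in assembler, whose key (last vector)
   operands are 64-bit V2SI values.  */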
18962 static neon_builtin_datum neon_builtin_data[] =
18964 VAR10 (BINOP, vadd,
18965 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18966 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18967 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18968 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18969 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18970 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18971 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18972 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18973 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18974 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18975 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18976 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18977 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18978 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18979 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18980 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18981 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18982 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18983 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18984 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18985 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18986 VAR2 (BINOP, vqdmull, v4hi, v2si),
18987 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18988 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18989 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18990 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18991 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18992 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18993 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18994 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18995 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18996 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18997 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18998 VAR10 (BINOP, vsub,
18999 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19000 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19001 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19002 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19003 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19004 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19005 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19006 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19007 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19008 VAR2 (BINOP, vcage, v2sf, v4sf),
19009 VAR2 (BINOP, vcagt, v2sf, v4sf),
19010 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19011 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19012 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19013 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19014 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19015 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19016 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19017 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19018 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19019 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19020 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19021 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19022 VAR2 (BINOP, vrecps, v2sf, v4sf),
19023 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19024 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19025 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19026 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19027 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19028 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19029 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19030 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19031 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19032 VAR2 (UNOP, vcnt, v8qi, v16qi),
19033 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19034 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19035 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19036 /* FIXME: vget_lane supports more variants than this! */
19037 VAR10 (GETLANE, vget_lane,
19038 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19039 VAR10 (SETLANE, vset_lane,
19040 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19041 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19042 VAR10 (DUP, vdup_n,
19043 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19044 VAR10 (DUPLANE, vdup_lane,
19045 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19046 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19047 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19048 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19049 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19050 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19051 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19052 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19053 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19054 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19055 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19056 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19057 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19058 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19059 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19060 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19061 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19062 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19063 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19064 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19065 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19066 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19067 VAR10 (BINOP, vext,
19068 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19069 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19070 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19071 VAR2 (UNOP, vrev16, v8qi, v16qi),
19072 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19073 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19074 VAR10 (SELECT, vbsl,
19075 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19076 VAR1 (VTBL, vtbl1, v8qi),
19077 VAR1 (VTBL, vtbl2, v8qi),
19078 VAR1 (VTBL, vtbl3, v8qi),
19079 VAR1 (VTBL, vtbl4, v8qi),
19080 VAR1 (VTBX, vtbx1, v8qi),
19081 VAR1 (VTBX, vtbx2, v8qi),
19082 VAR1 (VTBX, vtbx3, v8qi),
19083 VAR1 (VTBX, vtbx4, v8qi),
19084 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19085 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19086 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19087 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19088 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19089 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19090 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19091 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19092 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19093 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19094 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19095 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19096 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19097 VAR10 (LOAD1, vld1,
19098 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19099 VAR10 (LOAD1LANE, vld1_lane,
19100 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19101 VAR10 (LOAD1, vld1_dup,
19102 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19103 VAR10 (STORE1, vst1,
19104 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19105 VAR10 (STORE1LANE, vst1_lane,
19106 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19108 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19109 VAR7 (LOADSTRUCTLANE, vld2_lane,
19110 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19111 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19112 VAR9 (STORESTRUCT, vst2,
19113 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19114 VAR7 (STORESTRUCTLANE, vst2_lane,
19115 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19117 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19118 VAR7 (LOADSTRUCTLANE, vld3_lane,
19119 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19120 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19121 VAR9 (STORESTRUCT, vst3,
19122 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19123 VAR7 (STORESTRUCTLANE, vst3_lane,
19124 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19125 VAR9 (LOADSTRUCT, vld4,
19126 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19127 VAR7 (LOADSTRUCTLANE, vld4_lane,
19128 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19129 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19130 VAR9 (STORESTRUCT, vst4,
19131 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19132 VAR7 (STORESTRUCTLANE, vst4_lane,
19133 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19134 VAR10 (LOGICBINOP, vand,
19135 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19136 VAR10 (LOGICBINOP, vorr,
19137 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19138 VAR10 (BINOP, veor,
19139 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19140 VAR10 (LOGICBINOP, vbic,
19141 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19142 VAR10 (LOGICBINOP, vorn,
19143 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19158 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19159 symbolic names defined here (which would require too much duplication). */
19161 enum arm_builtins
19162 {
19163 ARM_BUILTIN_GETWCX,
19164 ARM_BUILTIN_SETWCX,
19168 ARM_BUILTIN_WAVG2BR,
19169 ARM_BUILTIN_WAVG2HR,
19170 ARM_BUILTIN_WAVG2B,
19171 ARM_BUILTIN_WAVG2H,
19178 ARM_BUILTIN_WMACSZ,
19180 ARM_BUILTIN_WMACUZ,
19183 ARM_BUILTIN_WSADBZ,
19185 ARM_BUILTIN_WSADHZ,
19187 ARM_BUILTIN_WALIGN,
19190 ARM_BUILTIN_TMIAPH,
19191 ARM_BUILTIN_TMIABB,
19192 ARM_BUILTIN_TMIABT,
19193 ARM_BUILTIN_TMIATB,
19194 ARM_BUILTIN_TMIATT,
19196 ARM_BUILTIN_TMOVMSKB,
19197 ARM_BUILTIN_TMOVMSKH,
19198 ARM_BUILTIN_TMOVMSKW,
19200 ARM_BUILTIN_TBCSTB,
19201 ARM_BUILTIN_TBCSTH,
19202 ARM_BUILTIN_TBCSTW,
19204 ARM_BUILTIN_WMADDS,
19205 ARM_BUILTIN_WMADDU,
19207 ARM_BUILTIN_WPACKHSS,
19208 ARM_BUILTIN_WPACKWSS,
19209 ARM_BUILTIN_WPACKDSS,
19210 ARM_BUILTIN_WPACKHUS,
19211 ARM_BUILTIN_WPACKWUS,
19212 ARM_BUILTIN_WPACKDUS,
19217 ARM_BUILTIN_WADDSSB,
19218 ARM_BUILTIN_WADDSSH,
19219 ARM_BUILTIN_WADDSSW,
19220 ARM_BUILTIN_WADDUSB,
19221 ARM_BUILTIN_WADDUSH,
19222 ARM_BUILTIN_WADDUSW,
19226 ARM_BUILTIN_WSUBSSB,
19227 ARM_BUILTIN_WSUBSSH,
19228 ARM_BUILTIN_WSUBSSW,
19229 ARM_BUILTIN_WSUBUSB,
19230 ARM_BUILTIN_WSUBUSH,
19231 ARM_BUILTIN_WSUBUSW,
19238 ARM_BUILTIN_WCMPEQB,
19239 ARM_BUILTIN_WCMPEQH,
19240 ARM_BUILTIN_WCMPEQW,
19241 ARM_BUILTIN_WCMPGTUB,
19242 ARM_BUILTIN_WCMPGTUH,
19243 ARM_BUILTIN_WCMPGTUW,
19244 ARM_BUILTIN_WCMPGTSB,
19245 ARM_BUILTIN_WCMPGTSH,
19246 ARM_BUILTIN_WCMPGTSW,
19248 ARM_BUILTIN_TEXTRMSB,
19249 ARM_BUILTIN_TEXTRMSH,
19250 ARM_BUILTIN_TEXTRMSW,
19251 ARM_BUILTIN_TEXTRMUB,
19252 ARM_BUILTIN_TEXTRMUH,
19253 ARM_BUILTIN_TEXTRMUW,
19254 ARM_BUILTIN_TINSRB,
19255 ARM_BUILTIN_TINSRH,
19256 ARM_BUILTIN_TINSRW,
19258 ARM_BUILTIN_WMAXSW,
19259 ARM_BUILTIN_WMAXSH,
19260 ARM_BUILTIN_WMAXSB,
19261 ARM_BUILTIN_WMAXUW,
19262 ARM_BUILTIN_WMAXUH,
19263 ARM_BUILTIN_WMAXUB,
19264 ARM_BUILTIN_WMINSW,
19265 ARM_BUILTIN_WMINSH,
19266 ARM_BUILTIN_WMINSB,
19267 ARM_BUILTIN_WMINUW,
19268 ARM_BUILTIN_WMINUH,
19269 ARM_BUILTIN_WMINUB,
19271 ARM_BUILTIN_WMULUM,
19272 ARM_BUILTIN_WMULSM,
19273 ARM_BUILTIN_WMULUL,
19275 ARM_BUILTIN_PSADBH,
19276 ARM_BUILTIN_WSHUFH,
19290 ARM_BUILTIN_WSLLHI,
19291 ARM_BUILTIN_WSLLWI,
19292 ARM_BUILTIN_WSLLDI,
19293 ARM_BUILTIN_WSRAHI,
19294 ARM_BUILTIN_WSRAWI,
19295 ARM_BUILTIN_WSRADI,
19296 ARM_BUILTIN_WSRLHI,
19297 ARM_BUILTIN_WSRLWI,
19298 ARM_BUILTIN_WSRLDI,
19299 ARM_BUILTIN_WRORHI,
19300 ARM_BUILTIN_WRORWI,
19301 ARM_BUILTIN_WRORDI,
19303 ARM_BUILTIN_WUNPCKIHB,
19304 ARM_BUILTIN_WUNPCKIHH,
19305 ARM_BUILTIN_WUNPCKIHW,
19306 ARM_BUILTIN_WUNPCKILB,
19307 ARM_BUILTIN_WUNPCKILH,
19308 ARM_BUILTIN_WUNPCKILW,
19310 ARM_BUILTIN_WUNPCKEHSB,
19311 ARM_BUILTIN_WUNPCKEHSH,
19312 ARM_BUILTIN_WUNPCKEHSW,
19313 ARM_BUILTIN_WUNPCKEHUB,
19314 ARM_BUILTIN_WUNPCKEHUH,
19315 ARM_BUILTIN_WUNPCKEHUW,
19316 ARM_BUILTIN_WUNPCKELSB,
19317 ARM_BUILTIN_WUNPCKELSH,
19318 ARM_BUILTIN_WUNPCKELSW,
19319 ARM_BUILTIN_WUNPCKELUB,
19320 ARM_BUILTIN_WUNPCKELUH,
19321 ARM_BUILTIN_WUNPCKELUW,
19323 ARM_BUILTIN_THREAD_POINTER,
19325 ARM_BUILTIN_NEON_BASE,
19327 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19330 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
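/* For illustration of the numbering above: the decl built for the I-th
   entry of neon_builtin_data is stored at index
   ARM_BUILTIN_NEON_BASE + I of this array, the same fcode that
   arm_init_neon_builtins passes to add_builtin_function below.  */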
19333 arm_init_neon_builtins (void)
19335 unsigned int i, fcode;
19338 tree neon_intQI_type_node;
19339 tree neon_intHI_type_node;
19340 tree neon_polyQI_type_node;
19341 tree neon_polyHI_type_node;
19342 tree neon_intSI_type_node;
19343 tree neon_intDI_type_node;
19344 tree neon_float_type_node;
19346 tree intQI_pointer_node;
19347 tree intHI_pointer_node;
19348 tree intSI_pointer_node;
19349 tree intDI_pointer_node;
19350 tree float_pointer_node;
19352 tree const_intQI_node;
19353 tree const_intHI_node;
19354 tree const_intSI_node;
19355 tree const_intDI_node;
19356 tree const_float_node;
19358 tree const_intQI_pointer_node;
19359 tree const_intHI_pointer_node;
19360 tree const_intSI_pointer_node;
19361 tree const_intDI_pointer_node;
19362 tree const_float_pointer_node;
19364 tree V8QI_type_node;
19365 tree V4HI_type_node;
19366 tree V2SI_type_node;
19367 tree V2SF_type_node;
19368 tree V16QI_type_node;
19369 tree V8HI_type_node;
19370 tree V4SI_type_node;
19371 tree V4SF_type_node;
19372 tree V2DI_type_node;
19374 tree intUQI_type_node;
19375 tree intUHI_type_node;
19376 tree intUSI_type_node;
19377 tree intUDI_type_node;
19379 tree intEI_type_node;
19380 tree intOI_type_node;
19381 tree intCI_type_node;
19382 tree intXI_type_node;
19384 tree V8QI_pointer_node;
19385 tree V4HI_pointer_node;
19386 tree V2SI_pointer_node;
19387 tree V2SF_pointer_node;
19388 tree V16QI_pointer_node;
19389 tree V8HI_pointer_node;
19390 tree V4SI_pointer_node;
19391 tree V4SF_pointer_node;
19392 tree V2DI_pointer_node;
19394 tree void_ftype_pv8qi_v8qi_v8qi;
19395 tree void_ftype_pv4hi_v4hi_v4hi;
19396 tree void_ftype_pv2si_v2si_v2si;
19397 tree void_ftype_pv2sf_v2sf_v2sf;
19398 tree void_ftype_pdi_di_di;
19399 tree void_ftype_pv16qi_v16qi_v16qi;
19400 tree void_ftype_pv8hi_v8hi_v8hi;
19401 tree void_ftype_pv4si_v4si_v4si;
19402 tree void_ftype_pv4sf_v4sf_v4sf;
19403 tree void_ftype_pv2di_v2di_v2di;
19405 tree reinterp_ftype_dreg[5][5];
19406 tree reinterp_ftype_qreg[5][5];
19407 tree dreg_types[5], qreg_types[5];
19409 /* Create distinguished type nodes for NEON vector element types,
19410 and pointers to values of such types, so we can detect them later. */
19411 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19412 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19413 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19414 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19415 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19416 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19417 neon_float_type_node = make_node (REAL_TYPE);
19418 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19419 layout_type (neon_float_type_node);
19421 /* Define typedefs which exactly correspond to the modes we are basing vector
19422 types on. If you change these names you'll need to change
19423 the table used by arm_mangle_type too. */
19424 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19425 "__builtin_neon_qi");
19426 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19427 "__builtin_neon_hi");
19428 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19429 "__builtin_neon_si");
19430 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19431 "__builtin_neon_sf");
19432 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19433 "__builtin_neon_di");
19434 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19435 "__builtin_neon_poly8");
19436 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19437 "__builtin_neon_poly16");
19439 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19440 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19441 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19442 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19443 float_pointer_node = build_pointer_type (neon_float_type_node);
19445 /* Next create constant-qualified versions of the above types. */
19446 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19448 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19450 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19452 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19454 const_float_node = build_qualified_type (neon_float_type_node,
19457 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19458 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19459 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19460 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19461 const_float_pointer_node = build_pointer_type (const_float_node);
19463 /* Now create vector types based on our NEON element types. */
19464 /* 64-bit vectors. */
19466 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19468 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19470 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19472 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19473 /* 128-bit vectors. */
19475 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19477 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19479 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19481 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19483 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19485 /* Unsigned integer types for various mode sizes. */
19486 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19487 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19488 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19489 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19491 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19492 "__builtin_neon_uqi");
19493 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19494 "__builtin_neon_uhi");
19495 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19496 "__builtin_neon_usi");
19497 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19498 "__builtin_neon_udi");
19500 /* Opaque integer types for structures of vectors. */
19501 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19502 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19503 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19504 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19506 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19507 "__builtin_neon_ti");
19508 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19509 "__builtin_neon_ei");
19510 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19511 "__builtin_neon_oi");
19512 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19513 "__builtin_neon_ci");
19514 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19515 "__builtin_neon_xi");
19517 /* Pointers to vector types. */
19518 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19519 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19520 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19521 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19522 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19523 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19524 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19525 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19526 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19528 /* Operations which return results as pairs. */
19529 void_ftype_pv8qi_v8qi_v8qi =
19530 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19531 V8QI_type_node, NULL);
19532 void_ftype_pv4hi_v4hi_v4hi =
19533 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19534 V4HI_type_node, NULL);
19535 void_ftype_pv2si_v2si_v2si =
19536 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19537 V2SI_type_node, NULL);
19538 void_ftype_pv2sf_v2sf_v2sf =
19539 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19540 V2SF_type_node, NULL);
19541 void_ftype_pdi_di_di =
19542 build_function_type_list (void_type_node, intDI_pointer_node,
19543 neon_intDI_type_node, neon_intDI_type_node, NULL);
19544 void_ftype_pv16qi_v16qi_v16qi =
19545 build_function_type_list (void_type_node, V16QI_pointer_node,
19546 V16QI_type_node, V16QI_type_node, NULL);
19547 void_ftype_pv8hi_v8hi_v8hi =
19548 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19549 V8HI_type_node, NULL);
19550 void_ftype_pv4si_v4si_v4si =
19551 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19552 V4SI_type_node, NULL);
19553 void_ftype_pv4sf_v4sf_v4sf =
19554 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19555 V4SF_type_node, NULL);
19556 void_ftype_pv2di_v2di_v2di =
19557 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19558 V2DI_type_node, NULL);
19560 dreg_types[0] = V8QI_type_node;
19561 dreg_types[1] = V4HI_type_node;
19562 dreg_types[2] = V2SI_type_node;
19563 dreg_types[3] = V2SF_type_node;
19564 dreg_types[4] = neon_intDI_type_node;
19566 qreg_types[0] = V16QI_type_node;
19567 qreg_types[1] = V8HI_type_node;
19568 qreg_types[2] = V4SI_type_node;
19569 qreg_types[3] = V4SF_type_node;
19570 qreg_types[4] = V2DI_type_node;
19572 for (i = 0; i < 5; i++)
19575 for (j = 0; j < 5; j++)
19577 reinterp_ftype_dreg[i][j]
19578 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19579 reinterp_ftype_qreg[i][j]
19580 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19584 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19585 i < ARRAY_SIZE (neon_builtin_data);
19588 neon_builtin_datum *d = &neon_builtin_data[i];
19590 const char* const modenames[] = {
19591 "v8qi", "v4hi", "v2si", "v2sf", "di",
19592 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19597 int is_load = 0, is_store = 0;
19599 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19606 case NEON_LOAD1LANE:
19607 case NEON_LOADSTRUCT:
19608 case NEON_LOADSTRUCTLANE:
19610 /* Fall through. */
19612 case NEON_STORE1LANE:
19613 case NEON_STORESTRUCT:
19614 case NEON_STORESTRUCTLANE:
19617 /* Fall through. */
19620 case NEON_LOGICBINOP:
19621 case NEON_SHIFTINSERT:
19628 case NEON_SHIFTIMM:
19629 case NEON_SHIFTACC:
19635 case NEON_LANEMULL:
19636 case NEON_LANEMULH:
19638 case NEON_SCALARMUL:
19639 case NEON_SCALARMULL:
19640 case NEON_SCALARMULH:
19641 case NEON_SCALARMAC:
19647 tree return_type = void_type_node, args = void_list_node;
19649 /* Build a function type directly from the insn_data for
19650 this builtin. The build_function_type() function takes
19651 care of removing duplicates for us. */
19652 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19656 if (is_load && k == 1)
19658 /* Neon load patterns always have the memory
19659 operand in the operand 1 position. */
19660 gcc_assert (insn_data[d->code].operand[k].predicate
19661 == neon_struct_operand);
19667 eltype = const_intQI_pointer_node;
19672 eltype = const_intHI_pointer_node;
19677 eltype = const_intSI_pointer_node;
19682 eltype = const_float_pointer_node;
19687 eltype = const_intDI_pointer_node;
19690 default: gcc_unreachable ();
19693 else if (is_store && k == 0)
19695 /* Similarly, Neon store patterns use operand 0 as
19696 the memory location to store to. */
19697 gcc_assert (insn_data[d->code].operand[k].predicate
19698 == neon_struct_operand);
19704 eltype = intQI_pointer_node;
19709 eltype = intHI_pointer_node;
19714 eltype = intSI_pointer_node;
19719 eltype = float_pointer_node;
19724 eltype = intDI_pointer_node;
19727 default: gcc_unreachable ();
19732 switch (insn_data[d->code].operand[k].mode)
19734 case VOIDmode: eltype = void_type_node; break;
19736 case QImode: eltype = neon_intQI_type_node; break;
19737 case HImode: eltype = neon_intHI_type_node; break;
19738 case SImode: eltype = neon_intSI_type_node; break;
19739 case SFmode: eltype = neon_float_type_node; break;
19740 case DImode: eltype = neon_intDI_type_node; break;
19741 case TImode: eltype = intTI_type_node; break;
19742 case EImode: eltype = intEI_type_node; break;
19743 case OImode: eltype = intOI_type_node; break;
19744 case CImode: eltype = intCI_type_node; break;
19745 case XImode: eltype = intXI_type_node; break;
19746 /* 64-bit vectors. */
19747 case V8QImode: eltype = V8QI_type_node; break;
19748 case V4HImode: eltype = V4HI_type_node; break;
19749 case V2SImode: eltype = V2SI_type_node; break;
19750 case V2SFmode: eltype = V2SF_type_node; break;
19751 /* 128-bit vectors. */
19752 case V16QImode: eltype = V16QI_type_node; break;
19753 case V8HImode: eltype = V8HI_type_node; break;
19754 case V4SImode: eltype = V4SI_type_node; break;
19755 case V4SFmode: eltype = V4SF_type_node; break;
19756 case V2DImode: eltype = V2DI_type_node; break;
19757 default: gcc_unreachable ();
19761 if (k == 0 && !is_store)
19762 return_type = eltype;
19764 args = tree_cons (NULL_TREE, eltype, args);
19767 ftype = build_function_type (return_type, args);
19771 case NEON_RESULTPAIR:
19773 switch (insn_data[d->code].operand[1].mode)
19775 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19776 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19777 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19778 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19779 case DImode: ftype = void_ftype_pdi_di_di; break;
19780 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19781 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19782 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19783 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19784 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19785 default: gcc_unreachable ();
19790 case NEON_REINTERP:
19792 /* We iterate over 5 doubleword types, then 5 quadword
19793 types. */
19794 int rhs = d->mode % 5;
19795 switch (insn_data[d->code].operand[0].mode)
19797 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19798 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19799 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19800 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19801 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19802 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19803 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19804 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19805 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19806 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19807 default: gcc_unreachable ();
19813 gcc_unreachable ();
19816 gcc_assert (ftype != NULL);
19818 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19820 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19822 arm_builtin_decls[fcode] = decl;
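/* For example, the entry for vadd with mode T_V8QI produces the name
   "__builtin_neon_vaddv8qi"; user code normally reaches these builtins
   through the arm_neon.h intrinsics rather than by name.  */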
19826 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19827 do \
19828 { \
19829 if ((MASK) & insn_flags) \
19830 { \
19831 tree bdecl; \
19832 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19833 BUILT_IN_MD, NULL, NULL_TREE); \
19834 arm_builtin_decls[CODE] = bdecl; \
19835 } \
19836 } \
19837 while (0)
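/* A minimal usage sketch (the type node name di_ftype_void is
   illustrative, standing for whatever function type the caller built):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers the builtin only when the selected CPU's insn_flags
   include FL_IWMMXT.  */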
19839 struct builtin_description
19841 const unsigned int mask;
19842 const enum insn_code icode;
19843 const char * const name;
19844 const enum arm_builtins code;
19845 const enum rtx_code comparison;
19846 const unsigned int flag;
19849 static const struct builtin_description bdesc_2arg[] =
19851 #define IWMMXT_BUILTIN(code, string, builtin) \
19852 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19853 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
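/* So, for example, the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying __builtin_arm_waddb to the addv8qi3 insn pattern.  */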
19855 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19856 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19857 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19858 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19859 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19860 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19861 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19862 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19863 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19864 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19865 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19866 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19867 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19868 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19869 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19870 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19871 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19872 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19873 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19874 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19875 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19876 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19877 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19878 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19879 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19880 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19881 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19882 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19883 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19884 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19885 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19886 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19887 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19888 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19889 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19890 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19891 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19892 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19893 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19894 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19895 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19896 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19897 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19898 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19899 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19900 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19901 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19902 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19903 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19904 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19905 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19906 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19907 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19908 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19909 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19910 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19911 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19912 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19914 #define IWMMXT_BUILTIN2(code, builtin) \
19915 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
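/* Entries defined with IWMMXT_BUILTIN2 deliberately carry a NULL name:
   the generic two-operand registration loop in arm_init_iwmmxt_builtins
   skips them, and they are instead registered with hand-written
   prototypes via iwmmx_mbuiltin further down.  */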
19917 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19918 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19919 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19920 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19921 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19922 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19923 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
19924 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
19925 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
19926 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
19927 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
19928 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
19929 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
19930 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
19931 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
19932 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
19933 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
19934 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
19935 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
19936 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
19937 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
19938 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
19939 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
19940 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
19941 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
19942 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
19943 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
19944 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
19945 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
19946 IWMMXT_BUILTIN2 (rordi3, WRORDI)
19947 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
static const struct builtin_description bdesc_1arg[] =
{
19953 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19954 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19955 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19956 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19957 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19958 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19959 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19960 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19961 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19962 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19963 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19964 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19965 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19966 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19967 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19968 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19969 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
19973 /* Set up all the iWMMXt builtins. This is not called if
19974 TARGET_IWMMXT is zero. */
19977 arm_init_iwmmxt_builtins (void)
  const struct builtin_description * d;
  size_t i;
19982 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19983 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19984 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree int_ftype_int
    = build_function_type_list (integer_type_node,
				integer_type_node, NULL_TREE);
19989 tree v8qi_ftype_v8qi_v8qi_int
19990 = build_function_type_list (V8QI_type_node,
19991 V8QI_type_node, V8QI_type_node,
19992 integer_type_node, NULL_TREE);
19993 tree v4hi_ftype_v4hi_int
19994 = build_function_type_list (V4HI_type_node,
19995 V4HI_type_node, integer_type_node, NULL_TREE);
19996 tree v2si_ftype_v2si_int
19997 = build_function_type_list (V2SI_type_node,
19998 V2SI_type_node, integer_type_node, NULL_TREE);
19999 tree v2si_ftype_di_di
20000 = build_function_type_list (V2SI_type_node,
20001 long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
20004 tree di_ftype_di_int
20005 = build_function_type_list (long_long_integer_type_node,
20006 long_long_integer_type_node,
20007 integer_type_node, NULL_TREE);
20008 tree di_ftype_di_int_int
20009 = build_function_type_list (long_long_integer_type_node,
20010 long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
20013 tree int_ftype_v8qi
20014 = build_function_type_list (integer_type_node,
20015 V8QI_type_node, NULL_TREE);
20016 tree int_ftype_v4hi
20017 = build_function_type_list (integer_type_node,
20018 V4HI_type_node, NULL_TREE);
20019 tree int_ftype_v2si
20020 = build_function_type_list (integer_type_node,
20021 V2SI_type_node, NULL_TREE);
20022 tree int_ftype_v8qi_int
20023 = build_function_type_list (integer_type_node,
20024 V8QI_type_node, integer_type_node, NULL_TREE);
20025 tree int_ftype_v4hi_int
20026 = build_function_type_list (integer_type_node,
20027 V4HI_type_node, integer_type_node, NULL_TREE);
20028 tree int_ftype_v2si_int
20029 = build_function_type_list (integer_type_node,
20030 V2SI_type_node, integer_type_node, NULL_TREE);
20031 tree v8qi_ftype_v8qi_int_int
20032 = build_function_type_list (V8QI_type_node,
20033 V8QI_type_node, integer_type_node,
20034 integer_type_node, NULL_TREE);
20035 tree v4hi_ftype_v4hi_int_int
20036 = build_function_type_list (V4HI_type_node,
20037 V4HI_type_node, integer_type_node,
20038 integer_type_node, NULL_TREE);
20039 tree v2si_ftype_v2si_int_int
20040 = build_function_type_list (V2SI_type_node,
20041 V2SI_type_node, integer_type_node,
20042 integer_type_node, NULL_TREE);
20043 /* Miscellaneous. */
20044 tree v8qi_ftype_v4hi_v4hi
20045 = build_function_type_list (V8QI_type_node,
20046 V4HI_type_node, V4HI_type_node, NULL_TREE);
20047 tree v4hi_ftype_v2si_v2si
20048 = build_function_type_list (V4HI_type_node,
20049 V2SI_type_node, V2SI_type_node, NULL_TREE);
20050 tree v2si_ftype_v4hi_v4hi
20051 = build_function_type_list (V2SI_type_node,
20052 V4HI_type_node, V4HI_type_node, NULL_TREE);
20053 tree v2si_ftype_v8qi_v8qi
20054 = build_function_type_list (V2SI_type_node,
20055 V8QI_type_node, V8QI_type_node, NULL_TREE);
20056 tree v4hi_ftype_v4hi_di
20057 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
20060 tree v2si_ftype_v2si_di
20061 = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree void_ftype_int_int
    = build_function_type_list (void_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
20079 tree v2si_ftype_v4hi
20080 = build_function_type_list (V2SI_type_node,
20081 V4HI_type_node, NULL_TREE);
20082 tree v4hi_ftype_v8qi
20083 = build_function_type_list (V4HI_type_node,
20084 V8QI_type_node, NULL_TREE);
20086 tree di_ftype_di_v4hi_v4hi
20087 = build_function_type_list (long_long_unsigned_type_node,
20088 long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);
20092 tree di_ftype_v4hi_v4hi
20093 = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);
20097 /* Normal vector binops. */
20098 tree v8qi_ftype_v8qi_v8qi
20099 = build_function_type_list (V8QI_type_node,
20100 V8QI_type_node, V8QI_type_node, NULL_TREE);
20101 tree v4hi_ftype_v4hi_v4hi
20102 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
20104 tree v2si_ftype_v2si_v2si
20105 = build_function_type_list (V2SI_type_node,
20106 V2SI_type_node, V2SI_type_node, NULL_TREE);
20107 tree di_ftype_di_di
20108 = build_function_type_list (long_long_unsigned_type_node,
20109 long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
20115 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20117 /* Use one of the operands; the target can have a different mode for
20118 mask-generating compares. */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;
      switch (mode)
	{
	case V8QImode: type = v8qi_ftype_v8qi_v8qi; break;
	case V4HImode: type = v4hi_ftype_v4hi_v4hi; break;
	case V2SImode: type = v2si_ftype_v2si_v2si; break;
	case DImode:   type = di_ftype_di_di; break;
	default: gcc_unreachable ();
	}
20146 def_mbuiltin (d->mask, d->name, type, d->code);
20149 /* Add the remaining MMX insns with somewhat more complicated types. */
20150 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20151 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20152 ARM_BUILTIN_ ## CODE)
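/* For instance, the first use below, iwmmx_mbuiltin ("wzero",
   di_ftype_void, WZERO), registers __builtin_arm_wzero with the
   "unsigned long long (void)" prototype built above, under the code
   ARM_BUILTIN_WZERO and gated on FL_IWMMXT.  */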
20154 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20155 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20156 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20158 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20159 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20160 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20161 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20162 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20163 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20165 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20166 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20167 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20168 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20169 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20170 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20172 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20173 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20174 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20175 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20176 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20177 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20179 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20180 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20181 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20182 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20183 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20184 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20186 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20188 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20189 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20190 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20191 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20193 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20194 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20195 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20196 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20197 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20198 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20199 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20200 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20201 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20203 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20204 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20205 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20207 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20208 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20209 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20211 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20212 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20213 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20214 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20215 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20216 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20218 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20219 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20220 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20221 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20222 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20223 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20224 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20225 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20226 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20227 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20228 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20229 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20231 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20232 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20233 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20234 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20236 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20237 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20238 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20239 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20240 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20241 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20242 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20244 #undef iwmmx_mbuiltin
20248 arm_init_tls_builtins (void)
20252 ftype = build_function_type (ptr_type_node, void_list_node);
20253 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
20256 TREE_NOTHROW (decl) = 1;
20257 TREE_READONLY (decl) = 1;
20258 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
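/* Illustrative use of the builtin registered above (not from the
   original sources):

     void *tp = __builtin_thread_pointer ();

   The call is expanded through arm_load_tp (see arm_expand_builtin)
   and is marked nothrow and readonly above, so CSE may share it.  */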
20262 arm_init_fp16_builtins (void)
20264 tree fp16_type = make_node (REAL_TYPE);
20265 TYPE_PRECISION (fp16_type) = 16;
20266 layout_type (fp16_type);
20267 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
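/* With the type registered, user code such as

     __fp16 h = 1.0f;

   gets a 16-bit storage-only float: arithmetic is performed after
   promotion to float (see arm_promoted_type below), and conversions
   between __fp16 and double go through float (see
   arm_convert_to_type).  */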
20271 arm_init_builtins (void)
20273 arm_init_tls_builtins ();
20275 if (TARGET_REALLY_IWMMXT)
20276 arm_init_iwmmxt_builtins ();
  if (TARGET_NEON)
    arm_init_neon_builtins ();
20281 if (arm_fp16_format)
20282 arm_init_fp16_builtins ();
20285 /* Return the ARM builtin for CODE. */
20288 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20290 if (code >= ARM_BUILTIN_MAX)
20291 return error_mark_node;
20293 return arm_builtin_decls[code];
20296 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20298 static const char *
20299 arm_invalid_parameter_type (const_tree t)
20301 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");

  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */
20308 static const char *
20309 arm_invalid_return_type (const_tree t)
20311 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");

  return NULL;
}
20316 /* Implement TARGET_PROMOTED_TYPE. */
20319 arm_promoted_type (const_tree t)
20321 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;

  return NULL_TREE;
}
20326 /* Implement TARGET_CONVERT_TO_TYPE.
20327 Specifically, this hook implements the peculiarity of the ARM
20328 half-precision floating-point C semantics that requires conversions between
20329 __fp16 to or from double to do an intermediate conversion to float. */
20332 arm_convert_to_type (tree type, tree expr)
20334 tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
20337 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20338 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));

  return NULL_TREE;
}
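/* For example, converting an __fp16 value H to double behaves as
   (double) (float) h, and double-to-__fp16 likewise narrows via float;
   conversions between __fp16 and float are left to the default
   handling.  */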
20343 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20344 This simply adds HFmode as a supported mode; even though we don't
20345 implement arithmetic on this type directly, it's supported by
20346 optabs conversions, much the way the double-word arithmetic is
20347 special-cased in the default hook. */
20350 arm_scalar_mode_supported_p (enum machine_mode mode)
20352 if (mode == HFmode)
20353 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
20357 return default_scalar_mode_supported_p (mode);
20360 /* Errors in the source file can cause expand_expr to return const0_rtx
20361 where we expect a vector. To avoid crashing, use one of the vector
20362 clear instructions. */
20365 safe_vector_operand (rtx x, enum machine_mode mode)
  if (x != const0_rtx)
    return x;

  x = gen_reg_rtx (mode);
  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
20376 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20379 arm_expand_binop_builtin (enum insn_code icode,
20380 tree exp, rtx target)
20383 tree arg0 = CALL_EXPR_ARG (exp, 0);
20384 tree arg1 = CALL_EXPR_ARG (exp, 1);
20385 rtx op0 = expand_normal (arg0);
20386 rtx op1 = expand_normal (arg1);
20387 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20388 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20389 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20391 if (VECTOR_MODE_P (mode0))
20392 op0 = safe_vector_operand (op0, mode0);
20393 if (VECTOR_MODE_P (mode1))
20394 op1 = safe_vector_operand (op1, mode1);
  if (target == 0
      || GET_MODE (target) != tmode
20398 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20399 target = gen_reg_rtx (tmode);
20401 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20403 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20404 op0 = copy_to_mode_reg (mode0, op0);
20405 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20406 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
20415 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20418 arm_expand_unop_builtin (enum insn_code icode,
20419 tree exp, rtx target, int do_load)
20422 tree arg0 = CALL_EXPR_ARG (exp, 0);
20423 rtx op0 = expand_normal (arg0);
20424 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20425 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  if (target == 0
      || GET_MODE (target) != tmode
20429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20430 target = gen_reg_rtx (tmode);
20432 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20435 if (VECTOR_MODE_P (mode0))
20436 op0 = safe_vector_operand (op0, mode0);
20438 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20439 op0 = copy_to_mode_reg (mode0, op0);
  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;
20456 #define NEON_MAX_BUILTIN_ARGS 5
20458 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20459 and return an expression for the accessed memory.
20461 The intrinsic function operates on a block of registers that has
20462 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20463 The function references the memory at EXP in mode MEM_MODE;
20464 this mode may be BLKmode if no more suitable mode is available. */
20467 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20468 enum machine_mode reg_mode,
20469 neon_builtin_type_mode type_mode)
20471 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20472 tree elem_type, upper_bound, array_type;
20474 /* Work out the size of the register block in bytes. */
20475 reg_size = GET_MODE_SIZE (reg_mode);
20477 /* Work out the size of each vector in bytes. */
20478 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20479 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20481 /* Work out how many vectors there are. */
20482 gcc_assert (reg_size % vector_size == 0);
20483 nvectors = reg_size / vector_size;
20485 /* Work out how many elements are being loaded or stored.
20486 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20487 and memory elements; anything else implies a lane load or store. */
20488 if (mem_mode == reg_mode)
    nelems = vector_size * nvectors;
  else
    nelems = nvectors;
20493 /* Work out the type of each element. */
20494 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20495 elem_type = TREE_TYPE (TREE_TYPE (exp));
20497 /* Create a type that describes the full access. */
20498 upper_bound = build_int_cst (size_type_node, nelems - 1);
20499 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20501 /* Dereference EXP using that type. */
20502 exp = convert (build_pointer_type (array_type), exp);
20503 return fold_build2 (MEM_REF, array_type, exp,
20504 build_int_cst (TREE_TYPE (exp), 0));
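/* For instance, a two-register quadword load reads a 32-byte block
   (two 16-byte vectors); dereferencing EXP through the array type
   built above lets the alias oracle see one well-typed array access
   instead of an opaque pointer dereference.  */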
20507 /* Expand a Neon builtin. */
20509 arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, ...)
20515 tree arg[NEON_MAX_BUILTIN_ARGS];
20516 rtx op[NEON_MAX_BUILTIN_ARGS];
20517 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20518 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20519 enum machine_mode other_mode;
  if (have_retval
      && (target == 0
	  || GET_MODE (target) != tmode
20526 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20527 target = gen_reg_rtx (tmode);
20529 va_start (ap, exp);
20533 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
      if (thisarg == NEON_ARG_STOP)
	break;
20539 opno = argc + have_retval;
20540 mode[argc] = insn_data[icode].operand[opno].mode;
20541 arg[argc] = CALL_EXPR_ARG (exp, argc);
20542 if (thisarg == NEON_ARG_MEMORY)
20544 other_mode = insn_data[icode].operand[1 - opno].mode;
20545 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20546 other_mode, type_mode);
20548 op[argc] = expand_normal (arg[argc]);
20552 case NEON_ARG_COPY_TO_REG:
20553 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20554 if (!(*insn_data[icode].operand[opno].predicate)
20555 (op[argc], mode[argc]))
20556 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20559 case NEON_ARG_CONSTANT:
20560 /* FIXME: This error message is somewhat unhelpful. */
20561 if (!(*insn_data[icode].operand[opno].predicate)
20562 (op[argc], mode[argc]))
20563 error ("argument must be a constant");
20566 case NEON_ARG_MEMORY:
20567 gcc_assert (MEM_P (op[argc]));
20568 PUT_MODE (op[argc], mode[argc]);
	  /* ??? arm_neon.h uses the same built-in functions for signed
	     and unsigned accesses, casting where necessary.  This isn't
	     alias safe.  */
	  set_mem_alias_set (op[argc], 0);
20573 if (!(*insn_data[icode].operand[opno].predicate)
20574 (op[argc], mode[argc]))
20575 op[argc] = (replace_equiv_address
20576 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20579 case NEON_ARG_STOP:
20580 gcc_unreachable ();
20593 pat = GEN_FCN (icode) (target, op[0]);
20597 pat = GEN_FCN (icode) (target, op[0], op[1]);
20601 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20605 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20609 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20613 gcc_unreachable ();
20619 pat = GEN_FCN (icode) (op[0]);
20623 pat = GEN_FCN (icode) (op[0], op[1]);
20627 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20631 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20635 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20639 gcc_unreachable ();
20650 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20651 constants defined per-instruction or per instruction-variant. Instead, the
20652 required info is looked up in the table neon_builtin_data. */
20654 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20656 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20657 neon_itype itype = d->itype;
20658 enum insn_code icode = d->code;
20659 neon_builtin_type_mode type_mode = d->mode;
20666 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20667 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20671 case NEON_SCALARMUL:
20672 case NEON_SCALARMULL:
20673 case NEON_SCALARMULH:
20674 case NEON_SHIFTINSERT:
20675 case NEON_LOGICBINOP:
20676 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20677 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20681 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20682 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20683 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20687 case NEON_SHIFTIMM:
20688 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20689 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20693 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20694 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20698 case NEON_REINTERP:
20699 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20700 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20704 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20705 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20707 case NEON_RESULTPAIR:
20708 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20709 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20713 case NEON_LANEMULL:
20714 case NEON_LANEMULH:
20715 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20716 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20717 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20720 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20721 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20722 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20724 case NEON_SHIFTACC:
20725 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20726 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20727 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20729 case NEON_SCALARMAC:
20730 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20731 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20732 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20736 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20737 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20741 case NEON_LOADSTRUCT:
20742 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20743 NEON_ARG_MEMORY, NEON_ARG_STOP);
20745 case NEON_LOAD1LANE:
20746 case NEON_LOADSTRUCTLANE:
20747 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20748 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20752 case NEON_STORESTRUCT:
20753 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20754 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20756 case NEON_STORE1LANE:
20757 case NEON_STORESTRUCTLANE:
20758 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20759 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20763 gcc_unreachable ();
20766 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20768 neon_reinterpret (rtx dest, rtx src)
20770 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
20776 neon_emit_pair_result_insn (enum machine_mode mode,
20777 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20780 rtx mem = gen_rtx_MEM (mode, destaddr);
20781 rtx tmp1 = gen_reg_rtx (mode);
20782 rtx tmp2 = gen_reg_rtx (mode);
20784 emit_insn (intfn (tmp1, op1, op2, tmp2));
20786 emit_move_insn (mem, tmp1);
20787 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20788 emit_move_insn (mem, tmp2);
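/* E.g. for V2SImode the first result vector occupies bytes 0-7 at
   DESTADDR and the second bytes 8-15, the second store being offset by
   GET_MODE_SIZE (mode).  */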
20791 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20792 not to early-clobber SRC registers in the process.
20794 We assume that the operands described by SRC and DEST represent a
20795 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20796 number of components into which the copy has been decomposed. */
20798 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20802 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20803 || REGNO (operands[0]) < REGNO (operands[1]))
20805 for (i = 0; i < count; i++)
20807 operands[2 * i] = dest[i];
20808 operands[2 * i + 1] = src[i];
20813 for (i = 0; i < count; i++)
20815 operands[2 * i] = dest[count - i - 1];
20816 operands[2 * i + 1] = src[count - i - 1];
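/* Example: decomposing a copy into {d1, d2} <- {d0, d1} must emit
   d2 <- d1 before d1 <- d0, or the second component would read an
   already-clobbered d1; the descending branch above handles exactly
   that case.  */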
20821 /* Expand an expression EXP that calls a built-in function,
20822 with result going to TARGET if that's convenient
20823 (and in mode MODE if that's convenient).
20824 SUBTARGET may be used as the target for computing one of EXP's operands.
20825 IGNORE is nonzero if the value is to be ignored. */
20828 arm_expand_builtin (tree exp,
20830 rtx subtarget ATTRIBUTE_UNUSED,
20831 enum machine_mode mode ATTRIBUTE_UNUSED,
20832 int ignore ATTRIBUTE_UNUSED)
20834 const struct builtin_description * d;
20835 enum insn_code icode;
20836 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20844 int fcode = DECL_FUNCTION_CODE (fndecl);
20846 enum machine_mode tmode;
20847 enum machine_mode mode0;
20848 enum machine_mode mode1;
20849 enum machine_mode mode2;
20851 if (fcode >= ARM_BUILTIN_NEON_BASE)
20852 return arm_expand_neon_builtin (fcode, exp, target);
20856 case ARM_BUILTIN_TEXTRMSB:
20857 case ARM_BUILTIN_TEXTRMUB:
20858 case ARM_BUILTIN_TEXTRMSH:
20859 case ARM_BUILTIN_TEXTRMUH:
20860 case ARM_BUILTIN_TEXTRMSW:
20861 case ARM_BUILTIN_TEXTRMUW:
20862 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20863 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20864 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20865 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20866 : CODE_FOR_iwmmxt_textrmw);
20868 arg0 = CALL_EXPR_ARG (exp, 0);
20869 arg1 = CALL_EXPR_ARG (exp, 1);
20870 op0 = expand_normal (arg0);
20871 op1 = expand_normal (arg1);
20872 tmode = insn_data[icode].operand[0].mode;
20873 mode0 = insn_data[icode].operand[1].mode;
20874 mode1 = insn_data[icode].operand[2].mode;
20876 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20877 op0 = copy_to_mode_reg (mode0, op0);
20878 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20880 /* @@@ better error message */
20881 error ("selector must be an immediate");
20882 return gen_reg_rtx (tmode);
      if (target == 0
	  || GET_MODE (target) != tmode
20886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20887 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
20894 case ARM_BUILTIN_TINSRB:
20895 case ARM_BUILTIN_TINSRH:
20896 case ARM_BUILTIN_TINSRW:
20897 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20898 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20899 : CODE_FOR_iwmmxt_tinsrw);
20900 arg0 = CALL_EXPR_ARG (exp, 0);
20901 arg1 = CALL_EXPR_ARG (exp, 1);
20902 arg2 = CALL_EXPR_ARG (exp, 2);
20903 op0 = expand_normal (arg0);
20904 op1 = expand_normal (arg1);
20905 op2 = expand_normal (arg2);
20906 tmode = insn_data[icode].operand[0].mode;
20907 mode0 = insn_data[icode].operand[1].mode;
20908 mode1 = insn_data[icode].operand[2].mode;
20909 mode2 = insn_data[icode].operand[3].mode;
20911 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20912 op0 = copy_to_mode_reg (mode0, op0);
20913 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20914 op1 = copy_to_mode_reg (mode1, op1);
20915 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20917 /* @@@ better error message */
20918 error ("selector must be an immediate");
      if (target == 0
	  || GET_MODE (target) != tmode
20923 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20924 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
20931 case ARM_BUILTIN_SETWCX:
20932 arg0 = CALL_EXPR_ARG (exp, 0);
20933 arg1 = CALL_EXPR_ARG (exp, 1);
20934 op0 = force_reg (SImode, expand_normal (arg0));
20935 op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;
20939 case ARM_BUILTIN_GETWCX:
20940 arg0 = CALL_EXPR_ARG (exp, 0);
20941 op0 = expand_normal (arg0);
20942 target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;
20946 case ARM_BUILTIN_WSHUFH:
20947 icode = CODE_FOR_iwmmxt_wshufh;
20948 arg0 = CALL_EXPR_ARG (exp, 0);
20949 arg1 = CALL_EXPR_ARG (exp, 1);
20950 op0 = expand_normal (arg0);
20951 op1 = expand_normal (arg1);
20952 tmode = insn_data[icode].operand[0].mode;
20953 mode1 = insn_data[icode].operand[1].mode;
20954 mode2 = insn_data[icode].operand[2].mode;
20956 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20957 op0 = copy_to_mode_reg (mode1, op0);
20958 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20960 /* @@@ better error message */
20961 error ("mask must be an immediate");
      if (target == 0
	  || GET_MODE (target) != tmode
20966 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20967 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
20974 case ARM_BUILTIN_WSADB:
20975 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20976 case ARM_BUILTIN_WSADH:
20977 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20978 case ARM_BUILTIN_WSADBZ:
20979 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20980 case ARM_BUILTIN_WSADHZ:
20981 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20983 /* Several three-argument builtins. */
20984 case ARM_BUILTIN_WMACS:
20985 case ARM_BUILTIN_WMACU:
20986 case ARM_BUILTIN_WALIGN:
20987 case ARM_BUILTIN_TMIA:
20988 case ARM_BUILTIN_TMIAPH:
20989 case ARM_BUILTIN_TMIATT:
20990 case ARM_BUILTIN_TMIATB:
20991 case ARM_BUILTIN_TMIABT:
20992 case ARM_BUILTIN_TMIABB:
20993 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20994 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20995 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20996 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20997 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20998 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20999 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21000 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21001 : CODE_FOR_iwmmxt_walign);
21002 arg0 = CALL_EXPR_ARG (exp, 0);
21003 arg1 = CALL_EXPR_ARG (exp, 1);
21004 arg2 = CALL_EXPR_ARG (exp, 2);
21005 op0 = expand_normal (arg0);
21006 op1 = expand_normal (arg1);
21007 op2 = expand_normal (arg2);
21008 tmode = insn_data[icode].operand[0].mode;
21009 mode0 = insn_data[icode].operand[1].mode;
21010 mode1 = insn_data[icode].operand[2].mode;
21011 mode2 = insn_data[icode].operand[3].mode;
21013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21014 op0 = copy_to_mode_reg (mode0, op0);
21015 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21016 op1 = copy_to_mode_reg (mode1, op1);
21017 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21018 op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
21021 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21022 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
21029 case ARM_BUILTIN_WZERO:
21030 target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;
21034 case ARM_BUILTIN_THREAD_POINTER:
21035 return arm_load_tp (target);
21041 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21042 if (d->code == (const enum arm_builtins) fcode)
21043 return arm_expand_binop_builtin (d->icode, exp, target);
21045 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21046 if (d->code == (const enum arm_builtins) fcode)
21047 return arm_expand_unop_builtin (d->icode, exp, target, 0);
  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
21053 /* Return the number (counting from 0) of
21054 the least significant set bit in MASK. */
21057 number_of_first_bit_set (unsigned mask)
21059 return ctz_hwi (mask);
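/* E.g. number_of_first_bit_set (0x18) == 3, since bit 3 is the lowest
   bit set in binary 11000.  */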
21062 /* Like emit_multi_reg_push, but allowing for a different set of
21063 registers to be described as saved. MASK is the set of registers
21064 to be saved; REAL_REGS is the set of registers to be described as
21065 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21068 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21070 unsigned long regno;
21071 rtx par[10], tmp, reg, insn;
21074 /* Build the parallel of the registers actually being stored. */
21075 for (i = 0; mask; ++i, mask &= mask - 1)
21077 regno = ctz_hwi (mask);
21078 reg = gen_rtx_REG (SImode, regno);
      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
21088 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21089 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21090 tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;
21094 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21095 insn = emit_insn (tmp);
21097 /* Always build the stack adjustment note for unwind info. */
21098 tmp = plus_constant (stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  RTX_FRAME_RELATED_P (tmp) = 1;
  par[0] = tmp;
21102 /* Build the parallel of the registers recorded as saved for unwind. */
21103 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21105 regno = ctz_hwi (real_regs);
21106 reg = gen_rtx_REG (SImode, regno);
21108 tmp = plus_constant (stack_pointer_rtx, j * 4);
21109 tmp = gen_frame_mem (SImode, tmp);
21110 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }
21119 RTX_FRAME_RELATED_P (par[0]) = 1;
21120 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
21128 /* Emit code to push or pop registers to or from the stack. F is the
21129 assembly file. MASK is the registers to pop. */
21131 thumb_pop (FILE *f, unsigned long mask)
21134 int lo_mask = mask & 0xFF;
21135 int pushed_words = 0;
  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case: let thumb_exit emit the POP PC for us.  */
      thumb_exit (f, -1);
      return;
    }
21147 fprintf (f, "\tpop\t{");
21149 /* Look at the low registers first. */
21150 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21154 asm_fprintf (f, "%r", regno);
	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
21163 if (mask & (1 << PC_REGNUM))
21165 /* Catch popping the PC. */
21166 if (TARGET_INTERWORK || TARGET_BACKTRACE
21167 || crtl->calls_eh_return)
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
21171 fprintf (f, "}\n");
21173 thumb_exit (f, -1);
21182 asm_fprintf (f, "%r", PC_REGNUM);
21186 fprintf (f, "}\n");
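/* For example, thumb_pop (f, 0x800F) normally emits

	pop	{r0, r1, r2, r3, pc}

   unless interworking, backtracing or an EH return forces the PC to be
   popped into a work register and returned through BX instead.  */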
21189 /* Generate code to return from a thumb function.
21190 If 'reg_containing_return_addr' is -1, then the return address is
21191 actually on the stack, at the stack pointer. */
21193 thumb_exit (FILE *f, int reg_containing_return_addr)
21195 unsigned regs_available_for_popping;
21196 unsigned regs_to_pop;
21198 unsigned available;
21202 int restore_a4 = FALSE;
21204 /* Compute the registers we need to pop. */
21208 if (reg_containing_return_addr == -1)
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
21214 if (TARGET_BACKTRACE)
21216 /* Restore the (ARM) frame pointer and stack pointer. */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
21221 /* If there is nothing to pop then just emit the BX instruction and
21223 if (pops_needed == 0)
21225 if (crtl->calls_eh_return)
21226 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21228 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21231 /* Otherwise if we are not supporting interworking and we have not created
21232 a backtrace structure and the function was not entered in ARM mode then
21233 just pop the return address straight into the PC. */
21234 else if (!TARGET_INTERWORK
21235 && !TARGET_BACKTRACE
21236 && !is_called_in_ARM_mode (current_function_decl)
21237 && !crtl->calls_eh_return)
21239 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21243 /* Find out how many of the (return) argument registers we can corrupt. */
21244 regs_available_for_popping = 0;
21246 /* If returning via __builtin_eh_return, the bottom three registers
21247 all contain information needed for the return. */
  if (crtl->calls_eh_return)
    size = 12;
      /* Otherwise, deduce the registers used from the function's
	 return value.  This is more reliable than examining
21254 df_regs_ever_live_p () because that will be set if the register is
21255 ever used in the function, not just if the register is used
21256 to hold a return value. */
21258 if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));
21263 size = GET_MODE_SIZE (mode);
21267 /* In a void function we can use any argument register.
21268 In a function that returns a structure on the stack
21269 we can use the second and third argument registers. */
21270 if (mode == VOIDmode)
21271 regs_available_for_popping =
21272 (1 << ARG_REGISTER (1))
21273 | (1 << ARG_REGISTER (2))
21274 | (1 << ARG_REGISTER (3));
21276 regs_available_for_popping =
21277 (1 << ARG_REGISTER (2))
21278 | (1 << ARG_REGISTER (3));
21280 else if (size <= 4)
21281 regs_available_for_popping =
21282 (1 << ARG_REGISTER (2))
21283 | (1 << ARG_REGISTER (3));
21284 else if (size <= 8)
21285 regs_available_for_popping =
21286 (1 << ARG_REGISTER (3));
21289 /* Match registers to be popped with registers into which we pop them. */
21290 for (available = regs_available_for_popping,
21291 required = regs_to_pop;
21292 required != 0 && available != 0;
21293 available &= ~(available & - available),
       required &= ~(required & - required))
    pops_needed++;
21297 /* If we have any popping registers left over, remove them. */
  if (available > 0)
    regs_available_for_popping &= ~available;
21301 /* Otherwise if we need another popping register we can use
21302 the fourth argument register. */
21303 else if (pops_needed)
21305 /* If we have not found any free argument registers and
21306 reg a4 contains the return address, we must move it. */
21307 if (regs_available_for_popping == 0
21308 && reg_containing_return_addr == LAST_ARG_REGNUM)
21310 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21311 reg_containing_return_addr = LR_REGNUM;
21313 else if (size > 12)
21315 /* Register a4 is being used to hold part of the return value,
21316 but we have dire need of a free, low register. */
	      restore_a4 = TRUE;
	      asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21322 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21324 /* The fourth argument register is available. */
21325 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21331 /* Pop as many registers as we can. */
21332 thumb_pop (f, regs_available_for_popping);
21334 /* Process the registers we popped. */
21335 if (reg_containing_return_addr == -1)
21337 /* The return address was popped into the lowest numbered register. */
21338 regs_to_pop &= ~(1 << LR_REGNUM);
21340 reg_containing_return_addr =
21341 number_of_first_bit_set (regs_available_for_popping);
      /* Remove this register from the mask of available registers, so that
21344 the return address will not be corrupted by further pops. */
21345 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21348 /* If we popped other registers then handle them here. */
21349 if (regs_available_for_popping)
21353 /* Work out which register currently contains the frame pointer. */
21354 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21356 /* Move it into the correct place. */
21357 asm_fprintf (f, "\tmov\t%r, %r\n",
21358 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21360 /* (Temporarily) remove it from the mask of popped registers. */
21361 regs_available_for_popping &= ~(1 << frame_pointer);
21362 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21364 if (regs_available_for_popping)
21368 /* We popped the stack pointer as well,
21369 find the register that contains it. */
21370 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21372 /* Move it into the stack register. */
21373 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21375 /* At this point we have popped all necessary registers, so
21376 do not worry about restoring regs_available_for_popping
21377 to its correct value:
21379 assert (pops_needed == 0)
21380 assert (regs_available_for_popping == (1 << frame_pointer))
21381 assert (regs_to_pop == (1 << STACK_POINTER)) */
	  /* Since we have just moved the popped value into the frame
21386 pointer, the popping register is available for reuse, and
21387 we know that we still have the stack pointer left to pop. */
21388 regs_available_for_popping |= (1 << frame_pointer);
21392 /* If we still have registers left on the stack, but we no longer have
21393 any registers into which we can pop them, then we must move the return
21394 address into the link register and make available the register that
21396 if (regs_available_for_popping == 0 && pops_needed > 0)
21398 regs_available_for_popping |= 1 << reg_containing_return_addr;
21400 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21401 reg_containing_return_addr);
21403 reg_containing_return_addr = LR_REGNUM;
21406 /* If we have registers left on the stack then pop some more.
21407 We know that at most we will want to pop FP and SP. */
21408 if (pops_needed > 0)
21413 thumb_pop (f, regs_available_for_popping);
21415 /* We have popped either FP or SP.
21416 Move whichever one it is into the correct register. */
21417 popped_into = number_of_first_bit_set (regs_available_for_popping);
21418 move_to = number_of_first_bit_set (regs_to_pop);
21420 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21422 regs_to_pop &= ~(1 << move_to);
21427 /* If we still have not popped everything then we must have only
21428 had one register available to us and we are now popping the SP. */
21429 if (pops_needed > 0)
21433 thumb_pop (f, regs_available_for_popping);
21435 popped_into = number_of_first_bit_set (regs_available_for_popping);
21437 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21439 assert (regs_to_pop == (1 << STACK_POINTER))
21440 assert (pops_needed == 1)
21444 /* If necessary restore the a4 register. */
21447 if (reg_containing_return_addr != LR_REGNUM)
21449 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21450 reg_containing_return_addr = LR_REGNUM;
21453 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21456 if (crtl->calls_eh_return)
21457 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21459 /* Return to caller. */
21460 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21463 /* Scan INSN just before assembler is output for it.
21464 For Thumb-1, we track the status of the condition codes; this
21465 information is used in the cbranchsi4_insn pattern. */
21467 thumb1_final_prescan_insn (rtx insn)
21469 if (flag_print_asm_name)
21470 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21471 INSN_ADDRESSES (INSN_UID (insn)));
21472 /* Don't overwrite the previous setter when we get to a cbranch. */
21473 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21475 enum attr_conds conds;
21477 if (cfun->machine->thumb1_cc_insn)
21479 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
21483 conds = get_attr_conds (insn);
21484 if (conds == CONDS_SET)
21486 rtx set = single_set (insn);
21487 cfun->machine->thumb1_cc_insn = insn;
21488 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21489 cfun->machine->thumb1_cc_op1 = const0_rtx;
21490 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21491 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21493 rtx src1 = XEXP (SET_SRC (set), 1);
21494 if (src1 == const0_rtx)
21495 cfun->machine->thumb1_cc_mode = CCmode;
21498 else if (conds != CONDS_NOCOND)
21499 cfun->machine->thumb1_cc_insn = NULL_RTX;
21504 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21506 unsigned HOST_WIDE_INT mask = 0xff;
21509 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21510 if (val == 0) /* XXX */
  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
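/* E.g. 0x4400 (== 0x44 << 8) passes the test above, while 0x101 needs
   nine significant bits and fails; constants that pass can be
   synthesized as an 8-bit move followed by a left shift.  */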
21520 /* Returns nonzero if the current function contains,
21521 or might contain a far jump. */
21523 thumb_far_jump_used_p (void)
21527 /* This test is only important for leaf functions. */
21528 /* assert (!leaf_function_p ()); */
21530 /* If we have already decided that far jumps may be used,
21531 do not bother checking again, and always return true even if
21532 it turns out that they are not being used. Once we have made
21533 the decision that far jumps are present (and that hence the link
21534 register will be pushed onto the stack) we cannot go back on it. */
  if (cfun->machine->far_jump_used)
    return 1;
21538 /* If this function is not being called from the prologue/epilogue
21539 generation code then it must be being called from the
21540 INITIAL_ELIMINATION_OFFSET macro. */
21541 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21543 /* In this case we know that we are being asked about the elimination
21544 of the arg pointer register. If that register is not being used,
21545 then there are no arguments on the stack, and we do not have to
21546 worry that a far jump might force the prologue to push the link
21547 register, changing the stack offsets. In this case we can just
21548 return false, since the presence of far jumps in the function will
21549 not affect stack offsets.
21551 If the arg pointer is live (or if it was live, but has now been
21552 eliminated and so set to dead) then we do have to test to see if
21553 the function might contain a far jump. This test can lead to some
	 false negatives, since before reload is completed the length of
21555 branch instructions is not known, so gcc defaults to returning their
21556 longest length, which in turn sets the far jump attribute to true.
21558 A false negative will not result in bad code being generated, but it
21559 will result in a needless push and pop of the link register. We
21560 hope that this does not occur too often.
21562 If we need doubleword stack alignment this could affect the other
21563 elimination offsets so we can't risk getting it wrong. */
21564 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21565 cfun->machine->arg_pointer_live = 1;
  else if (!cfun->machine->arg_pointer_live)
    return 0;
21570 /* Check to see if the function contains a branch
21571 insn with the far jump attribute set. */
21572 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21574 if (GET_CODE (insn) == JUMP_INSN
21575 /* Ignore tablejump patterns. */
21576 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21577 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21578 && get_attr_far_jump (insn) == FAR_JUMP_YES
21581 /* Record the fact that we have decided that
21582 the function does use far jumps. */
	  cfun->machine->far_jump_used = 1;
	  return 1;
21591 /* Return nonzero if FUNC must be entered in ARM mode. */
21593 is_called_in_ARM_mode (tree func)
21595 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21597 /* Ignore the problem about functions whose address is taken. */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;
21602 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21608 /* Given the stack offsets and register mask in OFFSETS, decide how
21609 many additional registers to push instead of subtracting a constant
21610 from SP. For epilogues the principle is the same except we use pop.
21611 FOR_PROLOGUE indicates which we're generating. */
21613 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21615 HOST_WIDE_INT amount;
21616 unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
21619 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21620 /* Then count how many other high registers will need to be pushed. */
21621 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21622 int n_free, reg_base;
21624 if (!for_prologue && frame_pointer_needed)
21625 amount = offsets->locals_base - offsets->saved_regs;
21627 amount = offsets->outgoing_args - offsets->saved_regs;
21629 /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;
21635 /* Can't do this if there are high registers to push. */
  if (high_regs_pushed != 0)
    return 0;
21639 /* Shouldn't do it in the prologue if no registers would normally
21640 be pushed at all. In the epilogue, also allow it if we'll have
21641 a pop insn for the PC. */
21644 || TARGET_BACKTRACE
21645 || (live_regs_mask & 1 << LR_REGNUM) == 0
21646 || TARGET_INTERWORK
21647 || crtl->args.pretend_args_size != 0))
21650 /* Don't do this if thumb_expand_prologue wants to emit instructions
21651 between the push and the stack frame allocation. */
21653 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21654 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21661 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21662 live_regs_mask >>= reg_base;
21665 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21666 && (for_prologue || call_used_regs[reg_base + n_free]))
21668 live_regs_mask >>= 1;
21674 gcc_assert (amount / 4 * 4 == amount);
21676 if (amount >= 512 && (amount - n_free * 4) < 512)
21677 return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;

  return 0;
}
21683 /* The bits which aren't usefully expanded as rtl. */
21685 thumb_unexpanded_epilogue (void)
21687 arm_stack_offsets *offsets;
21689 unsigned long live_regs_mask = 0;
21690 int high_regs_pushed = 0;
21692 int had_to_push_lr;
  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";
21701 offsets = arm_get_frame_offsets ();
21702 live_regs_mask = offsets->saved_regs_mask;
21703 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  /* Deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
21707 will be set if the register is ever used in the function, not just if
21708 the register is used to hold a return value. */
21709 size = arm_size_return_regs ();
21711 extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
				       / UNITS_PER_WORD);
    }
21719 /* The prolog may have pushed some high registers to use as
21720 work registers. e.g. the testsuite file:
21721 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21722 compiles to produce:
21723 push {r4, r5, r6, r7, lr}
21727 as part of the prolog. We have to undo that pushing here. */
21729 if (high_regs_pushed)
21731 unsigned long mask = live_regs_mask & 0xff;
      /* The available low registers depend on the size of the value we are
	 returning.  */
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");
21747 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21748 if (live_regs_mask & (1 << next_hi_reg))
21751 while (high_regs_pushed)
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
21755 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21757 if (mask & (1 << regno))
21758 high_regs_pushed--;
21759 if (high_regs_pushed == 0)
21763 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21765 /* Pop the values into the low register(s). */
21766 thumb_pop (asm_out_file, mask);
21768 /* Move the value(s) into the high registers. */
21769 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21771 if (mask & (1 << regno))
21773 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21776 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21777 if (live_regs_mask & (1 << next_hi_reg))
21782 live_regs_mask &= ~0x0f00;
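
/* As an illustration (assuming r8 and r9 were pushed, and a return
   value small enough that r2 and r3 are free), the loop above emits
   roughly:
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3  */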
21785 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21786 live_regs_mask &= 0xff;
21788 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21790 /* Pop the return address into the PC. */
21791 if (had_to_push_lr)
21792 live_regs_mask |= 1 << PC_REGNUM;
21794 /* Either no argument registers were pushed or a backtrace
21795 structure was created which includes an adjusted stack
21796 pointer, so just pop everything. */
21797 if (live_regs_mask)
21798 thumb_pop (asm_out_file, live_regs_mask);
21800 /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
21802 Note that thumb_pop has already called thumb_exit if the
21803 PC was in the list. */
21804 if (!had_to_push_lr)
21805 thumb_exit (asm_out_file, LR_REGNUM);
21809 /* Pop everything but the return address. */
21810 if (live_regs_mask)
21811 thumb_pop (asm_out_file, live_regs_mask);
21813 if (had_to_push_lr)
21817 /* We have no free low regs, so save one. */
21818 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21822 /* Get the return address into a temporary register. */
21823 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21827 /* Move the return address to lr. */
21828 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21830 /* Restore the low register. */
21831 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21836 regno = LAST_ARG_REGNUM;
21841 /* Remove the argument registers that were pushed onto the stack. */
21842 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21843 SP_REGNUM, SP_REGNUM,
21844 crtl->args.pretend_args_size);
21846 thumb_exit (asm_out_file, regno);
21852 /* Functions to save and restore machine-specific function data. */
21853 static struct machine_function *
21854 arm_init_machine_status (void)
21856 struct machine_function *machine;
21857 machine = ggc_alloc_cleared_machine_function ();
#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
21865 /* Return an RTX indicating where the return address to the
21866 calling function can be found. */
21868 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21873 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21876 /* Do anything needed before RTL is emitted for each function. */
21878 arm_init_expanders (void)
21880 /* Arrange to initialize and mark the machine per-function status. */
21881 init_machine_status = arm_init_machine_status;
21883 /* This is to stop the combine pass optimizing away the alignment
21884 adjustment of va_arg. */
21885 /* ??? It is claimed that this should not be necessary. */
21887 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
/* Like arm_compute_initial_elimination_offset.  Simpler because there
21892 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21893 to point at the base of the local variables after static stack
21894 space for a function has been allocated. */
21897 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21899 arm_stack_offsets *offsets;
21901 offsets = arm_get_frame_offsets ();
21905 case ARG_POINTER_REGNUM:
21908 case STACK_POINTER_REGNUM:
21909 return offsets->outgoing_args - offsets->saved_args;
21911 case FRAME_POINTER_REGNUM:
21912 return offsets->soft_frame - offsets->saved_args;
21914 case ARM_HARD_FRAME_POINTER_REGNUM:
21915 return offsets->saved_regs - offsets->saved_args;
21917 case THUMB_HARD_FRAME_POINTER_REGNUM:
21918 return offsets->locals_base - offsets->saved_args;
21921 gcc_unreachable ();
21925 case FRAME_POINTER_REGNUM:
21928 case STACK_POINTER_REGNUM:
21929 return offsets->outgoing_args - offsets->soft_frame;
21931 case ARM_HARD_FRAME_POINTER_REGNUM:
21932 return offsets->saved_regs - offsets->soft_frame;
21934 case THUMB_HARD_FRAME_POINTER_REGNUM:
21935 return offsets->locals_base - offsets->soft_frame;
21938 gcc_unreachable ();
21943 gcc_unreachable ();
21947 /* Generate the function's prologue. */
21950 thumb1_expand_prologue (void)
21954 HOST_WIDE_INT amount;
21955 arm_stack_offsets *offsets;
21956 unsigned long func_type;
21958 unsigned long live_regs_mask;
21959 unsigned long l_mask;
21960 unsigned high_regs_pushed = 0;
21962 func_type = arm_current_func_type ();
21964 /* Naked functions don't have prologues. */
21965 if (IS_NAKED (func_type))
21968 if (IS_INTERRUPT (func_type))
      error ("interrupt service routines cannot be coded in Thumb mode");
21974 if (is_called_in_ARM_mode (current_function_decl))
21975 emit_insn (gen_prologue_thumb1_interwork ());
21977 offsets = arm_get_frame_offsets ();
21978 live_regs_mask = offsets->saved_regs_mask;
21980 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21981 l_mask = live_regs_mask & 0x40ff;
21982 /* Then count how many other high registers will need to be pushed. */
21983 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21985 if (crtl->args.pretend_args_size)
21987 rtx x = GEN_INT (-crtl->args.pretend_args_size);
21989 if (cfun->machine->uses_anonymous_args)
21991 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21992 unsigned long mask;
21994 mask = 1ul << (LAST_ARG_REGNUM + 1);
21995 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
21997 insn = thumb1_emit_multi_reg_push (mask, 0);
22001 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22002 stack_pointer_rtx, x));
22004 RTX_FRAME_RELATED_P (insn) = 1;
22007 if (TARGET_BACKTRACE)
22009 HOST_WIDE_INT offset = 0;
22010 unsigned work_register;
22011 rtx work_reg, x, arm_hfp_rtx;
22013 /* We have been asked to create a stack backtrace structure.
22014 The code looks like this:
22018 0 sub SP, #16 Reserve space for 4 registers.
22019 2 push {R7} Push low registers.
22020 4 add R7, SP, #20 Get the stack pointer before the push.
22021 6 str R7, [SP, #8] Store the stack pointer
22022 (before reserving the space).
22023 8 mov R7, PC Get hold of the start of this code + 12.
22024 10 str R7, [SP, #16] Store it.
22025 12 mov R7, FP Get hold of the current frame pointer.
22026 14 str R7, [SP, #4] Store it.
22027 16 mov R7, LR Get hold of the current return address.
22028 18 str R7, [SP, #12] Store it.
22029 20 add R7, SP, #16 Point at the start of the
22030 backtrace structure.
22031 22 mov FP, R7 Put this value into the frame pointer. */
22033 work_register = thumb_find_work_register (live_regs_mask);
22034 work_reg = gen_rtx_REG (SImode, work_register);
22035 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22037 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22038 stack_pointer_rtx, GEN_INT (-16)));
22039 RTX_FRAME_RELATED_P (insn) = 1;
22043 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22044 RTX_FRAME_RELATED_P (insn) = 1;
22046 offset = bit_count (l_mask) * UNITS_PER_WORD;
22049 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22050 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22052 x = plus_constant (stack_pointer_rtx, offset + 4);
22053 x = gen_frame_mem (SImode, x);
22054 emit_move_insn (x, work_reg);
22056 /* Make sure that the instruction fetching the PC is in the right place
22057 to calculate "start of backtrace creation code + 12". */
22058 /* ??? The stores using the common WORK_REG ought to be enough to
22059 prevent the scheduler from doing anything weird. Failing that
22060 we could always move all of the following into an UNSPEC_VOLATILE. */
22063 x = gen_rtx_REG (SImode, PC_REGNUM);
22064 emit_move_insn (work_reg, x);
22066 x = plus_constant (stack_pointer_rtx, offset + 12);
22067 x = gen_frame_mem (SImode, x);
22068 emit_move_insn (x, work_reg);
22070 emit_move_insn (work_reg, arm_hfp_rtx);
22072 x = plus_constant (stack_pointer_rtx, offset);
22073 x = gen_frame_mem (SImode, x);
22074 emit_move_insn (x, work_reg);
22078 emit_move_insn (work_reg, arm_hfp_rtx);
22080 x = plus_constant (stack_pointer_rtx, offset);
22081 x = gen_frame_mem (SImode, x);
22082 emit_move_insn (x, work_reg);
22084 x = gen_rtx_REG (SImode, PC_REGNUM);
22085 emit_move_insn (work_reg, x);
22087 x = plus_constant (stack_pointer_rtx, offset + 12);
22088 x = gen_frame_mem (SImode, x);
22089 emit_move_insn (x, work_reg);
22092 x = gen_rtx_REG (SImode, LR_REGNUM);
22093 emit_move_insn (work_reg, x);
22095 x = plus_constant (stack_pointer_rtx, offset + 8);
22096 x = gen_frame_mem (SImode, x);
22097 emit_move_insn (x, work_reg);
22099 x = GEN_INT (offset + 12);
22100 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22102 emit_move_insn (arm_hfp_rtx, work_reg);
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
22108 else if ((l_mask & 0xff) != 0
22109 || (high_regs_pushed == 0 && l_mask))
22111 unsigned long mask = l_mask;
22112 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22113 insn = thumb1_emit_multi_reg_push (mask, mask);
22114 RTX_FRAME_RELATED_P (insn) = 1;
22117 if (high_regs_pushed)
22119 unsigned pushable_regs;
22120 unsigned next_hi_reg;
22122 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22123 if (live_regs_mask & (1 << next_hi_reg))
22126 pushable_regs = l_mask & 0xff;
22128 if (pushable_regs == 0)
22129 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22131 while (high_regs_pushed > 0)
22133 unsigned long real_regs_mask = 0;
22135 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22137 if (pushable_regs & (1 << regno))
22139 emit_move_insn (gen_rtx_REG (SImode, regno),
22140 gen_rtx_REG (SImode, next_hi_reg));
22142 high_regs_pushed --;
22143 real_regs_mask |= (1 << next_hi_reg);
22145 if (high_regs_pushed)
22147 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22149 if (live_regs_mask & (1 << next_hi_reg))
22154 pushable_regs &= ~((1 << regno) - 1);
22160 /* If we had to find a work register and we have not yet
22161 saved the LR then add it to the list of regs to push. */
22162 if (l_mask == (1 << LR_REGNUM))
22164 pushable_regs |= l_mask;
22165 real_regs_mask |= l_mask;
22169 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22170 RTX_FRAME_RELATED_P (insn) = 1;
22174 /* Load the pic register before setting the frame pointer,
22175 so we can use r7 as a temporary work register. */
22176 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22177 arm_load_pic_register (live_regs_mask);
22179 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22180 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22181 stack_pointer_rtx);
22183 if (flag_stack_usage_info)
22184 current_function_static_stack_size
22185 = offsets->outgoing_args - offsets->saved_args;
22187 amount = offsets->outgoing_args - offsets->saved_regs;
22188 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22193 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22194 GEN_INT (- amount)));
22195 RTX_FRAME_RELATED_P (insn) = 1;
22201 /* The stack decrement is too big for an immediate value in a single
22202 insn. In theory we could issue multiple subtracts, but after
22203 three of them it becomes more space efficient to place the full
22204 value in the constant pool and load into a register. (Also the
22205 ARM debugger really likes to see only one stack decrement per
22206 function). So instead we look for a scratch register into which
22207 we can load the decrement, and then we subtract this from the
     stack pointer.  Unfortunately, on Thumb the only available
22209 scratch registers are the argument registers, and we cannot use
22210 these as they may hold arguments to the function. Instead we
22211 attempt to locate a call preserved register which is used by this
22212 function. If we can find one, then we know that it will have
	 been pushed at the start of the prologue and so we can corrupt
	 it now.  */
22215 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22216 if (live_regs_mask & (1 << regno))
      gcc_assert (regno <= LAST_LO_REGNUM);
22221 reg = gen_rtx_REG (SImode, regno);
22223 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22225 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22226 stack_pointer_rtx, reg));
22228 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22229 plus_constant (stack_pointer_rtx,
22231 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22232 RTX_FRAME_RELATED_P (insn) = 1;
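
/* A sketch of the resulting sequence (assumed: r4 is live in this
   function and the frame is 4096 bytes; the literal label is
   hypothetical):
	ldr	r4, .Lnnn	@ .Lnnn: .word -4096
	add	sp, r4
   r4's original value is restored by the register pop in the
   epilogue.  */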
22236 if (frame_pointer_needed)
22237 thumb_set_frame_pointer (offsets);
22239 /* If we are profiling, make sure no instructions are scheduled before
22240 the call to mcount. Similarly if the user has requested no
22241 scheduling in the prolog. Similarly if we want non-call exceptions
22242 using the EABI unwinder, to prevent faulting instructions from being
22243 swapped with a stack adjustment. */
22244 if (crtl->profile || !TARGET_SCHED_PROLOG
22245 || (arm_except_unwind_info (&global_options) == UI_TARGET
22246 && cfun->can_throw_non_call_exceptions))
22247 emit_insn (gen_blockage ());
22249 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22250 if (live_regs_mask & 0xff)
22251 cfun->machine->lr_save_eliminated = 0;
22256 thumb1_expand_epilogue (void)
22258 HOST_WIDE_INT amount;
22259 arm_stack_offsets *offsets;
  /* Naked functions don't have epilogues.  */
22263 if (IS_NAKED (arm_current_func_type ()))
22266 offsets = arm_get_frame_offsets ();
22267 amount = offsets->outgoing_args - offsets->saved_regs;
22269 if (frame_pointer_needed)
22271 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22272 amount = offsets->locals_base - offsets->saved_regs;
22274 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22276 gcc_assert (amount >= 0);
22279 emit_insn (gen_blockage ());
22282 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22283 GEN_INT (amount)));
22286 /* r3 is always free in the epilogue. */
22287 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22289 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22290 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22294 /* Emit a USE (stack_pointer_rtx), so that
22295 the stack adjustment will not be deleted. */
22296 emit_insn (gen_prologue_use (stack_pointer_rtx));
22298 if (crtl->profile || !TARGET_SCHED_PROLOG)
22299 emit_insn (gen_blockage ());
22301 /* Emit a clobber for each insn that will be restored in the epilogue,
22302 so that flow2 will get register lifetimes correct. */
22303 for (regno = 0; regno < 13; regno++)
22304 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22305 emit_clobber (gen_rtx_REG (SImode, regno));
22307 if (! df_regs_ever_live_p (LR_REGNUM))
22308 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to Thumb mode.  */
22315 thumb1_output_interwork (void)
22318 FILE *f = asm_out_file;
22320 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22321 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22323 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22325 /* Generate code sequence to switch us into Thumb mode. */
22326 /* The .code 32 directive has already been emitted by
22327 ASM_DECLARE_FUNCTION_NAME. */
22328 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22329 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22331 /* Generate a label, so that the debugger will notice the
22332 change in instruction sets. This label is also used by
22333 the assembler to bypass the ARM code when this function
22334 is called from a Thumb encoded function elsewhere in the
22335 same file. Hence the definition of STUB_NAME here must
22336 agree with the definition in gas/config/tc-arm.c. */
22338 #define STUB_NAME ".real_start_of"
22340 fprintf (f, "\t.code\t16\n");
22342 if (arm_dllexport_name_p (name))
22343 name = arm_strip_name_encoding (name);
22345 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22346 fprintf (f, "\t.thumb_func\n");
22347 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
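
/* For a function "foo" the emitted stub is therefore roughly (assuming
   no dllexport prefix to strip and an empty user label prefix):
	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of_foo
	.thumb_func
   .real_start_of_foo:  */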
22352 /* Handle the case of a double word load into a low register from
22353 a computed memory address. The computed address may involve a
22354 register which is overwritten by the load. */
22356 thumb_load_double_from_address (rtx *operands)
22364 gcc_assert (GET_CODE (operands[0]) == REG);
22365 gcc_assert (GET_CODE (operands[1]) == MEM);
22367 /* Get the memory address. */
22368 addr = XEXP (operands[1], 0);
22370 /* Work out how the memory address is computed. */
22371 switch (GET_CODE (addr))
22374 operands[2] = adjust_address (operands[1], SImode, 4);
22376 if (REGNO (operands[0]) == REGNO (addr))
22378 output_asm_insn ("ldr\t%H0, %2", operands);
22379 output_asm_insn ("ldr\t%0, %1", operands);
22383 output_asm_insn ("ldr\t%0, %1", operands);
22384 output_asm_insn ("ldr\t%H0, %2", operands);
22389 /* Compute <address> + 4 for the high order load. */
22390 operands[2] = adjust_address (operands[1], SImode, 4);
22392 output_asm_insn ("ldr\t%0, %1", operands);
22393 output_asm_insn ("ldr\t%H0, %2", operands);
22397 arg1 = XEXP (addr, 0);
22398 arg2 = XEXP (addr, 1);
22400 if (CONSTANT_P (arg1))
22401 base = arg2, offset = arg1;
22403 base = arg1, offset = arg2;
22405 gcc_assert (GET_CODE (base) == REG);
22407 /* Catch the case of <address> = <reg> + <reg> */
22408 if (GET_CODE (offset) == REG)
22410 int reg_offset = REGNO (offset);
22411 int reg_base = REGNO (base);
22412 int reg_dest = REGNO (operands[0]);
22414 /* Add the base and offset registers together into the
22415 higher destination register. */
22416 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22417 reg_dest + 1, reg_base, reg_offset);
22419 /* Load the lower destination register from the address in
22420 the higher destination register. */
22421 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22422 reg_dest, reg_dest + 1);
	  /* Load the higher destination register from its own address
	     plus 4.  */
22426 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22427 reg_dest + 1, reg_dest + 1);
22431 /* Compute <address> + 4 for the high order load. */
22432 operands[2] = adjust_address (operands[1], SImode, 4);
22434 /* If the computed address is held in the low order register
22435 then load the high order register first, otherwise always
22436 load the low order register first. */
22437 if (REGNO (operands[0]) == REGNO (base))
22439 output_asm_insn ("ldr\t%H0, %2", operands);
22440 output_asm_insn ("ldr\t%0, %1", operands);
22444 output_asm_insn ("ldr\t%0, %1", operands);
22445 output_asm_insn ("ldr\t%H0, %2", operands);
      /* With no registers to worry about we can just load the value
	 directly.  */
22453 operands[2] = adjust_address (operands[1], SImode, 4);
22455 output_asm_insn ("ldr\t%H0, %2", operands);
22456 output_asm_insn ("ldr\t%0, %1", operands);
22460 gcc_unreachable ();
22467 thumb_output_move_mem_multiple (int n, rtx *operands)
22474 if (REGNO (operands[4]) > REGNO (operands[5]))
22477 operands[4] = operands[5];
22480 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22481 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22485 if (REGNO (operands[4]) > REGNO (operands[5]))
22488 operands[4] = operands[5];
22491 if (REGNO (operands[5]) > REGNO (operands[6]))
22494 operands[5] = operands[6];
22497 if (REGNO (operands[4]) > REGNO (operands[5]))
22500 operands[4] = operands[5];
22504 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22505 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22509 gcc_unreachable ();
22515 /* Output a call-via instruction for thumb state. */
22517 thumb_call_via_reg (rtx reg)
22519 int regno = REGNO (reg);
22522 gcc_assert (regno < LR_REGNUM);
22524 /* If we are in the normal text section we can use a single instance
22525 per compilation unit. If we are doing function sections, then we need
22526 an entry per section, since we can't rely on reachability. */
22527 if (in_section == text_section)
22529 thumb_call_reg_needed = 1;
22531 if (thumb_call_via_label[regno] == NULL)
22532 thumb_call_via_label[regno] = gen_label_rtx ();
22533 labelp = thumb_call_via_label + regno;
22537 if (cfun->machine->call_via[regno] == NULL)
22538 cfun->machine->call_via[regno] = gen_label_rtx ();
22539 labelp = cfun->machine->call_via + regno;
22542 output_asm_insn ("bl\t%a0", labelp);
22546 /* Routines for generating rtl. */
22548 thumb_expand_movmemqi (rtx *operands)
22550 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22551 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22552 HOST_WIDE_INT len = INTVAL (operands[2]);
22553 HOST_WIDE_INT offset = 0;
22557 emit_insn (gen_movmem12b (out, in, out, in));
22563 emit_insn (gen_movmem8b (out, in, out, in));
22569 rtx reg = gen_reg_rtx (SImode);
22570 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22571 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22578 rtx reg = gen_reg_rtx (HImode);
22579 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22580 plus_constant (in, offset))));
22581 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22589 rtx reg = gen_reg_rtx (QImode);
22590 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22591 plus_constant (in, offset))));
22592 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
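
/* A worked example (hypothetical length): for len == 23 the loops
   above emit one 12-byte and one 8-byte ldmia/stmia block move,
   leaving 3 bytes, which are then copied as one halfword followed by
   one byte.  */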
22598 thumb_reload_out_hi (rtx *operands)
22600 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22603 /* Handle reading a half-word from memory during reload. */
22605 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22607 gcc_unreachable ();
22610 /* Return the length of a function name prefix
22611 that starts with the character 'c'. */
22613 arm_get_strip_length (int c)
22617 ARM_NAME_ENCODING_LENGTHS
22622 /* Return a pointer to a function's name with any
22623 and all prefix encodings stripped from it. */
22625 arm_strip_name_encoding (const char *name)
22629 while ((skip = arm_get_strip_length (* name)))
22635 /* If there is a '*' anywhere in the name's prefix, then
22636 emit the stripped name verbatim, otherwise prepend an
22637 underscore if leading underscores are being used. */
22639 arm_asm_output_labelref (FILE *stream, const char *name)
22644 while ((skip = arm_get_strip_length (* name)))
22646 verbatim |= (*name == '*');
22651 fputs (name, stream);
22653 asm_fprintf (stream, "%U%s", name);
22657 arm_file_start (void)
22661 if (TARGET_UNIFIED_ASM)
22662 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22666 const char *fpu_name;
22667 if (arm_selected_arch)
22668 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22669 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22670 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22672 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22674 if (TARGET_SOFT_FLOAT)
22677 fpu_name = "softvfp";
22679 fpu_name = "softfpa";
22683 fpu_name = arm_fpu_desc->name;
22684 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22686 if (TARGET_HARD_FLOAT)
22687 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22688 if (TARGET_HARD_FLOAT_ABI)
22689 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22692 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22694 /* Some of these attributes only apply when the corresponding features
22695 are used. However we don't have any easy way of figuring this out.
22696 Conservatively record the setting that would have been used. */
22698 if (flag_rounding_math)
22699 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22701 if (!flag_unsafe_math_optimizations)
22703 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22704 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22706 if (flag_signaling_nans)
22707 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22709 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22710 flag_finite_math_only ? 1 : 3);
22712 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22713 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22714 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22716 /* Tag_ABI_optimization_goals. */
22719 else if (optimize >= 2)
22725 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22727 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22729 if (arm_fp16_format)
22730 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
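
/* As an illustration (assumed flags: -O2, default math options; tag
   names shown as comments for reference only), the block above records
   roughly:
	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute 21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute 23, 3	@ Tag_ABI_FP_number_model
	.eabi_attribute 24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute 26, 2	@ Tag_ABI_enum_size
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */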
22732 if (arm_lang_output_object_attributes_hook)
22733 arm_lang_output_object_attributes_hook();
22736 default_file_start ();
22740 arm_file_end (void)
22744 if (NEED_INDICATE_EXEC_STACK)
22745 /* Add .note.GNU-stack. */
22746 file_end_indicate_exec_stack ();
22748 if (! thumb_call_reg_needed)
22751 switch_to_section (text_section);
22752 asm_fprintf (asm_out_file, "\t.code 16\n");
22753 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22755 for (regno = 0; regno < LR_REGNUM; regno++)
22757 rtx label = thumb_call_via_label[regno];
22761 targetm.asm_out.internal_label (asm_out_file, "L",
22762 CODE_LABEL_NUMBER (label));
22763 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */
22776 arm_encode_section_info (tree decl, rtx rtl, int first)
22778 if (optimize > 0 && TREE_CONSTANT (decl))
22779 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22781 default_encode_section_info (decl, rtl, first);
22783 #endif /* !ARM_PE */
22786 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22788 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22789 && !strcmp (prefix, "L"))
22791 arm_ccfsm_state = 0;
22792 arm_target_insn = NULL;
22794 default_internal_label (stream, prefix, labelno);
22797 /* Output code to add DELTA to the first argument, and then jump
22798 to FUNCTION. Used for C++ multiple inheritance. */
22800 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22801 HOST_WIDE_INT delta,
22802 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22805 static int thunk_label = 0;
22808 int mi_delta = delta;
22809 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22811 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22814 mi_delta = - mi_delta;
22818 int labelno = thunk_label++;
22819 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
22821 if (TARGET_THUMB1_ONLY)
22823 /* push r3 so we can use it as a temporary. */
22824 /* TODO: Omit this save if r3 is not used. */
22825 fputs ("\tpush {r3}\n", file);
22826 fputs ("\tldr\tr3, ", file);
22830 fputs ("\tldr\tr12, ", file);
22832 assemble_name (file, label);
22833 fputc ('\n', file);
22836 /* If we are generating PIC, the ldr instruction below loads
22837 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22838 the address of the add + 8, so we have:
	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against Thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
22847 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22848 assemble_name (file, labelpc);
22849 fputs (":\n", file);
22850 if (TARGET_THUMB1_ONLY)
22852 /* This is 2 insns after the start of the thunk, so we know it
22853 is 4-byte aligned. */
22854 fputs ("\tadd\tr3, pc, r3\n", file);
22855 fputs ("\tmov r12, r3\n", file);
22858 fputs ("\tadd\tr12, pc, r12\n", file);
22860 else if (TARGET_THUMB1_ONLY)
22861 fputs ("\tmov r12, r3\n", file);
22863 if (TARGET_THUMB1_ONLY)
22865 if (mi_delta > 255)
22867 fputs ("\tldr\tr3, ", file);
22868 assemble_name (file, label);
22869 fputs ("+4\n", file);
22870 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22871 mi_op, this_regno, this_regno);
22873 else if (mi_delta != 0)
22875 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22876 mi_op, this_regno, this_regno,
22882 /* TODO: Use movw/movt for large constants when available. */
22883 while (mi_delta != 0)
22885 if ((mi_delta & (3 << shift)) == 0)
22889 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22890 mi_op, this_regno, this_regno,
22891 mi_delta & (0xff << shift));
22892 mi_delta &= ~(0xff << shift);
22899 if (TARGET_THUMB1_ONLY)
22900 fputs ("\tpop\t{r3}\n", file);
22902 fprintf (file, "\tbx\tr12\n");
22903 ASM_OUTPUT_ALIGN (file, 2);
22904 assemble_name (file, label);
22905 fputs (":\n", file);
22908 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22909 rtx tem = XEXP (DECL_RTL (function), 0);
22910 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22911 tem = gen_rtx_MINUS (GET_MODE (tem),
22913 gen_rtx_SYMBOL_REF (Pmode,
22914 ggc_strdup (labelpc)));
22915 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22918 /* Output ".word .LTHUNKn". */
22919 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
22921 if (TARGET_THUMB1_ONLY && mi_delta > 255)
22922 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
22926 fputs ("\tb\t", file);
22927 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
22928 if (NEED_PLT_RELOC)
22929 fputs ("(PLT)", file);
22930 fputc ('\n', file);
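
/* A minimal sketch of the output (assumed: ARM mode, no PIC,
   delta == 4, non-aggregate return so "this" is in r0):
	add	r0, r0, #4
	b	function
   with "(PLT)" appended to the branch target when NEED_PLT_RELOC.  */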
22935 arm_emit_vector_const (FILE *file, rtx x)
22938 const char * pattern;
22940 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22942 switch (GET_MODE (x))
22944 case V2SImode: pattern = "%08x"; break;
22945 case V4HImode: pattern = "%04x"; break;
22946 case V8QImode: pattern = "%02x"; break;
22947 default: gcc_unreachable ();
22950 fprintf (file, "0x");
22951 for (i = CONST_VECTOR_NUNITS (x); i--;)
22955 element = CONST_VECTOR_ELT (x, i);
22956 fprintf (file, pattern, INTVAL (element));
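
/* Worked example (hypothetical constant): a V4HImode vector whose
   elements 0..3 are 1, 2, 3, 4 is printed highest element first, i.e.
   0x0004000300020001.  */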
22962 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
22963 HFmode constant pool entries are actually loaded with ldr. */
22965 arm_emit_fp16_const (rtx c)
22970 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
22971 bits = real_to_target (NULL, &r, HFmode);
22972 if (WORDS_BIG_ENDIAN)
22973 assemble_zeros (2);
22974 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
22975 if (!WORDS_BIG_ENDIAN)
22976 assemble_zeros (2);
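
/* E.g. (assumed value) the HFmode constant 1.0 has bit pattern 0x3c00;
   on a little-endian target this emits the 16-bit value 0x3c00 followed
   by two bytes of zero padding, so ldr sees a full 32-bit word.  */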
22980 arm_output_load_gr (rtx *operands)
22987 if (GET_CODE (operands [1]) != MEM
22988 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
22989 || GET_CODE (reg = XEXP (sum, 0)) != REG
22990 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
22991 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
22992 return "wldrw%?\t%0, %1";
22994 /* Fix up an out-of-range load of a GR register. */
22995 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
22996 wcgr = operands[0];
22998 output_asm_insn ("ldr%?\t%0, %1", operands);
23000 operands[0] = wcgr;
23002 output_asm_insn ("tmcr%?\t%0, %1", operands);
23003 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23008 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23010 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23011 named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
23016 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23017 enum machine_mode mode,
23020 int second_time ATTRIBUTE_UNUSED)
23022 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23025 cfun->machine->uses_anonymous_args = 1;
23026 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23028 nregs = pcum->aapcs_ncrn;
23029 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23033 nregs = pcum->nregs;
23035 if (nregs < NUM_ARG_REGS)
23036 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
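
/* Worked example (hypothetical signature): for "int f (int a, ...)"
   under AAPCS, one core register is used by the named argument, so
   *pretend_size becomes (4 - 1) * 4 == 12 and the prologue pushes
   r1-r3, making the anonymous arguments contiguous on the stack.  */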
23039 /* Return nonzero if the CONSUMER instruction (a store) does not need
23040 PRODUCER's value to calculate the address. */
23043 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23045 rtx value = PATTERN (producer);
23046 rtx addr = PATTERN (consumer);
23048 if (GET_CODE (value) == COND_EXEC)
23049 value = COND_EXEC_CODE (value);
23050 if (GET_CODE (value) == PARALLEL)
23051 value = XVECEXP (value, 0, 0);
23052 value = XEXP (value, 0);
23053 if (GET_CODE (addr) == COND_EXEC)
23054 addr = COND_EXEC_CODE (addr);
23055 if (GET_CODE (addr) == PARALLEL)
23056 addr = XVECEXP (addr, 0, 0);
23057 addr = XEXP (addr, 0);
23059 return !reg_overlap_mentioned_p (value, addr);
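
/* For illustration (hypothetical insns): with PRODUCER
   (set (reg r1) ...) and CONSUMER
   (set (mem (plus (reg r2) (const_int 4))) (reg r1)), the stored value
   depends on r1 but the address does not, so this returns nonzero:
   the address calculation has no early dependency on the producer.  */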
23062 /* Return nonzero if the CONSUMER instruction (a store) does need
23063 PRODUCER's value to calculate the address. */
23066 arm_early_store_addr_dep (rtx producer, rtx consumer)
23068 return !arm_no_early_store_addr_dep (producer, consumer);
23071 /* Return nonzero if the CONSUMER instruction (a load) does need
23072 PRODUCER's value to calculate the address. */
23075 arm_early_load_addr_dep (rtx producer, rtx consumer)
23077 rtx value = PATTERN (producer);
23078 rtx addr = PATTERN (consumer);
23080 if (GET_CODE (value) == COND_EXEC)
23081 value = COND_EXEC_CODE (value);
23082 if (GET_CODE (value) == PARALLEL)
23083 value = XVECEXP (value, 0, 0);
23084 value = XEXP (value, 0);
23085 if (GET_CODE (addr) == COND_EXEC)
23086 addr = COND_EXEC_CODE (addr);
23087 if (GET_CODE (addr) == PARALLEL)
23088 addr = XVECEXP (addr, 0, 0);
23089 addr = XEXP (addr, 1);
23091 return reg_overlap_mentioned_p (value, addr);
23094 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23095 have an early register shift value or amount dependency on the
23096 result of PRODUCER. */
23099 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23101 rtx value = PATTERN (producer);
23102 rtx op = PATTERN (consumer);
23105 if (GET_CODE (value) == COND_EXEC)
23106 value = COND_EXEC_CODE (value);
23107 if (GET_CODE (value) == PARALLEL)
23108 value = XVECEXP (value, 0, 0);
23109 value = XEXP (value, 0);
23110 if (GET_CODE (op) == COND_EXEC)
23111 op = COND_EXEC_CODE (op);
23112 if (GET_CODE (op) == PARALLEL)
23113 op = XVECEXP (op, 0, 0);
23116 early_op = XEXP (op, 0);
23117 /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
23120 if (GET_CODE (early_op) == REG)
23123 return !reg_overlap_mentioned_p (value, early_op);
23126 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
23131 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23133 rtx value = PATTERN (producer);
23134 rtx op = PATTERN (consumer);
23137 if (GET_CODE (value) == COND_EXEC)
23138 value = COND_EXEC_CODE (value);
23139 if (GET_CODE (value) == PARALLEL)
23140 value = XVECEXP (value, 0, 0);
23141 value = XEXP (value, 0);
23142 if (GET_CODE (op) == COND_EXEC)
23143 op = COND_EXEC_CODE (op);
23144 if (GET_CODE (op) == PARALLEL)
23145 op = XVECEXP (op, 0, 0);
23148 early_op = XEXP (op, 0);
23150 /* This is either an actual independent shift, or a shift applied to
23151 the first operand of another operation. We want the value being
23152 shifted, in either case. */
23153 if (GET_CODE (early_op) != REG)
23154 early_op = XEXP (early_op, 0);
23156 return !reg_overlap_mentioned_p (value, early_op);
23159 /* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
23164 arm_no_early_mul_dep (rtx producer, rtx consumer)
23166 rtx value = PATTERN (producer);
23167 rtx op = PATTERN (consumer);
23169 if (GET_CODE (value) == COND_EXEC)
23170 value = COND_EXEC_CODE (value);
23171 if (GET_CODE (value) == PARALLEL)
23172 value = XVECEXP (value, 0, 0);
23173 value = XEXP (value, 0);
23174 if (GET_CODE (op) == COND_EXEC)
23175 op = COND_EXEC_CODE (op);
23176 if (GET_CODE (op) == PARALLEL)
23177 op = XVECEXP (op, 0, 0);
23180 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23182 if (GET_CODE (XEXP (op, 0)) == MULT)
23183 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23185 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23191 /* We can't rely on the caller doing the proper promotion when
23192 using APCS or ATPCS. */
23195 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23197 return !TARGET_AAPCS_BASED;
23200 static enum machine_mode
23201 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23202 enum machine_mode mode,
23203 int *punsignedp ATTRIBUTE_UNUSED,
23204 const_tree fntype ATTRIBUTE_UNUSED,
23205 int for_return ATTRIBUTE_UNUSED)
23207 if (GET_MODE_CLASS (mode) == MODE_INT
23208 && GET_MODE_SIZE (mode) < 4)
23214 /* AAPCS based ABIs use short enums by default. */
23217 arm_default_short_enums (void)
23219 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23223 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23226 arm_align_anon_bitfield (void)
23228 return TARGET_AAPCS_BASED;
23232 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23235 arm_cxx_guard_type (void)
23237 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23240 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23241 has an accumulator dependency on the result of the producer (a
23242 multiplication instruction) and no other dependency on that result. */
23244 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23246 rtx mul = PATTERN (producer);
23247 rtx mac = PATTERN (consumer);
23249 rtx mac_op0, mac_op1, mac_acc;
23251 if (GET_CODE (mul) == COND_EXEC)
23252 mul = COND_EXEC_CODE (mul);
23253 if (GET_CODE (mac) == COND_EXEC)
23254 mac = COND_EXEC_CODE (mac);
23256 /* Check that mul is of the form (set (...) (mult ...))
23257 and mla is of the form (set (...) (plus (mult ...) (...))). */
23258 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23259 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23260 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23263 mul_result = XEXP (mul, 0);
23264 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23265 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23266 mac_acc = XEXP (XEXP (mac, 1), 1);
23268 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23269 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23270 && !reg_overlap_mentioned_p (mul_result, mac_op1));
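
/* For illustration (hypothetical insns): a "mul r0, r1, r2" followed
   by "mla r3, r4, r5, r0" satisfies the test, since r0 feeds only the
   accumulator operand; if the mla instead used r0 as a multiplicand,
   the function would return false.  */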
23274 /* The EABI says test the least significant bit of a guard variable. */
23277 arm_cxx_guard_mask_bit (void)
23279 return TARGET_AAPCS_BASED;
23283 /* The EABI specifies that all array cookies are 8 bytes long. */
23286 arm_get_cookie_size (tree type)
23290 if (!TARGET_AAPCS_BASED)
23291 return default_cxx_get_cookie_size (type);
23293 size = build_int_cst (sizetype, 8);
23298 /* The EABI says that array cookies should also contain the element size. */
23301 arm_cookie_has_size (void)
23303 return TARGET_AAPCS_BASED;
23307 /* The EABI says constructors and destructors should return a pointer to
23308 the object constructed/destroyed. */
23311 arm_cxx_cdtor_returns_this (void)
23313 return TARGET_AAPCS_BASED;
/* The EABI says that an inline function may never be the key
   method.  */
23320 arm_cxx_key_method_may_be_inline (void)
23322 return !TARGET_AAPCS_BASED;
23326 arm_cxx_determine_class_data_visibility (tree decl)
23328 if (!TARGET_AAPCS_BASED
23329 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23332 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23333 is exported. However, on systems without dynamic vague linkage,
23334 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23335 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23336 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23338 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23339 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23343 arm_cxx_class_data_always_comdat (void)
23345 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23346 vague linkage if the class has no key function. */
23347 return !TARGET_AAPCS_BASED;
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */
23355 arm_cxx_use_aeabi_atexit (void)
23357 return TARGET_AAPCS_BASED;
23362 arm_set_return_address (rtx source, rtx scratch)
23364 arm_stack_offsets *offsets;
23365 HOST_WIDE_INT delta;
23367 unsigned long saved_regs;
23369 offsets = arm_get_frame_offsets ();
23370 saved_regs = offsets->saved_regs_mask;
23372 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23373 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23376 if (frame_pointer_needed)
23377 addr = plus_constant(hard_frame_pointer_rtx, -4);
23380 /* LR will be the first saved register. */
23381 delta = offsets->outgoing_args - (offsets->frame + 4);
23386 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23387 GEN_INT (delta & ~4095)));
23392 addr = stack_pointer_rtx;
23394 addr = plus_constant (addr, delta);
23396 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23402 thumb_set_return_address (rtx source, rtx scratch)
23404 arm_stack_offsets *offsets;
23405 HOST_WIDE_INT delta;
23406 HOST_WIDE_INT limit;
23409 unsigned long mask;
23413 offsets = arm_get_frame_offsets ();
23414 mask = offsets->saved_regs_mask;
23415 if (mask & (1 << LR_REGNUM))
23418 /* Find the saved regs. */
23419 if (frame_pointer_needed)
23421 delta = offsets->soft_frame - offsets->saved_args;
23422 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23428 delta = offsets->outgoing_args - offsets->saved_args;
23431 /* Allow for the stack frame. */
23432 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23434 /* The link register is always the first saved register. */
23437 /* Construct the address. */
23438 addr = gen_rtx_REG (SImode, reg);
23441 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23442 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23446 addr = plus_constant (addr, delta);
23448 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23451 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23454 /* Implements target hook vector_mode_supported_p. */
23456 arm_vector_mode_supported_p (enum machine_mode mode)
23458 /* Neon also supports V2SImode, etc. listed in the clause below. */
23459 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23460 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23463 if ((TARGET_NEON || TARGET_IWMMXT)
23464 && ((mode == V2SImode)
23465 || (mode == V4HImode)
23466 || (mode == V8QImode)))
23469 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23470 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23471 || mode == V2HAmode))
23477 /* Implements target hook array_mode_supported_p. */
23480 arm_array_mode_supported_p (enum machine_mode mode,
23481 unsigned HOST_WIDE_INT nelems)
23484 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23485 && (nelems >= 2 && nelems <= 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
23492 registers when autovectorizing for Neon, at least until multiple vector
23493 widths are supported properly by the middle-end. */
23495 static enum machine_mode
23496 arm_preferred_simd_mode (enum machine_mode mode)
23502 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23504 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23506 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23508 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23510 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23517 if (TARGET_REALLY_IWMMXT)
23533 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23535 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23536 using r0-r4 for function arguments, r7 for the stack frame and don't have
23537 enough left over to do doubleword arithmetic. For Thumb-2 all the
23538 potentially problematic instructions accept high registers so this is not
23539 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23540 that require many low registers. */
23542 arm_class_likely_spilled_p (reg_class_t rclass)
23544 if ((TARGET_THUMB1 && rclass == LO_REGS)
23545 || rclass == CC_REG)
23551 /* Implements target hook small_register_classes_for_mode_p. */
23553 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23555 return TARGET_THUMB1;
23558 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23559 ARM insns and therefore guarantee that the shift count is modulo 256.
23560 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23561 guarantee no particular behavior for out-of-range counts. */
23563 static unsigned HOST_WIDE_INT
23564 arm_shift_truncation_mask (enum machine_mode mode)
23566 return mode == SImode ? 255 : 0;
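
/* E.g. for SImode this lets the middle-end expand "x << (n & 255)"
   without the explicit AND, because the hardware register-shift
   already truncates the count to its low 8 bits.  */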
23570 /* Map internal gcc register numbers to DWARF2 register numbers. */
23573 arm_dbx_register_number (unsigned int regno)
23578 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23579 compatibility. The EABI defines them as registers 96-103. */
23580 if (IS_FPA_REGNUM (regno))
23581 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23583 if (IS_VFP_REGNUM (regno))
23585 /* See comment in arm_dwarf_register_span. */
23586 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23587 return 64 + regno - FIRST_VFP_REGNUM;
23589 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23592 if (IS_IWMMXT_GR_REGNUM (regno))
23593 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23595 if (IS_IWMMXT_REGNUM (regno))
23596 return 112 + regno - FIRST_IWMMXT_REGNUM;
23598 gcc_unreachable ();
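
/* Examples of the mapping (values implied by the code above): s0 maps
   to DWARF register 64 and s5 to 69, while d16, which has no
   single-precision alias, maps to 256 + 16 == 272.  */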
23601 /* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
23603 the DWARF generation code. Other registers can use the default. */
23605 arm_dwarf_register_span (rtx rtl)
23612 regno = REGNO (rtl);
23613 if (!IS_VFP_REGNUM (regno))
23616 /* XXX FIXME: The EABI defines two VFP register ranges:
     64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
     256-287: D0-D31
23619 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23620 corresponding D register. Until GDB supports this, we shall use the
23621 legacy encodings. We also use these encodings for D0-D15 for
23622 compatibility with older debuggers. */
23623 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23626 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23627 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23628 regno = (regno - FIRST_VFP_REGNUM) / 2;
23629 for (i = 0; i < nregs; i++)
23630 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23635 #if ARM_UNWIND_INFO
23636 /* Emit unwind directives for a store-multiple instruction or stack pointer
23637 push during alignment.
23638 These should only ever be generated by the function prologue code, so
23639 expect them to have a particular form. */
23642 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23645 HOST_WIDE_INT offset;
23646 HOST_WIDE_INT nregs;
23652 e = XVECEXP (p, 0, 0);
23653 if (GET_CODE (e) != SET)
23656 /* First insn will adjust the stack pointer. */
23657 if (GET_CODE (e) != SET
23658 || GET_CODE (XEXP (e, 0)) != REG
23659 || REGNO (XEXP (e, 0)) != SP_REGNUM
23660 || GET_CODE (XEXP (e, 1)) != PLUS)
23663 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23664 nregs = XVECLEN (p, 0) - 1;
23666 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  /* The function prologue may also push pc, but does not annotate it,
     as it is never restored.  We turn this into a stack pointer
     adjustment.  */
23671 if (nregs * 4 == offset - 4)
23673 fprintf (asm_out_file, "\t.pad #4\n");
23677 fprintf (asm_out_file, "\t.save {");
23679 else if (IS_VFP_REGNUM (reg))
23682 fprintf (asm_out_file, "\t.vsave {");
23684 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23686 /* FPA registers are done differently. */
23687 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23691 /* Unknown register type. */
23694 /* If the stack increment doesn't match the size of the saved registers,
23695 something has gone horribly wrong. */
23696 if (offset != nregs * reg_size)
23701 /* The remaining insns will describe the stores. */
23702 for (i = 1; i <= nregs; i++)
23704 /* Expect (set (mem <addr>) (reg)).
23705 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23706 e = XVECEXP (p, 0, i);
23707 if (GET_CODE (e) != SET
23708 || GET_CODE (XEXP (e, 0)) != MEM
23709 || GET_CODE (XEXP (e, 1)) != REG)
23712 reg = REGNO (XEXP (e, 1));
23717 fprintf (asm_out_file, ", ");
23718 /* We can't use %r for vfp because we need to use the
23719 double precision register names. */
23720 if (IS_VFP_REGNUM (reg))
23721 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23723 asm_fprintf (asm_out_file, "%r", reg);
23725 #ifdef ENABLE_CHECKING
23726 /* Check that the addresses are consecutive. */
23727 e = XEXP (XEXP (e, 0), 0);
23728 if (GET_CODE (e) == PLUS)
23730 offset += reg_size;
23731 if (GET_CODE (XEXP (e, 0)) != REG
23732 || REGNO (XEXP (e, 0)) != SP_REGNUM
23733 || GET_CODE (XEXP (e, 1)) != CONST_INT
23734 || offset != INTVAL (XEXP (e, 1)))
23738 || GET_CODE (e) != REG
23739 || REGNO (e) != SP_REGNUM)
23743 fprintf (asm_out_file, "}\n");
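
/* As an illustration (assumed prologue insns): a store-multiple such as
   "push {r4, r5, lr}" is described as "\t.save {r4, r5, lr}", while a
   push that also includes pc, e.g. "push {r4, lr, pc}", is described as
   "\t.pad #4" followed by "\t.save {r4, lr}", since pc is never
   restored.  */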
23746 /* Emit unwind directives for a SET. */
23749 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23757 switch (GET_CODE (e0))
23760 /* Pushing a single register. */
23761 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23762 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23763 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23766 asm_fprintf (asm_out_file, "\t.save ");
23767 if (IS_VFP_REGNUM (REGNO (e1)))
23768 asm_fprintf(asm_out_file, "{d%d}\n",
23769 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23771 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23775 if (REGNO (e0) == SP_REGNUM)
23777 /* A stack increment. */
23778 if (GET_CODE (e1) != PLUS
23779 || GET_CODE (XEXP (e1, 0)) != REG
23780 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23781 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23784 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23785 -INTVAL (XEXP (e1, 1)));
23787 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23789 HOST_WIDE_INT offset;
23791 if (GET_CODE (e1) == PLUS)
23793 if (GET_CODE (XEXP (e1, 0)) != REG
23794 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23796 reg = REGNO (XEXP (e1, 0));
23797 offset = INTVAL (XEXP (e1, 1));
23798 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23799 HARD_FRAME_POINTER_REGNUM, reg,
23802 else if (GET_CODE (e1) == REG)
23805 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23806 HARD_FRAME_POINTER_REGNUM, reg);
23811 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23813 /* Move from sp to reg. */
23814 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23816 else if (GET_CODE (e1) == PLUS
23817 && GET_CODE (XEXP (e1, 0)) == REG
23818 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23819 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23821 /* Set reg to offset from sp. */
23822 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23823 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
23835 /* Emit unwind directives for the given insn. */
23838 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23841 bool handled_one = false;
23843 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23846 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23847 && (TREE_NOTHROW (current_function_decl)
23848 || crtl->all_throwers_are_sibcalls))
23851 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23854 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23856 pat = XEXP (note, 0);
23857 switch (REG_NOTE_KIND (note))
23859 case REG_FRAME_RELATED_EXPR:
23862 case REG_CFA_REGISTER:
23865 pat = PATTERN (insn);
23866 if (GET_CODE (pat) == PARALLEL)
23867 pat = XVECEXP (pat, 0, 0);
23870 /* Only emitted for IS_STACKALIGN re-alignment. */
23875 src = SET_SRC (pat);
23876 dest = SET_DEST (pat);
23878 gcc_assert (src == stack_pointer_rtx);
23879 reg = REGNO (dest);
23880 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23883 handled_one = true;
23886 case REG_CFA_DEF_CFA:
23887 case REG_CFA_EXPRESSION:
23888 case REG_CFA_ADJUST_CFA:
23889 case REG_CFA_OFFSET:
23890 /* ??? Only handling here what we actually emit. */
23891 gcc_unreachable ();
23899 pat = PATTERN (insn);
23902 switch (GET_CODE (pat))
23905 arm_unwind_emit_set (asm_out_file, pat);
23909 /* Store multiple. */
23910 arm_unwind_emit_sequence (asm_out_file, pat);
23919 /* Output a reference from a function exception table to the type_info
23920 object X. The EABI specifies that the symbol should be relocated by
23921 an R_ARM_TARGET2 relocation. */
23924 arm_output_ttype (rtx x)
23926 fputs ("\t.word\t", asm_out_file);
23927 output_addr_const (asm_out_file, x);
23928 /* Use special relocations for symbol references. */
23929 if (GET_CODE (x) != CONST_INT)
23930 fputs ("(TARGET2)", asm_out_file);
23931 fputc ('\n', asm_out_file);
23936 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23939 arm_asm_emit_except_personality (rtx personality)
23941 fputs ("\t.personality\t", asm_out_file);
23942 output_addr_const (asm_out_file, personality);
23943 fputc ('\n', asm_out_file);
23946 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23949 arm_asm_init_sections (void)
23951 exception_section = get_unnamed_section (0, output_section_asm_op,
23954 #endif /* ARM_UNWIND_INFO */
23956 /* Output unwind directives for the start/end of a function. */
23959 arm_output_fn_unwind (FILE * f, bool prologue)
23961 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23965 fputs ("\t.fnstart\n", f);
23968 /* If this function will never be unwound, then mark it as such.
     The same condition is used in arm_unwind_emit to suppress
23970 the frame annotations. */
23971 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23972 && (TREE_NOTHROW (current_function_decl)
23973 || crtl->all_throwers_are_sibcalls))
23974 fputs("\t.cantunwind\n", f);
23976 fputs ("\t.fnend\n", f);
23981 arm_emit_tls_decoration (FILE *fp, rtx x)
23983 enum tls_reloc reloc;
23986 val = XVECEXP (x, 0, 0);
23987 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
23989 output_addr_const (fp, val);
23994 fputs ("(tlsgd)", fp);
23997 fputs ("(tlsldm)", fp);
24000 fputs ("(tlsldo)", fp);
24003 fputs ("(gottpoff)", fp);
24006 fputs ("(tpoff)", fp);
24009 fputs ("(tlsdesc)", fp);
24012 gcc_unreachable ();
24021 fputs (" + (. - ", fp);
24022 output_addr_const (fp, XVECEXP (x, 0, 2));
  /* For DESCSEQ the 3rd operand encodes thumbness and is added;
     for the other relocations it is subtracted.  */
24024 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24025 output_addr_const (fp, XVECEXP (x, 0, 3));
24035 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24038 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24040 gcc_assert (size == 4);
24041 fputs ("\t.word\t", file);
24042 output_addr_const (file, x);
24043 fputs ("(tlsldo)", file);
24046 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24049 arm_output_addr_const_extra (FILE *fp, rtx x)
24051 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24052 return arm_emit_tls_decoration (fp, x);
24053 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24056 int labelno = INTVAL (XVECEXP (x, 0, 0));
24058 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24059 assemble_name_raw (fp, label);
24063 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24065 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24069 output_addr_const (fp, XVECEXP (x, 0, 0));
24073 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24075 output_addr_const (fp, XVECEXP (x, 0, 0));
24079 output_addr_const (fp, XVECEXP (x, 0, 1));
24083 else if (GET_CODE (x) == CONST_VECTOR)
24084 return arm_emit_vector_const (fp, x);
24089 /* Output assembly for a shift instruction.
24090 SET_FLAGS determines how the instruction modifies the condition codes.
24091 0 - Do not set condition codes.
24092 1 - Set condition codes.
24093 2 - Use smallest instruction. */
24095 arm_output_shift (rtx *operands, int set_flags)
24098 static const char flag_chars[3] = {'?', '.', '!'};
24103 c = flag_chars[set_flags];
24104 if (TARGET_UNIFIED_ASM)
24106 shift = shift_op (operands[3], &val);
24110 operands[2] = GEN_INT (val);
24111 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24114 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24117 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24118 output_asm_insn (pattern, operands);
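/* For instance, with SET_FLAGS == 1 the '.' punctuation character is
   selected, so a unified-asm register shift is emitted from a pattern
   such as "lsl%.\t%0, %1, %2"; arm_print_operand expands the
   punctuation character into the appropriate flag-setting/condition
   suffix.  */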
24122 /* Output a Thumb-1 casesi dispatch sequence. */
24124 thumb1_output_casesi (rtx *operands)
24126 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24128 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24130 switch (GET_MODE (diff_vec))
24133 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24134 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24136 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24137 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24139 return "bl\t%___gnu_thumb1_case_si";
24141 gcc_unreachable ();
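/* For a QImode table of unsigned offsets this therefore emits
   "bl __gnu_thumb1_case_uqi" (modulo the user label prefix),
   deferring the actual dispatch to the out-of-line libgcc helper.  */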
24145 /* Output a Thumb-2 casesi instruction. */
24147 thumb2_output_casesi (rtx *operands)
24149 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24151 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24153 output_asm_insn ("cmp\t%0, %1", operands);
24154 output_asm_insn ("bhi\t%l3", operands);
24155 switch (GET_MODE (diff_vec))
24158 return "tbb\t[%|pc, %0]";
24160 return "tbh\t[%|pc, %0, lsl #1]";
24164 output_asm_insn ("adr\t%4, %l2", operands);
24165 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24166 output_asm_insn ("add\t%4, %4, %5", operands);
24171 output_asm_insn ("adr\t%4, %l2", operands);
24172 return "ldr\t%|pc, [%4, %0, lsl #2]";
24175 gcc_unreachable ();
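/* An illustrative QImode dispatch sequence (assuming the index in r0
   and the bound in r1) is a bounds check plus a table branch:

	cmp	r0, r1
	bhi	.Ldefault
	tbb	[pc, r0]

   with the byte table of branch offsets following the tbb.  */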
24179 /* Most ARM cores are single issue, but some newer ones can dual issue.
24180 The scheduler descriptions rely on this being correct. */
24182 arm_issue_rate (void)
24204 /* A table and a function to perform ARM-specific name mangling for
24205 NEON vector types in order to conform to the AAPCS (see "Procedure
24206 Call Standard for the ARM Architecture", Appendix A). To qualify
24207 for emission with the mangled names defined in that document, a
24208 vector type must not only be of the correct mode but also be
24209 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24210 typedef struct
24211 {
24212 enum machine_mode mode;
24213 const char *element_type_name;
24214 const char *aapcs_name;
24215 } arm_mangle_map_entry;
24217 static arm_mangle_map_entry arm_mangle_map[] = {
24218 /* 64-bit containerized types. */
24219 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24220 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24221 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24222 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24223 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24224 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24225 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24226 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24227 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24228 /* 128-bit containerized types. */
24229 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24230 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24231 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24232 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24233 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24234 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24235 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24236 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24237 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24238 { VOIDmode, NULL, NULL }
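/* For example, per the table above the 64-bit vector int8x8_t
   (V8QImode with __builtin_neon_qi elements) is emitted as
   "15__simd64_int8_t", so a hypothetical C++ function f(int8x8_t)
   would mangle as _Z1f15__simd64_int8_t.  */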
24242 arm_mangle_type (const_tree type)
24244 arm_mangle_map_entry *pos = arm_mangle_map;
24246 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24247 has to be mangled as if it is in the "std" namespace. */
24248 if (TARGET_AAPCS_BASED
24249 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24251 static bool warned;
24252 if (!warned && warn_psabi && !in_system_header)
24255 inform (input_location,
24256 "the mangling of %<va_list%> has changed in GCC 4.4");
24258 return "St9__va_list";
24261 /* Half-precision float. */
24262 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24265 if (TREE_CODE (type) != VECTOR_TYPE)
24268 /* Check the mode of the vector type, and the name of the vector
24269 element type, against the table. */
24270 while (pos->mode != VOIDmode)
24272 tree elt_type = TREE_TYPE (type);
24274 if (pos->mode == TYPE_MODE (type)
24275 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24276 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24277 pos->element_type_name))
24278 return pos->aapcs_name;
24283 /* Use the default mangling for unrecognized (possibly user-defined) vector types. */
24288 /* Order of allocation of core registers for Thumb: this allocation is
24289 written over the corresponding initial entries of the array
24290 initialized with REG_ALLOC_ORDER. We allocate all low registers
24291 first. Saving and restoring a low register is usually cheaper than
24292 using a call-clobbered high register. */
24294 static const int thumb_core_reg_alloc_order[] =
24296 3, 2, 1, 0, 4, 5, 6, 7,
24297 14, 12, 8, 9, 10, 11, 13, 15
24300 /* Adjust register allocation order when compiling for Thumb. */
24303 arm_order_regs_for_local_alloc (void)
24305 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24306 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24308 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24309 sizeof (thumb_core_reg_alloc_order));
24312 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24315 arm_frame_pointer_required (void)
24317 return (cfun->has_nonlocal_label
24318 || SUBTARGET_FRAME_POINTER_REQUIRED
24319 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24322 /* Thumb-1 is the only target that lacks conditional execution, so
24323 return true if the target is not Thumb-1. */
24325 arm_have_conditional_execution (void)
24327 return !TARGET_THUMB1;
24330 /* Legitimize a memory reference for sync primitive implemented using
24331 ldrex / strex. We currently force the form of the reference to be
24332 indirect without offset. We do not yet support the indirect offset
24333 addressing supported by some ARM targets for these operations. */
24336 arm_legitimize_sync_memory (rtx memory)
24338 rtx addr = force_reg (Pmode, XEXP (memory, 0));
24339 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
24341 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
24342 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
24343 return legitimate_memory;
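/* As a sketch: a reference such as (mem:SI (plus:SI (reg:SI r4)
   (const_int 8))) is rewritten as (mem:SI (reg:SI rN)), with the
   address computed into the fresh pseudo rN beforehand, so the
   ldrex/strex patterns only ever see a bare register address.  */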
24346 /* An instruction emitter. */
24347 typedef void (* emit_f) (int label, const char *, rtx *);
24349 /* An instruction emitter that emits via the conventional
24350 output_asm_insn. */
24352 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
24354 output_asm_insn (pattern, operands);
24357 /* Count the number of emitted synchronization instructions. */
24358 static unsigned arm_insn_count;
24360 /* An emitter that counts emitted instructions but does not actually
24361 emit instructions into the instruction stream. */
24363 arm_count (int label,
24364 const char *pattern ATTRIBUTE_UNUSED,
24365 rtx *operands ATTRIBUTE_UNUSED)
24371 /* Construct a pattern using conventional output formatting and feed
24372 it to output_asm_insn. Provides a mechanism to construct the
24373 output pattern on the fly. Note the hard limit on the pattern buffer size. */
24375 static void ATTRIBUTE_PRINTF_4
24376 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
24377 const char *pattern, ...)
24382 va_start (ap, pattern);
24383 vsprintf (buffer, pattern, ap);
24385 emit (label, buffer, operands);
24388 /* Emit the memory barrier instruction, if any, provided by this
24389 target to a specified emitter. */
24391 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
24393 if (TARGET_HAVE_DMB)
24395 /* Note we issue a system level barrier. We should consider
24396 issuing an inner shareability zone barrier here instead, i.e. "dmb ish". */
24398 emit (0, "dmb\tsy", operands);
24402 if (TARGET_HAVE_DMB_MCR)
24404 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
24408 gcc_unreachable ();
24411 /* Emit the memory barrier instruction, if any, provided by this target. */
24414 arm_output_memory_barrier (rtx *operands)
24416 arm_process_output_memory_barrier (arm_emit, operands);
24420 /* Helper to figure out the instruction suffix required on ldrex/strex
24421 for operations on an object of the specified mode. */
24422 static const char *
24423 arm_ldrex_suffix (enum machine_mode mode)
24427 case QImode: return "b";
24428 case HImode: return "h";
24429 case SImode: return "";
24430 case DImode: return "d";
24432 gcc_unreachable ();
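/* So, for example, a QImode access uses "ldrexb"/"strexb", while an
   SImode access uses the bare "ldrex"/"strex" mnemonics.  */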
24437 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified memory MODE. */
24440 arm_output_ldrex (emit_f emit,
24441 enum machine_mode mode,
24447 operands[0] = target;
24448 if (mode != DImode)
24450 const char *suffix = arm_ldrex_suffix (mode);
24451 operands[1] = memory;
24452 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
24456 /* The restrictions on target registers in ARM mode are that the two
24457 registers are consecutive and the first one is even; Thumb is
24458 actually more flexible, but DI should give us this anyway.
24459 Note that the 1st register always gets the lowest word in memory. */
24460 gcc_assert ((REGNO (target) & 1) == 0);
24461 operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
24462 operands[2] = memory;
24463 arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
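/* For a DImode target in the register pair r2/r3 this emits, e.g.:

	ldrexd	r2, r3, [r5]

   with the even register r2 receiving the low word of the pair.  */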
24467 /* Emit a strex{b,h,d, } instruction appropriate for the specified memory MODE. */
24470 arm_output_strex (emit_f emit,
24471 enum machine_mode mode,
24479 operands[0] = result;
24480 operands[1] = value;
24481 if (mode != DImode)
24483 const char *suffix = arm_ldrex_suffix (mode);
24484 operands[2] = memory;
24485 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
24490 /* The restrictions on target registers in ARM mode are that the two
24491 registers are consecutive and the first one is even; Thumb is
24492 actually more flexible, but DI should give us this anyway.
24493 Note that the 1st register always gets the lowest word in memory. */
24494 gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
24495 operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
24496 operands[3] = memory;
24497 arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
24502 /* Helper to emit an it instruction in Thumb2 mode only; although the assembler
24503 would ignore it in ARM mode, emitting it there would mess up the instruction
24504 counts we sometimes keep. FLAGS gives the extra t's and e's when more than
24505 one instruction is conditional. */
24507 arm_output_it (emit_f emit, const char *flags, const char *cond)
24509 rtx operands[1]; /* Don't actually use the operand. */
24511 arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
24514 /* Helper to emit a two operand instruction. */
24516 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
24522 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
24525 /* Helper to emit a three operand instruction. */
24527 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
24534 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
24537 /* Emit a load store exclusive synchronization loop.
24541 if old_value != required_value
24543 t1 = sync_op (old_value, new_value)
24544 [mem] = t1, t2 = [0|1]
24548 t1 == t2 is not permitted
24549 t1 == old_value is permitted
24553 RTX register representing the required old_value for
24554 the modify to continue; if NULL no comparison is performed. */
24556 arm_output_sync_loop (emit_f emit,
24557 enum machine_mode mode,
24560 rtx required_value,
24564 enum attr_sync_op sync_op,
24565 int early_barrier_required)
24568 /* We'll use the _lo variables for the normal rtx in the non-DI case
24569 as well as for the least-significant word in the DI case. */
24570 rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
24571 rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
24573 bool is_di = mode == DImode;
24575 gcc_assert (t1 != t2);
24577 if (early_barrier_required)
24578 arm_process_output_memory_barrier (emit, NULL);
24580 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
24582 arm_output_ldrex (emit, mode, old_value, memory);
24586 old_value_lo = gen_lowpart (SImode, old_value);
24587 old_value_hi = gen_highpart (SImode, old_value);
24588 if (required_value)
24590 required_value_lo = gen_lowpart (SImode, required_value);
24591 required_value_hi = gen_highpart (SImode, required_value);
24595 /* Silence a false potentially-unused warning. */
24596 required_value_lo = NULL_RTX;
24597 required_value_hi = NULL_RTX;
24599 new_value_lo = gen_lowpart (SImode, new_value);
24600 new_value_hi = gen_highpart (SImode, new_value);
24601 t1_lo = gen_lowpart (SImode, t1);
24602 t1_hi = gen_highpart (SImode, t1);
24606 old_value_lo = old_value;
24607 new_value_lo = new_value;
24608 required_value_lo = required_value;
24611 /* Silence a false potentially-unused warning. */
24613 new_value_hi = NULL_RTX;
24614 required_value_hi = NULL_RTX;
24615 old_value_hi = NULL_RTX;
24618 if (required_value)
24620 operands[0] = old_value_lo;
24621 operands[1] = required_value_lo;
24623 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
24626 arm_output_it (emit, "", "eq");
24627 arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
24629 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
24635 arm_output_op3 (emit, is_di ? "adds" : "add",
24636 t1_lo, old_value_lo, new_value_lo);
24638 arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
24642 arm_output_op3 (emit, is_di ? "subs" : "sub",
24643 t1_lo, old_value_lo, new_value_lo);
24645 arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
24649 arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
24651 arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
24655 arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
24657 arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
24661 arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
24663 arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
24667 arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
24669 arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
24670 arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
24672 arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
24677 t1_lo = new_value_lo;
24679 t1_hi = new_value_hi;
24683 /* Note that the result of strex is a 0/1 flag and always occupies a single register. */
24686 arm_output_strex (emit, mode, "", t2, t1, memory);
24688 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24689 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24690 LOCAL_LABEL_PREFIX);
24694 /* Use old_value for the return value because for some operations
24695 the old_value can easily be restored. This saves one register. */
24696 arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
24697 operands[0] = old_value_lo;
24698 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24699 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24700 LOCAL_LABEL_PREFIX);
24702 /* Note that we only used the _lo half of old_value as a temporary,
24703 so in the DI case we don't have to restore the _hi part. */
24707 arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
24711 arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
24715 arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
24719 arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
24723 gcc_unreachable ();
24727 /* Note: the label is placed before the barrier so that in the cmp-failure
24728 case we still get a barrier to stop subsequent loads floating upwards
24729 past the ldrex; see PR target/48126. */
24730 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
24731 arm_process_output_memory_barrier (emit, NULL);
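/* Schematically, for a word-sized compare-and-swap the loop emitted
   above is (label syntax simplified; the real labels are LSYT%= and
   LSYB%=):

	dmb	sy
   LSYT:	ldrex	r_old, [r_mem]
	cmp	r_old, r_req
	bne	LSYB
	strex	r_t2, r_new, [r_mem]
	teq	r_t2, #0
	bne	LSYT
   LSYB:	dmb	sy
*/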
24735 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
24738 default_value = operands[index - 1];
24740 return default_value;
24743 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
24744 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
24746 /* Extract the operands for a synchronization instruction from the
24747 instruction's attributes and emit the instruction. */
24749 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
24751 rtx result, memory, required_value, new_value, t1, t2;
24753 enum machine_mode mode;
24754 enum attr_sync_op sync_op;
24756 result = FETCH_SYNC_OPERAND(result, 0);
24757 memory = FETCH_SYNC_OPERAND(memory, 0);
24758 required_value = FETCH_SYNC_OPERAND(required_value, 0);
24759 new_value = FETCH_SYNC_OPERAND(new_value, 0);
24760 t1 = FETCH_SYNC_OPERAND(t1, 0);
24761 t2 = FETCH_SYNC_OPERAND(t2, 0);
24763 early_barrier = get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
24764 sync_op = get_attr_sync_op (insn);
24765 mode = GET_MODE (memory);
24767 arm_output_sync_loop (emit, mode, result, memory, required_value,
24768 new_value, t1, t2, sync_op, early_barrier);
24771 /* Emit a synchronization instruction loop. */
24773 arm_output_sync_insn (rtx insn, rtx *operands)
24775 arm_process_output_sync_insn (arm_emit, insn, operands);
24779 /* Count the number of machine instructions that will be emitted for a
24780 synchronization instruction. Note that the emitter used does not
24781 emit instructions; it just counts them, being careful not
24782 to count labels. */
24784 arm_sync_loop_insns (rtx insn, rtx *operands)
24786 arm_insn_count = 0;
24787 arm_process_output_sync_insn (arm_count, insn, operands);
24788 return arm_insn_count;
24791 /* Helper to call a target sync instruction generator, dealing with
24792 the variation in operands required by the different generators. */
24794 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
24795 rtx memory, rtx required_value, rtx new_value)
24797 switch (generator->op)
24799 case arm_sync_generator_omn:
24800 gcc_assert (! required_value);
24801 return generator->u.omn (old_value, memory, new_value);
24803 case arm_sync_generator_omrn:
24804 gcc_assert (required_value);
24805 return generator->u.omrn (old_value, memory, required_value, new_value);
24811 /* Expand a synchronization loop. The synchronization loop is expanded
24812 as an opaque block of instructions in order to ensure that we do
24813 not subsequently get extraneous memory accesses inserted within the
24814 critical region. The exclusive access property of ldrex/strex is
24815 only guaranteed if there are no intervening memory accesses. */
24817 arm_expand_sync (enum machine_mode mode,
24818 struct arm_sync_generator *generator,
24819 rtx target, rtx memory, rtx required_value, rtx new_value)
24821 if (target == NULL)
24822 target = gen_reg_rtx (mode);
24824 memory = arm_legitimize_sync_memory (memory);
24825 if (mode != SImode && mode != DImode)
24827 rtx load_temp = gen_reg_rtx (SImode);
24829 if (required_value)
24830 required_value = convert_modes (SImode, mode, required_value, true);
24832 new_value = convert_modes (SImode, mode, new_value, true);
24833 emit_insn (arm_call_generator (generator, load_temp, memory,
24834 required_value, new_value));
24835 emit_move_insn (target, gen_lowpart (mode, load_temp));
24839 emit_insn (arm_call_generator (generator, target, memory, required_value,
24840 new_value));
24844 static unsigned int
24845 arm_autovectorize_vector_sizes (void)
24847 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24851 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24853 /* Vectors which aren't in packed structures will not be less aligned than
24854 the natural alignment of their element type, so this is safe. */
24855 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24858 return default_builtin_vector_alignment_reachable (type, is_packed);
24862 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24863 const_tree type, int misalignment,
24866 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24868 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24873 /* If the misalignment is unknown, we should be able to handle the access
24874 so long as it is not to a member of a packed data structure. */
24875 if (misalignment == -1)
24878 /* Return true if the misalignment is a multiple of the natural alignment
24879 of the vector's element type. This is probably always going to be
24880 true in practice, since we've already established that this isn't a packed access. */
24882 return ((misalignment % align) == 0);
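/* E.g. for a V4SI access with 4-byte int elements, a known
   misalignment of 8 bytes is accepted (8 % 4 == 0), while a
   misalignment of 2 would be rejected.  */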
24885 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24890 arm_conditional_register_usage (void)
24894 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24896 for (regno = FIRST_FPA_REGNUM;
24897 regno <= LAST_FPA_REGNUM; ++regno)
24898 fixed_regs[regno] = call_used_regs[regno] = 1;
24901 if (TARGET_THUMB1 && optimize_size)
24903 /* When optimizing for size on Thumb-1, it's better not
24904 to use the HI regs, because of the overhead of stacking them. */
24906 for (regno = FIRST_HI_REGNUM;
24907 regno <= LAST_HI_REGNUM; ++regno)
24908 fixed_regs[regno] = call_used_regs[regno] = 1;
24911 /* The link register can be clobbered by any branch insn,
24912 but we have no way to track that at present, so mark
24913 it as unavailable. */
24915 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24917 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24919 if (TARGET_MAVERICK)
24921 for (regno = FIRST_FPA_REGNUM;
24922 regno <= LAST_FPA_REGNUM; ++ regno)
24923 fixed_regs[regno] = call_used_regs[regno] = 1;
24924 for (regno = FIRST_CIRRUS_FP_REGNUM;
24925 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24927 fixed_regs[regno] = 0;
24928 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24933 /* VFPv3 registers are disabled when earlier VFP
24934 versions are selected due to the definition of
24935 LAST_VFP_REGNUM. */
24936 for (regno = FIRST_VFP_REGNUM;
24937 regno <= LAST_VFP_REGNUM; ++ regno)
24939 fixed_regs[regno] = 0;
24940 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24941 || regno >= FIRST_VFP_REGNUM + 32;
24946 if (TARGET_REALLY_IWMMXT)
24948 regno = FIRST_IWMMXT_GR_REGNUM;
24949 /* The 2002/10/09 revision of the XScale ABI has wCG0
24950 and wCG1 as call-preserved registers. The 2002/11/21
24951 revision changed this so that all wCG registers are
24952 scratch registers. */
24953 for (regno = FIRST_IWMMXT_GR_REGNUM;
24954 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24955 fixed_regs[regno] = 0;
24956 /* The XScale ABI has wR0 - wR9 as scratch registers,
24957 the rest as call-preserved registers. */
24958 for (regno = FIRST_IWMMXT_REGNUM;
24959 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24961 fixed_regs[regno] = 0;
24962 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24966 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24968 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24969 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24971 else if (TARGET_APCS_STACK)
24973 fixed_regs[10] = 1;
24974 call_used_regs[10] = 1;
24976 /* -mcaller-super-interworking reserves r11 for calls to
24977 _interwork_r11_call_via_rN(). Making the register global
24978 is an easy way of ensuring that it remains valid for all calls. */
24980 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24981 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24983 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24984 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24985 if (TARGET_CALLER_INTERWORKING)
24986 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24988 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24992 arm_preferred_rename_class (reg_class_t rclass)
24994 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24995 using GENERIC_REGS, so during the register rename pass we prefer
24996 LO_REGS, which can reduce code size. */
24997 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25003 /* Compute the attribute "length" of insn "*push_multi".
25004 So this function MUST be kept in sync with that insn pattern. */
25006 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25008 int i, regno, hi_reg;
25009 int num_saves = XVECLEN (parallel_op, 0);
25019 regno = REGNO (first_op);
25020 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25021 for (i = 1; i < num_saves && !hi_reg; i++)
25023 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25024 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25032 /* Compute the number of instructions emitted by output_move_double. */
25034 arm_count_output_move_double_insns (rtx *operands)
25037 output_move_double (operands, false, &count);
25041 #include "gt-arm.h"