1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
72 /* Forward function declarations. */
73 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
74 static int arm_compute_static_chain_stack_bytes (void);
75 static arm_stack_offsets *arm_get_frame_offsets (void);
76 static void arm_add_gc_roots (void);
77 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
78 HOST_WIDE_INT, rtx, rtx, int, int);
79 static unsigned bit_count (unsigned long);
80 static int arm_address_register_rtx_p (rtx, int);
81 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
82 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
83 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
84 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static rtx emit_sfm (int, int);
91 static unsigned arm_size_return_regs (void);
92 static bool arm_assemble_integer (rtx, unsigned int, int);
93 static void arm_print_operand (FILE *, rtx, int);
94 static void arm_print_operand_address (FILE *, rtx);
95 static bool arm_print_operand_punct_valid_p (unsigned char code);
96 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
97 static arm_cc get_arm_condition_code (rtx);
98 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
99 static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
102 static const char *shift_op (rtx, HOST_WIDE_INT *);
103 static struct machine_function *arm_init_machine_status (void);
104 static void thumb_exit (FILE *, int);
106 static HOST_WIDE_INT get_jump_table_size (rtx);
107 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
108 static Mnode *add_minipool_forward_ref (Mfix *);
109 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_backward_ref (Mfix *);
111 static void assign_minipool_offsets (Mfix *);
112 static void arm_print_value (FILE *, rtx);
113 static void dump_minipool (rtx);
114 static int arm_barrier_cost (rtx);
115 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
116 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
119 static void arm_reorg (void);
120 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
121 static unsigned long arm_compute_save_reg0_reg12_mask (void);
122 static unsigned long arm_compute_save_reg_mask (void);
123 static unsigned long arm_isr_value (tree);
124 static unsigned long arm_compute_func_type (void);
125 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
128 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
131 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
132 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
133 static int arm_comp_type_attributes (const_tree, const_tree);
134 static void arm_set_default_type_attributes (tree);
135 static int arm_adjust_cost (rtx, rtx, rtx, int);
136 static int optimal_immediate_sequence (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence);
139 static int optimal_immediate_sequence_1 (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
143 static int arm_get_strip_length (int);
144 static bool arm_function_ok_for_sibcall (tree, tree);
145 static enum machine_mode arm_promote_function_mode (const_tree,
                                                     enum machine_mode, int *,
                                                     const_tree, int);
148 static bool arm_return_in_memory (const_tree, const_tree);
149 static rtx arm_function_value (const_tree, const_tree, bool);
150 static rtx arm_libcall_value_1 (enum machine_mode);
151 static rtx arm_libcall_value (enum machine_mode, const_rtx);
152 static bool arm_function_value_regno_p (const unsigned int);
153 static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
156 static bool arm_have_conditional_execution (void);
157 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
158 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
159 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
160 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
161 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
166 static int arm_address_cost (rtx, bool);
167 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
168 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
169 static bool arm_memory_load_p (rtx);
170 static bool arm_cirrus_insn_p (rtx);
171 static void cirrus_reorg (rtx);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx safe_vector_operand (rtx, enum machine_mode);
175 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
176 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
177 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
178 static tree arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond, rtx pattern);
180 static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
198 static void arm_encode_section_info (tree, rtx, int);
201 static void arm_file_end (void);
202 static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static const char *arm_invalid_parameter_type (const_tree t);
244 static const char *arm_invalid_return_type (const_tree t);
245 static tree arm_promoted_type (const_tree t);
246 static tree arm_convert_to_type (tree type, tree expr);
247 static bool arm_scalar_mode_supported_p (enum machine_mode);
248 static bool arm_frame_pointer_required (void);
249 static bool arm_can_eliminate (const int, const int);
250 static void arm_asm_trampoline_template (FILE *);
251 static void arm_trampoline_init (rtx, tree, rtx);
252 static rtx arm_trampoline_adjust_address (rtx);
253 static rtx arm_pic_static_addr (rtx orig, rtx reg);
254 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool arm_array_mode_supported_p (enum machine_mode,
258 unsigned HOST_WIDE_INT);
259 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
260 static bool arm_class_likely_spilled_p (reg_class_t);
261 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
266 static void arm_conditional_register_usage (void);
267 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
268 static unsigned int arm_autovectorize_vector_sizes (void);
269 static int arm_default_branch_cost (bool, bool);
270 static int arm_cortex_a5_branch_cost (bool, bool);
272 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
273 const unsigned char *sel);
276 /* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
{
279 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
280 affects_type_identity } */
281 /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
284 { "long_call", 0, 0, false, true, true, NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
287 { "short_call", 0, 0, false, true, true, NULL, false },
288 /* Specify the procedure call conventions for a function. */
  { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
308 { "dllimport", 0, 0, true, false, false, NULL, false },
309 { "dllexport", 0, 0, true, false, false, NULL, false },
  { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
    false },
312 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
313 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
314 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};
321 /* Initialize the GCC target structure. */
322 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
323 #undef TARGET_MERGE_DECL_ATTRIBUTES
324 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
327 #undef TARGET_LEGITIMIZE_ADDRESS
328 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
330 #undef TARGET_ATTRIBUTE_TABLE
331 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
333 #undef TARGET_ASM_FILE_START
334 #define TARGET_ASM_FILE_START arm_file_start
335 #undef TARGET_ASM_FILE_END
336 #define TARGET_ASM_FILE_END arm_file_end
338 #undef TARGET_ASM_ALIGNED_SI_OP
339 #define TARGET_ASM_ALIGNED_SI_OP NULL
340 #undef TARGET_ASM_INTEGER
341 #define TARGET_ASM_INTEGER arm_assemble_integer
343 #undef TARGET_PRINT_OPERAND
344 #define TARGET_PRINT_OPERAND arm_print_operand
345 #undef TARGET_PRINT_OPERAND_ADDRESS
346 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
347 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
348 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
350 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
351 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
353 #undef TARGET_ASM_FUNCTION_PROLOGUE
354 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
356 #undef TARGET_ASM_FUNCTION_EPILOGUE
357 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE arm_option_override
362 #undef TARGET_COMP_TYPE_ATTRIBUTES
363 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
365 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
366 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
368 #undef TARGET_SCHED_ADJUST_COST
369 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
371 #undef TARGET_REGISTER_MOVE_COST
372 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
374 #undef TARGET_MEMORY_MOVE_COST
375 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
377 #undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
384 #undef TARGET_STRIP_NAME_ENCODING
385 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
387 #undef TARGET_ASM_INTERNAL_LABEL
388 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
393 #undef TARGET_FUNCTION_VALUE
394 #define TARGET_FUNCTION_VALUE arm_function_value
396 #undef TARGET_LIBCALL_VALUE
397 #define TARGET_LIBCALL_VALUE arm_libcall_value
399 #undef TARGET_FUNCTION_VALUE_REGNO_P
400 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
402 #undef TARGET_ASM_OUTPUT_MI_THUNK
403 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
404 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
405 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
407 #undef TARGET_RTX_COSTS
408 #define TARGET_RTX_COSTS arm_rtx_costs
409 #undef TARGET_ADDRESS_COST
410 #define TARGET_ADDRESS_COST arm_address_cost
412 #undef TARGET_SHIFT_TRUNCATION_MASK
413 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
414 #undef TARGET_VECTOR_MODE_SUPPORTED_P
415 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
416 #undef TARGET_ARRAY_MODE_SUPPORTED_P
417 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
418 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
419 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
420 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
421 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
422 arm_autovectorize_vector_sizes
424 #undef TARGET_MACHINE_DEPENDENT_REORG
425 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS arm_init_builtins
429 #undef TARGET_EXPAND_BUILTIN
430 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
431 #undef TARGET_BUILTIN_DECL
432 #define TARGET_BUILTIN_DECL arm_builtin_decl
434 #undef TARGET_INIT_LIBFUNCS
435 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
437 #undef TARGET_PROMOTE_FUNCTION_MODE
438 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
439 #undef TARGET_PROMOTE_PROTOTYPES
440 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
441 #undef TARGET_PASS_BY_REFERENCE
442 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
443 #undef TARGET_ARG_PARTIAL_BYTES
444 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
445 #undef TARGET_FUNCTION_ARG
446 #define TARGET_FUNCTION_ARG arm_function_arg
447 #undef TARGET_FUNCTION_ARG_ADVANCE
448 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
449 #undef TARGET_FUNCTION_ARG_BOUNDARY
450 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
455 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
456 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
458 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
459 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
460 #undef TARGET_TRAMPOLINE_INIT
461 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
462 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
463 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
465 #undef TARGET_DEFAULT_SHORT_ENUMS
466 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468 #undef TARGET_ALIGN_ANON_BITFIELD
469 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471 #undef TARGET_NARROW_VOLATILE_BITFIELD
472 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474 #undef TARGET_CXX_GUARD_TYPE
475 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477 #undef TARGET_CXX_GUARD_MASK_BIT
478 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480 #undef TARGET_CXX_GET_COOKIE_SIZE
481 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483 #undef TARGET_CXX_COOKIE_HAS_SIZE
484 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486 #undef TARGET_CXX_CDTOR_RETURNS_THIS
487 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
490 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492 #undef TARGET_CXX_USE_AEABI_ATEXIT
493 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
496 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
497 arm_cxx_determine_class_data_visibility
499 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
500 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502 #undef TARGET_RETURN_IN_MSB
503 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505 #undef TARGET_RETURN_IN_MEMORY
506 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508 #undef TARGET_MUST_PASS_IN_STACK
509 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
513 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515 /* EABI unwinding tables use a different format for the typeinfo tables. */
516 #undef TARGET_ASM_TTYPE
517 #define TARGET_ASM_TTYPE arm_output_ttype
519 #undef TARGET_ARM_EABI_UNWINDER
520 #define TARGET_ARM_EABI_UNWINDER true
522 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
523 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525 #undef TARGET_ASM_INIT_SECTIONS
526 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
527 #endif /* ARM_UNWIND_INFO */
529 #undef TARGET_DWARF_REGISTER_SPAN
530 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532 #undef TARGET_CANNOT_COPY_INSN_P
533 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
536 #undef TARGET_HAVE_TLS
537 #define TARGET_HAVE_TLS true
540 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
541 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543 #undef TARGET_LEGITIMATE_CONSTANT_P
544 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546 #undef TARGET_CANNOT_FORCE_CONST_MEM
547 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549 #undef TARGET_MAX_ANCHOR_OFFSET
550 #define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
555 #undef TARGET_MIN_ANCHOR_OFFSET
556 #define TARGET_MIN_ANCHOR_OFFSET -4088
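
/* As a check: the block then spans offsets -4088 through +4095 from the
   anchor, i.e. 4088 + 1 + 4095 = 8184 = 8 * 1023 bytes.  */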
558 #undef TARGET_SCHED_ISSUE_RATE
559 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561 #undef TARGET_MANGLE_TYPE
562 #define TARGET_MANGLE_TYPE arm_mangle_type
564 #undef TARGET_BUILD_BUILTIN_VA_LIST
565 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
566 #undef TARGET_EXPAND_BUILTIN_VA_START
567 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
568 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
569 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
572 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
573 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
576 #undef TARGET_LEGITIMATE_ADDRESS_P
577 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579 #undef TARGET_INVALID_PARAMETER_TYPE
580 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
582 #undef TARGET_INVALID_RETURN_TYPE
583 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
585 #undef TARGET_PROMOTED_TYPE
586 #define TARGET_PROMOTED_TYPE arm_promoted_type
588 #undef TARGET_CONVERT_TO_TYPE
589 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
591 #undef TARGET_SCALAR_MODE_SUPPORTED_P
592 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
594 #undef TARGET_FRAME_POINTER_REQUIRED
595 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
597 #undef TARGET_CAN_ELIMINATE
598 #define TARGET_CAN_ELIMINATE arm_can_eliminate
600 #undef TARGET_CONDITIONAL_REGISTER_USAGE
601 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
603 #undef TARGET_CLASS_LIKELY_SPILLED_P
604 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
606 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
607 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
608 arm_vector_alignment_reachable
610 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
611 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
612 arm_builtin_support_vector_misalignment
614 #undef TARGET_PREFERRED_RENAME_CLASS
615 #define TARGET_PREFERRED_RENAME_CLASS \
616 arm_preferred_rename_class
618 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
619 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
620 arm_vectorize_vec_perm_const_ok
622 struct gcc_target targetm = TARGET_INITIALIZER;
624 /* Obstack for minipool constant handling. */
625 static struct obstack minipool_obstack;
626 static char * minipool_startobj;
628 /* The maximum number of insns skipped which
629 will be conditionalised if possible. */
630 static int max_insns_skipped = 5;
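
/* For instance, with the default of 5, a branch around a short "else"
   arm of up to five insns can instead be emitted as conditionally
   executed insns (e.g. addne/movne), avoiding the branch entirely.  */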
632 extern FILE * asm_out_file;
634 /* True if we are currently building a constant table. */
635 int making_const_table;
637 /* The processor for which instructions should be scheduled. */
638 enum processor_type arm_tune = arm_none;
640 /* The current tuning set. */
641 const struct tune_params *current_tune;
643 /* Which floating point hardware to schedule for. */
/* Which floating point hardware to use.  */
647 const struct arm_fpu_desc *arm_fpu_desc;
649 /* Used for Thumb call_via trampolines. */
650 rtx thumb_call_via_label[14];
651 static int thumb_call_reg_needed;
653 /* Bit values used to identify processor capabilities. */
654 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
655 #define FL_ARCH3M (1 << 1) /* Extended multiply */
656 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
657 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
658 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
659 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
660 #define FL_THUMB (1 << 6) /* Thumb aware */
661 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
662 #define FL_STRONG (1 << 8) /* StrongARM */
663 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
664 #define FL_XSCALE (1 << 10) /* XScale */
665 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
666 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
667 media instructions. */
668 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
669 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
670 Note: ARM6 & 7 derivatives only. */
671 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
672 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
673 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
675 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
676 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
677 #define FL_NEON (1 << 20) /* Neon instructions. */
678 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
680 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
681 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
683 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
/* Flags that only affect tuning, not available instructions.  */
686 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
689 #define FL_FOR_ARCH2 FL_NOTM
690 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
691 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
692 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
693 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
694 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
695 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
696 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
697 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
698 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
699 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
700 #define FL_FOR_ARCH6J FL_FOR_ARCH6
701 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
702 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
703 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
704 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
705 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
706 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
707 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
708 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
709 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
710 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
712 /* The bits in this mask specify which
713 instructions we are allowed to generate. */
714 static unsigned long insn_flags = 0;
716 /* The bits in this mask specify which instruction scheduling options should
718 static unsigned long tune_flags = 0;
720 /* The following are used in the arm.md file as equivalents to bits
721 in the above two flag variables. */
723 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
726 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
729 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
732 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
735 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
738 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
741 /* Nonzero if this chip supports the ARM 6K extensions. */
744 /* Nonzero if this chip supports the ARM 7 extensions. */
747 /* Nonzero if instructions not present in the 'M' profile can be used. */
748 int arm_arch_notm = 0;
750 /* Nonzero if instructions present in ARMv7E-M can be used. */
753 /* Nonzero if this chip can benefit from load scheduling. */
754 int arm_ld_sched = 0;
756 /* Nonzero if this chip is a StrongARM. */
757 int arm_tune_strongarm = 0;
759 /* Nonzero if this chip is a Cirrus variant. */
760 int arm_arch_cirrus = 0;
762 /* Nonzero if this chip supports Intel Wireless MMX technology. */
763 int arm_arch_iwmmxt = 0;
765 /* Nonzero if this chip is an XScale. */
766 int arm_arch_xscale = 0;
/* Nonzero if tuning for XScale.  */
769 int arm_tune_xscale = 0;
771 /* Nonzero if we want to tune for stores that access the write-buffer.
772 This typically means an ARM6 or ARM7 with MMU or MPU. */
773 int arm_tune_wbuf = 0;
775 /* Nonzero if tuning for Cortex-A9. */
776 int arm_tune_cortex_a9 = 0;
778 /* Nonzero if generating Thumb instructions. */
781 /* Nonzero if generating Thumb-1 instructions. */
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
786 XXX This is a bit of a hack, it's intended to help work around
787 problems in GLD which doesn't understand that armv5t code is
788 interworking clean. */
789 int arm_cpp_interwork = 0;
791 /* Nonzero if chip supports Thumb 2. */
794 /* Nonzero if chip supports integer division instruction. */
795 int arm_arch_arm_hwdiv;
796 int arm_arch_thumb_hwdiv;
798 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
799 we must report the mode of the memory reference from
800 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
801 enum machine_mode output_memory_reference_mode;
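
/* For example, a POST_INC load of an SImode value prints as
   "ldr r0, [r1], #4"; the "#4" writeback step is derived from the
   mode recorded here.  */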
803 /* The register number to be used for the PIC offset register. */
804 unsigned arm_pic_register = INVALID_REGNUM;
806 /* Set to 1 after arm_reorg has started. Reset to start at the start of
807 the next function. */
808 static int after_arm_reorg = 0;
810 enum arm_pcs arm_pcs_default;
812 /* For an explanation of these variables, see final_prescan_insn below. */
814 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
815 enum arm_cond_code arm_current_cc;
818 int arm_target_label;
819 /* The number of conditionally executed insns, including the current insn. */
820 int arm_condexec_count = 0;
821 /* A bitmask specifying the patterns for the IT block.
822 Zero means do not output an IT block before this insn. */
823 int arm_condexec_mask = 0;
824 /* The number of bits used in arm_condexec_mask. */
825 int arm_condexec_masklen = 0;
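
/* For example, a Thumb-2 "ITE EQ" block conditionalises two insns:
   the first executes when EQ holds, the second when it does not.  */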
827 /* The condition codes of the ARM, and the inverse function. */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
834 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
840 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
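
/* Unified assembler syntax (UAL) spells left shifts "lsl"; the older
   divided ARM syntax accepted "asl" for the same operation.  */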
841 #define streq(string1, string2) (strcmp (string1, string2) == 0)
843 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
844 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
845 | (1 << PIC_OFFSET_TABLE_REGNUM)))
847 /* Initialization code. */
struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
859 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
865 const struct tune_params arm_slowmul_tune =
867 arm_slowmul_rtx_costs,
869 3, /* Constant limit. */
870 5, /* Max cond insns. */
871 ARM_PREFETCH_NOT_BENEFICIAL,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
876 const struct tune_params arm_fastmul_tune =
878 arm_fastmul_rtx_costs,
880 1, /* Constant limit. */
881 5, /* Max cond insns. */
882 ARM_PREFETCH_NOT_BENEFICIAL,
883 true, /* Prefer constant pool. */
884 arm_default_branch_cost
887 /* StrongARM has early execution of branches, so a sequence that is worth
888 skipping is shorter. Set max_insns_skipped to a lower value. */
890 const struct tune_params arm_strongarm_tune =
892 arm_fastmul_rtx_costs,
894 1, /* Constant limit. */
895 3, /* Max cond insns. */
896 ARM_PREFETCH_NOT_BENEFICIAL,
897 true, /* Prefer constant pool. */
898 arm_default_branch_cost
901 const struct tune_params arm_xscale_tune =
903 arm_xscale_rtx_costs,
904 xscale_sched_adjust_cost,
905 2, /* Constant limit. */
906 3, /* Max cond insns. */
907 ARM_PREFETCH_NOT_BENEFICIAL,
908 true, /* Prefer constant pool. */
909 arm_default_branch_cost
912 const struct tune_params arm_9e_tune =
916 1, /* Constant limit. */
917 5, /* Max cond insns. */
918 ARM_PREFETCH_NOT_BENEFICIAL,
919 true, /* Prefer constant pool. */
920 arm_default_branch_cost
923 const struct tune_params arm_v6t2_tune =
927 1, /* Constant limit. */
928 5, /* Max cond insns. */
929 ARM_PREFETCH_NOT_BENEFICIAL,
930 false, /* Prefer constant pool. */
931 arm_default_branch_cost
934 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
935 const struct tune_params arm_cortex_tune =
939 1, /* Constant limit. */
940 5, /* Max cond insns. */
941 ARM_PREFETCH_NOT_BENEFICIAL,
942 false, /* Prefer constant pool. */
943 arm_default_branch_cost
946 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
947 less appealing. Set max_insns_skipped to a low value. */
949 const struct tune_params arm_cortex_a5_tune =
953 1, /* Constant limit. */
954 1, /* Max cond insns. */
955 ARM_PREFETCH_NOT_BENEFICIAL,
956 false, /* Prefer constant pool. */
957 arm_cortex_a5_branch_cost
960 const struct tune_params arm_cortex_a9_tune =
963 cortex_a9_sched_adjust_cost,
964 1, /* Constant limit. */
965 5, /* Max cond insns. */
966 ARM_PREFETCH_BENEFICIAL(4,32,32),
967 false, /* Prefer constant pool. */
968 arm_default_branch_cost
971 const struct tune_params arm_cortex_a15_tune =
975 1, /* Constant limit. */
976 1, /* Max cond insns. */
977 ARM_PREFETCH_NOT_BENEFICIAL, /* TODO: Calculate correct values. */
978 false, /* Prefer constant pool. */
979 arm_cortex_a5_branch_cost
982 const struct tune_params arm_fa726te_tune =
985 fa726te_sched_adjust_cost,
986 1, /* Constant limit. */
987 5, /* Max cond insns. */
988 ARM_PREFETCH_NOT_BENEFICIAL,
989 true, /* Prefer constant pool. */
990 arm_default_branch_cost
994 /* Not all of these give usefully different compilation alternatives,
995 but there is no simple way of generalizing them. */
static const struct processors all_cores[] =
{
999 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1000 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1001 #include "arm-cores.def"
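
/* For illustration: a hypothetical entry ARM_CORE("foo", foo, 7A,
   FL_LDSCHED, cortex) in arm-cores.def would expand to
   {"foo", foo, "7A", FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune}.  */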
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
1008 /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */
1012 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1013 {NAME, CORE, #ARCH, FLAGS, NULL},
1014 #include "arm-arches.def"
  {NULL, arm_none, NULL, 0, NULL}
};
1020 /* These are populated as commandline arguments are processed, or NULL
1021 if not specified. */
1022 static const struct processors *arm_selected_arch;
1023 static const struct processors *arm_selected_cpu;
1024 static const struct processors *arm_selected_tune;
1026 /* The name of the preprocessor macro to define for this architecture. */
1028 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1030 /* Available values for -mfpu=. */
static const struct arm_fpu_desc all_fpus[] =
{
1034 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1035 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
};
1041 /* Supported TLS relocations. */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
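
/* For example, bit_count (0x2c) peels 0b101100 -> 0b101000 -> 0b100000
   -> 0 in three iterations and returns 3.  */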
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point libfuncs.  */
static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
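
/* For example, registering add_optab for QQmode ("qq") with num_suffix 3
   produces the name "__gnu_addqq3", matching libgcc's fixed-point
   routines built with the __gnu_ prefix.  */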
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
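
/* For example, a fract conversion from QQmode ("qq") to HQmode ("hq"),
   both signed fractional modes, takes the "2" suffix and yields
   "__gnu_fractqqhq2".  */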
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
1130 /* For Linux, we have access to kernel support for atomic operations. */
1131 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1132 init_sync_libfuncs (2 * UNITS_PER_WORD);
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
1139 /* The functions below are described in Section 4 of the "Run-Time
1140 ABI for the ARM architecture", Version 1.0. */
1142 /* Double-precision floating-point arithmetic. Table 2. */
1143 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1144 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1145 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1146 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1147 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1149 /* Double-precision comparisons. Table 3. */
1150 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1151 set_optab_libfunc (ne_optab, DFmode, NULL);
1152 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1153 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1154 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1155 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1156 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1158 /* Single-precision floating-point arithmetic. Table 4. */
1159 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1160 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1161 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1162 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1163 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1165 /* Single-precision comparisons. Table 5. */
1166 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1167 set_optab_libfunc (ne_optab, SFmode, NULL);
1168 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1169 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1170 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1171 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1172 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1174 /* Floating-point to integer conversions. Table 6. */
1175 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1176 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1177 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1178 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1179 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1180 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1181 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1182 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1184 /* Conversions between floating types. Table 7. */
1185 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1186 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1188 /* Integer to floating-point conversions. Table 8. */
1189 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1190 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1191 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1192 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1193 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1194 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1195 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1196 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1198 /* Long long. Table 9. */
1199 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1200 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1201 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1202 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1203 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1204 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1205 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1206 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1208 /* Integer (32/32->32) division. \S 4.3.1. */
1209 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1210 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1212 /* The divmod functions are designed so that they can be used for
1213 plain division, even though they return both the quotient and the
1214 remainder. The quotient is returned in the usual location (i.e.,
1215 r0 for SImode, {r0, r1} for DImode), just as would be expected
1216 for an ordinary division routine. Because the AAPCS calling
1217 conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
1221 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1222 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
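
  /* For example, __aeabi_idivmod returns the quotient in r0 and the
     remainder in r1; a caller that wants only the quotient simply
     ignores r1.  */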
1224 /* For SImode division the ABI provides div-without-mod routines,
1225 which are faster. */
1226 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1227 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1229 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1230 divmod libcalls instead. */
1231 set_optab_libfunc (smod_optab, DImode, NULL);
1232 set_optab_libfunc (umod_optab, DImode, NULL);
1233 set_optab_libfunc (smod_optab, SImode, NULL);
1234 set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
1238 switch (arm_fp16_format)
1240 case ARM_FP16_FORMAT_IEEE:
1241 case ARM_FP16_FORMAT_ALTERNATIVE:
1244 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1245 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1247 : "__gnu_f2h_alternative"));
1248 set_conv_libfunc (sext_optab, SFmode, HFmode,
1249 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1251 : "__gnu_h2f_alternative"));
1254 set_optab_libfunc (add_optab, HFmode, NULL);
1255 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1256 set_optab_libfunc (smul_optab, HFmode, NULL);
1257 set_optab_libfunc (neg_optab, HFmode, NULL);
1258 set_optab_libfunc (sub_optab, HFmode, NULL);
1261 set_optab_libfunc (eq_optab, HFmode, NULL);
1262 set_optab_libfunc (ne_optab, HFmode, NULL);
1263 set_optab_libfunc (lt_optab, HFmode, NULL);
1264 set_optab_libfunc (le_optab, HFmode, NULL);
1265 set_optab_libfunc (ge_optab, HFmode, NULL);
1266 set_optab_libfunc (gt_optab, HFmode, NULL);
1267 set_optab_libfunc (unord_optab, HFmode, NULL);
1274 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1276 const arm_fixed_mode_set fixed_arith_modes[] =
1297 const arm_fixed_mode_set fixed_conv_modes[] =
1327 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1329 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1330 "add", fixed_arith_modes[i].name, 3);
1331 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1332 "ssadd", fixed_arith_modes[i].name, 3);
1333 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1334 "usadd", fixed_arith_modes[i].name, 3);
1335 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1336 "sub", fixed_arith_modes[i].name, 3);
1337 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1338 "sssub", fixed_arith_modes[i].name, 3);
1339 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1340 "ussub", fixed_arith_modes[i].name, 3);
1341 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1342 "mul", fixed_arith_modes[i].name, 3);
1343 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1344 "ssmul", fixed_arith_modes[i].name, 3);
1345 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1346 "usmul", fixed_arith_modes[i].name, 3);
1347 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1348 "div", fixed_arith_modes[i].name, 3);
1349 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1350 "udiv", fixed_arith_modes[i].name, 3);
1351 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1352 "ssdiv", fixed_arith_modes[i].name, 3);
1353 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1354 "usdiv", fixed_arith_modes[i].name, 3);
1355 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1356 "neg", fixed_arith_modes[i].name, 2);
1357 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1358 "ssneg", fixed_arith_modes[i].name, 2);
1359 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1360 "usneg", fixed_arith_modes[i].name, 2);
1361 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1362 "ashl", fixed_arith_modes[i].name, 3);
1363 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1364 "ashr", fixed_arith_modes[i].name, 3);
1365 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1366 "lshr", fixed_arith_modes[i].name, 3);
1367 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1368 "ssashl", fixed_arith_modes[i].name, 3);
1369 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1370 "usashl", fixed_arith_modes[i].name, 3);
1371 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1372 "cmp", fixed_arith_modes[i].name, 2);
1375 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;
1383 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1384 fixed_conv_modes[j].mode, "fract",
1385 fixed_conv_modes[i].name,
1386 fixed_conv_modes[j].name);
1387 arm_set_fixed_conv_libfunc (satfract_optab,
1388 fixed_conv_modes[i].mode,
1389 fixed_conv_modes[j].mode, "satfract",
1390 fixed_conv_modes[i].name,
1391 fixed_conv_modes[j].name);
1392 arm_set_fixed_conv_libfunc (fractuns_optab,
1393 fixed_conv_modes[i].mode,
1394 fixed_conv_modes[j].mode, "fractuns",
1395 fixed_conv_modes[i].name,
1396 fixed_conv_modes[j].name);
1397 arm_set_fixed_conv_libfunc (satfractuns_optab,
1398 fixed_conv_modes[i].mode,
1399 fixed_conv_modes[j].mode, "satfractuns",
1400 fixed_conv_modes[i].name,
1401 fixed_conv_modes[j].name);
1405 if (TARGET_AAPCS_BASED)
1406 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1409 /* On AAPCS systems, this is the "struct __va_list". */
1410 static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };
1430 The C Library ABI further reinforces this definition in \S
1433 We must follow this definition exactly. The structure tag
1434 name is visible in C++ mangled names, and thus forms a part
1435 of the ABI. The field name may be used by people who
1436 #include <stdarg.h>. */
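
  /* Concretely, arm_mangle_type mangles va_list as "St9__va_list"
     (as if the tag lived in namespace std), so any deviation here
     would change C++ mangled names.  */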
1437 /* Create the type. */
1438 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1439 /* Give it the required name. */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
1444 DECL_ARTIFICIAL (va_list_name) = 1;
1445 TYPE_NAME (va_list_type) = va_list_name;
1446 TYPE_STUB_DECL (va_list_type) = va_list_name;
1447 /* Create the __ap field. */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
1452 DECL_ARTIFICIAL (ap_field) = 1;
1453 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1454 TYPE_FIELDS (va_list_type) = ap_field;
1455 /* Compute its layout. */
1456 layout_type (va_list_type);
1458 return va_list_type;
1461 /* Return an expression of type "void *" pointing to the next
1462 available argument in a variable-argument list. VALIST is the
1463 user-level va_list object, of type __builtin_va_list. */
static tree
arm_extract_valist_ptr (tree valist)
{
1467 if (TREE_TYPE (valist) == error_mark_node)
1468 return error_mark_node;
  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
1472 if (TARGET_AAPCS_BASED)
1474 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1475 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1476 valist, ap_field, NULL_TREE);
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
1486 valist = arm_extract_valist_ptr (valist);
1487 std_expand_builtin_va_start (valist, nextarg);
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
1495 valist = arm_extract_valist_ptr (valist);
1496 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
1503 if (global_options_set.x_arm_arch_option)
1504 arm_selected_arch = &all_architectures[arm_arch_option];
1506 if (global_options_set.x_arm_cpu_option)
1507 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1509 if (global_options_set.x_arm_tune_option)
1510 arm_selected_tune = &all_cores[(int) arm_tune_option];
1512 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1513 SUBTARGET_OVERRIDE_OPTIONS;
1516 if (arm_selected_arch)
1518 if (arm_selected_cpu)
1520 /* Check for conflict between mcpu and march. */
1521 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1523 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1524 arm_selected_cpu->name, arm_selected_arch->name);
1525 /* -march wins for code generation.
1526 -mcpu wins for default tuning. */
1527 if (!arm_selected_tune)
1528 arm_selected_tune = arm_selected_cpu;
1530 arm_selected_cpu = arm_selected_arch;
1534 arm_selected_arch = NULL;
1537 /* Pick a CPU based on the architecture. */
1538 arm_selected_cpu = arm_selected_arch;
1541 /* If the user did not specify a processor, choose one for them. */
1542 if (!arm_selected_cpu)
1544 const struct processors * sel;
1545 unsigned int sought;
1547 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1548 if (!arm_selected_cpu->name)
1550 #ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
1553 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1555 /* Default to ARM6. */
1556 if (!arm_selected_cpu->name)
1557 arm_selected_cpu = &all_cores[arm6];
1560 sel = arm_selected_cpu;
1561 insn_flags = sel->flags;
  /* Now check to see if the user has specified some command line
     switches that require certain abilities from the cpu.  */
1567 if (TARGET_INTERWORK || TARGET_THUMB)
1569 sought |= (FL_THUMB | FL_MODE32);
1571 /* There are no ARM processors that support both APCS-26 and
1572 interworking. Therefore we force FL_MODE26 to be removed
1573 from insn_flags here (if it was set), so that the search
1574 below will always be able to find a compatible processor. */
1575 insn_flags &= ~FL_MODE26;
1578 if (sought != 0 && ((sought & insn_flags) != sought))
1580 /* Try to locate a CPU type that supports all of the abilities
1581 of the default CPU, plus the extra abilities requested by
1583 for (sel = all_cores; sel->name != NULL; sel++)
1584 if ((sel->flags & sought) == (sought | insn_flags))
1587 if (sel->name == NULL)
1589 unsigned current_bit_count = 0;
1590 const struct processors * best_fit = NULL;
1592 /* Ideally we would like to issue an error message here
1593 saying that it was not possible to find a CPU compatible
1594 with the default CPU, but which also supports the command
1595 line options specified by the programmer, and so they
1596 ought to use the -mcpu=<name> command line option to
1597 override the default CPU type.
1599 If we cannot find a cpu that has both the
1600 characteristics of the default cpu and the given
1601 command line options we scan the array again looking
1602 for a best match. */
1603 for (sel = all_cores; sel->name != NULL; sel++)
1604 if ((sel->flags & sought) == sought)
1608 count = bit_count (sel->flags & insn_flags);
1610 if (count >= current_bit_count)
1613 current_bit_count = count;
1617 gcc_assert (best_fit);
1621 arm_selected_cpu = sel;
1625 gcc_assert (arm_selected_cpu);
/* The selected cpu may be an architecture, so look up tuning by core ID.  */
1627 if (!arm_selected_tune)
1628 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1630 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1631 insn_flags = arm_selected_cpu->flags;
1633 arm_tune = arm_selected_tune->core;
1634 tune_flags = arm_selected_tune->flags;
1635 current_tune = arm_selected_tune->tune;
1637 /* Make sure that the processor choice does not conflict with any of the
1638 other command line choices. */
1639 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1640 error ("target CPU does not support ARM mode");
1642 /* BPABI targets use linker tricks to allow interworking on cores
1643 without thumb support. */
1644 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
      warning (0, "target CPU does not support interworking");
1647 target_flags &= ~MASK_INTERWORK;
1650 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1652 warning (0, "target CPU does not support THUMB instructions");
1653 target_flags &= ~MASK_THUMB;
1656 if (TARGET_APCS_FRAME && TARGET_THUMB)
1658 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1659 target_flags &= ~MASK_APCS_FRAME;
1662 /* Callee super interworking implies thumb interworking. Adding
1663 this to the flags here simplifies the logic elsewhere. */
1664 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1665 target_flags |= MASK_INTERWORK;
1667 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1668 from here where no function is being compiled currently. */
1669 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1670 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1672 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1673 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1675 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1677 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1678 target_flags |= MASK_APCS_FRAME;
1681 if (TARGET_POKE_FUNCTION_NAME)
1682 target_flags |= MASK_APCS_FRAME;
1684 if (TARGET_APCS_REENT && flag_pic)
1685 error ("-fpic and -mapcs-reent are incompatible");
1687 if (TARGET_APCS_REENT)
1688 warning (0, "APCS reentrant code not supported. Ignored");
1690 /* If this target is normally configured to use APCS frames, warn if they
1691 are turned off and debugging is turned on. */
1692 if (TARGET_ARM
1693 && write_symbols != NO_DEBUG
1694 && !TARGET_APCS_FRAME
1695 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1696 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1698 if (TARGET_APCS_FLOAT)
1699 warning (0, "passing floating point arguments in fp regs not yet supported");
1701 if (TARGET_LITTLE_WORDS)
1702 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1703 "will be removed in a future release");
1705 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1706 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1707 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1708 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1709 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1710 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1711 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1712 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1713 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1714 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1715 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1716 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1717 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1718 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1720 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1721 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1722 thumb_code = TARGET_ARM == 0;
1723 thumb1_code = TARGET_THUMB1 != 0;
1724 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1725 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1726 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1727 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1728 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1729 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1731 /* If we are not using the default (ARM mode) section anchor offset
1732 ranges, then set the correct ranges now. */
1733 if (TARGET_THUMB1)
1735 /* Thumb-1 LDR instructions cannot have negative offsets.
1736 Permissible positive offset ranges are 5-bit (for byte loads),
1737 6-bit (for halfword loads), or 7-bit (for word loads).
1738 Empirical results suggest a 7-bit anchor range gives the best
1739 overall code size. */
1740 targetm.min_anchor_offset = 0;
1741 targetm.max_anchor_offset = 127;
1743 else if (TARGET_THUMB2)
1745 /* The minimum is set such that the total size of the block
1746 for a particular anchor is 248 + 1 + 4095 bytes, which is
1747 divisible by eight, ensuring natural spacing of anchors. */
1748 targetm.min_anchor_offset = -248;
1749 targetm.max_anchor_offset = 4095;
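/* A quick sanity check of that arithmetic (added for illustration,
   not part of the original sources): the block spans offsets -248
   through 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543,
   so each anchor block really is a whole number of doublewords. */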
1752 /* V5 code we generate is completely interworking capable, so we turn off
1753 TARGET_INTERWORK here to avoid many tests later on. */
1755 /* XXX However, we must pass the right pre-processor defines to CPP
1756 or GLD can get confused. This is a hack. */
1757 if (TARGET_INTERWORK)
1758 arm_cpp_interwork = 1;
1760 if (arm_arch5)
1761 target_flags &= ~MASK_INTERWORK;
1763 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1764 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1766 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1767 error ("iwmmxt abi requires an iwmmxt capable cpu");
1769 if (!global_options_set.x_arm_fpu_index)
1771 const char *target_fpu_name;
1772 bool ok;
1774 #ifdef FPUTYPE_DEFAULT
1775 target_fpu_name = FPUTYPE_DEFAULT;
1776 #else
1777 if (arm_arch_cirrus)
1778 target_fpu_name = "maverick";
1779 else
1780 target_fpu_name = "fpe2";
1781 #endif
1783 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1784 CL_TARGET);
1785 gcc_assert (ok);
1788 arm_fpu_desc = &all_fpus[arm_fpu_index];
1790 switch (arm_fpu_desc->model)
1792 case ARM_FP_MODEL_FPA:
1793 if (arm_fpu_desc->rev == 2)
1794 arm_fpu_attr = FPU_FPE2;
1795 else if (arm_fpu_desc->rev == 3)
1796 arm_fpu_attr = FPU_FPE3;
1797 else
1798 arm_fpu_attr = FPU_FPA;
1799 break;
1801 case ARM_FP_MODEL_MAVERICK:
1802 arm_fpu_attr = FPU_MAVERICK;
1803 break;
1805 case ARM_FP_MODEL_VFP:
1806 arm_fpu_attr = FPU_VFP;
1807 break;
1809 default:
1810 gcc_unreachable ();
1813 if (TARGET_AAPCS_BASED
1814 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1815 error ("FPA is unsupported in the AAPCS");
1817 if (TARGET_AAPCS_BASED)
1819 if (TARGET_CALLER_INTERWORKING)
1820 error ("AAPCS does not support -mcaller-super-interworking");
1822 if (TARGET_CALLEE_INTERWORKING)
1823 error ("AAPCS does not support -mcallee-super-interworking");
1826 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1827 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1828 will ever exist. GCC makes no attempt to support this combination. */
1829 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1830 sorry ("iWMMXt and hardware floating point");
1832 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1833 if (TARGET_THUMB2 && TARGET_IWMMXT)
1834 sorry ("Thumb-2 iWMMXt");
1836 /* __fp16 support currently assumes the core has ldrh. */
1837 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1838 sorry ("__fp16 and no ldrh");
1840 /* If soft-float is specified then don't use FPU. */
1841 if (TARGET_SOFT_FLOAT)
1842 arm_fpu_attr = FPU_NONE;
1844 if (TARGET_AAPCS_BASED)
1846 if (arm_abi == ARM_ABI_IWMMXT)
1847 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1848 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1849 && TARGET_HARD_FLOAT
1850 && TARGET_VFP)
1851 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1852 else
1853 arm_pcs_default = ARM_PCS_AAPCS;
1857 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1858 sorry ("-mfloat-abi=hard and VFP");
1860 if (arm_abi == ARM_ABI_APCS)
1861 arm_pcs_default = ARM_PCS_APCS;
1862 else
1863 arm_pcs_default = ARM_PCS_ATPCS;
1866 /* For arm2/3 there is no need to do any scheduling if there is only
1867 a floating point emulator, or we are doing software floating-point. */
1868 if ((TARGET_SOFT_FLOAT
1869 || (TARGET_FPA && arm_fpu_desc->rev))
1870 && (tune_flags & FL_MODE32) == 0)
1871 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1873 /* Use the cp15 method if it is available. */
1874 if (target_thread_pointer == TP_AUTO)
1876 if (arm_arch6k && !TARGET_THUMB1)
1877 target_thread_pointer = TP_CP15;
1878 else
1879 target_thread_pointer = TP_SOFT;
1882 if (TARGET_HARD_TP && TARGET_THUMB1)
1883 error ("can not use -mtp=cp15 with 16-bit Thumb");
1885 /* Override the default structure alignment for AAPCS ABI. */
1886 if (!global_options_set.x_arm_structure_size_boundary)
1888 if (TARGET_AAPCS_BASED)
1889 arm_structure_size_boundary = 8;
1893 if (arm_structure_size_boundary != 8
1894 && arm_structure_size_boundary != 32
1895 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1897 if (ARM_DOUBLEWORD_ALIGN)
1898 warning (0,
1899 "structure size boundary can only be set to 8, 32 or 64");
1900 else
1901 warning (0, "structure size boundary can only be set to 8 or 32");
1902 arm_structure_size_boundary
1903 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
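/* Illustrative example of the option's effect (ours, not from this
   file): with -mstructure-size-boundary=32, a type such as
   struct tag { char c; } is padded so sizeof (struct tag) == 4,
   whereas the AAPCS value of 8 leaves it at 1 byte, so arrays of
   such structs change layout with this option. */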
1907 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1909 error ("RTP PIC is incompatible with Thumb");
1913 /* If stack checking is disabled, we can use r10 as the PIC register,
1914 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1915 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1917 if (TARGET_VXWORKS_RTP)
1918 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1919 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1922 if (flag_pic && TARGET_VXWORKS_RTP)
1923 arm_pic_register = 9;
1925 if (arm_pic_register_string != NULL)
1927 int pic_register = decode_reg_name (arm_pic_register_string);
1929 if (!flag_pic)
1930 warning (0, "-mpic-register= is useless without -fpic");
1932 /* Prevent the user from choosing an obviously stupid PIC register. */
1933 else if (pic_register < 0 || call_used_regs[pic_register]
1934 || pic_register == HARD_FRAME_POINTER_REGNUM
1935 || pic_register == STACK_POINTER_REGNUM
1936 || pic_register >= PC_REGNUM
1937 || (TARGET_VXWORKS_RTP
1938 && (unsigned int) pic_register != arm_pic_register))
1939 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1940 else
1941 arm_pic_register = pic_register;
1944 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1945 if (fix_cm3_ldrd == 2)
1947 if (arm_selected_cpu->core == cortexm3)
1948 fix_cm3_ldrd = 1;
1949 else
1950 fix_cm3_ldrd = 0;
1953 /* Enable -munaligned-access by default for
1954 - all ARMv6 architecture-based processors
1955 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1957 Disable -munaligned-access by default for
1958 - all pre-ARMv6 architecture-based processors
1959 - ARMv6-M architecture-based processors. */
1961 if (unaligned_access == 2)
1963 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1964 unaligned_access = 1;
1965 else
1966 unaligned_access = 0;
1968 else if (unaligned_access == 1
1969 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1971 warning (0, "target CPU does not support unaligned accesses");
1972 unaligned_access = 0;
1975 if (TARGET_THUMB1 && flag_schedule_insns)
1977 /* Don't warn since it's on by default in -O2. */
1978 flag_schedule_insns = 0;
1981 if (optimize_size)
1983 /* If optimizing for size, bump the number of instructions that we
1984 are prepared to conditionally execute (even on a StrongARM). */
1985 max_insns_skipped = 6;
1987 else
1988 max_insns_skipped = current_tune->max_insns_skipped;
1990 /* Hot/Cold partitioning is not currently supported, since we can't
1991 handle literal pool placement in that case. */
1992 if (flag_reorder_blocks_and_partition)
1994 inform (input_location,
1995 "-freorder-blocks-and-partition not supported on this architecture");
1996 flag_reorder_blocks_and_partition = 0;
1997 flag_reorder_blocks = 1;
2001 /* Hoisting PIC address calculations more aggressively provides a small,
2002 but measurable, size reduction for PIC code. Therefore, we decrease
2003 the bar for unrestricted expression hoisting to the cost of PIC address
2004 calculation, which is 2 instructions. */
2005 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2006 global_options.x_param_values,
2007 global_options_set.x_param_values);
2009 /* ARM EABI defaults to strict volatile bitfields. */
2010 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2011 && abi_version_at_least(2))
2012 flag_strict_volatile_bitfields = 1;
2014 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2015 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2016 if (flag_prefetch_loop_arrays < 0
2017 && HAVE_prefetch
2018 && optimize >= 3
2019 && current_tune->num_prefetch_slots > 0)
2020 flag_prefetch_loop_arrays = 1;
2022 /* Set up parameters to be used in prefetching algorithm. Do not override the
2023 defaults unless we are tuning for a core we have researched values for. */
2024 if (current_tune->num_prefetch_slots > 0)
2025 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2026 current_tune->num_prefetch_slots,
2027 global_options.x_param_values,
2028 global_options_set.x_param_values);
2029 if (current_tune->l1_cache_line_size >= 0)
2030 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2031 current_tune->l1_cache_line_size,
2032 global_options.x_param_values,
2033 global_options_set.x_param_values);
2034 if (current_tune->l1_cache_size >= 0)
2035 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2036 current_tune->l1_cache_size,
2037 global_options.x_param_values,
2038 global_options_set.x_param_values);
2040 /* Register global variables with the garbage collector. */
2041 arm_add_gc_roots ();
2044 static void
2045 arm_add_gc_roots (void)
2047 gcc_obstack_init(&minipool_obstack);
2048 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2051 /* A table of known ARM exception types.
2052 For use with the interrupt function attribute. */
2054 typedef struct
2055 {
2056 const char *const arg;
2057 const unsigned long return_value;
2058 }
2059 isr_attribute_arg;
2061 static const isr_attribute_arg isr_attribute_args [] =
2062 {
2063 { "IRQ", ARM_FT_ISR },
2064 { "irq", ARM_FT_ISR },
2065 { "FIQ", ARM_FT_FIQ },
2066 { "fiq", ARM_FT_FIQ },
2067 { "ABORT", ARM_FT_ISR },
2068 { "abort", ARM_FT_ISR },
2069 { "ABORT", ARM_FT_ISR },
2070 { "abort", ARM_FT_ISR },
2071 { "UNDEF", ARM_FT_EXCEPTION },
2072 { "undef", ARM_FT_EXCEPTION },
2073 { "SWI", ARM_FT_EXCEPTION },
2074 { "swi", ARM_FT_EXCEPTION },
2075 { NULL, ARM_FT_NORMAL }
2076 };
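/* For illustration, these strings are matched against user code of the
   form (a sketch of typical usage, not taken from this file):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   Both the "interrupt" and "isr" attribute spellings are looked up by
   arm_compute_func_type below. */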
2078 /* Returns the (interrupt) function type of the current
2079 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2081 static unsigned long
2082 arm_isr_value (tree argument)
2084 const isr_attribute_arg * ptr;
2085 const char * arg;
2087 if (!arm_arch_notm)
2088 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2090 /* No argument - default to IRQ. */
2091 if (argument == NULL_TREE)
2092 return ARM_FT_ISR;
2094 /* Get the value of the argument. */
2095 if (TREE_VALUE (argument) == NULL_TREE
2096 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2097 return ARM_FT_UNKNOWN;
2099 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2101 /* Check it against the list of known arguments. */
2102 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2103 if (streq (arg, ptr->arg))
2104 return ptr->return_value;
2106 /* An unrecognized interrupt type. */
2107 return ARM_FT_UNKNOWN;
2110 /* Computes the type of the current function. */
2112 static unsigned long
2113 arm_compute_func_type (void)
2115 unsigned long type = ARM_FT_UNKNOWN;
2119 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2121 /* Decide if the current function is volatile. Such functions
2122 never return, and many memory cycles can be saved by not storing
2123 register values that will never be needed again. This optimization
2124 was added to speed up context switching in a kernel application. */
2125 if (optimize > 0
2126 && (TREE_NOTHROW (current_function_decl)
2127 || !(flag_unwind_tables
2128 || (flag_exceptions
2129 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2130 && TREE_THIS_VOLATILE (current_function_decl))
2131 type |= ARM_FT_VOLATILE;
2133 if (cfun->static_chain_decl != NULL)
2134 type |= ARM_FT_NESTED;
2136 attr = DECL_ATTRIBUTES (current_function_decl);
2138 a = lookup_attribute ("naked", attr);
2139 if (a != NULL_TREE)
2140 type |= ARM_FT_NAKED;
2142 a = lookup_attribute ("isr", attr);
2143 if (a == NULL_TREE)
2144 a = lookup_attribute ("interrupt", attr);
2146 if (a == NULL_TREE)
2147 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2148 else
2149 type |= arm_isr_value (TREE_VALUE (a));
2151 return type;
2154 /* Returns the type of the current function. */
2156 unsigned long
2157 arm_current_func_type (void)
2159 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2160 cfun->machine->func_type = arm_compute_func_type ();
2162 return cfun->machine->func_type;
2165 static bool
2166 arm_allocate_stack_slots_for_args (void)
2168 /* Naked functions should not allocate stack slots for arguments. */
2169 return !IS_NAKED (arm_current_func_type ());
2173 /* Output assembler code for a block containing the constant parts
2174 of a trampoline, leaving space for the variable parts.
2176 On the ARM, (if r8 is the static chain regnum, and remembering that
2177 referencing pc adds an offset of 8) the trampoline looks like:
2178 ldr r8, [pc, #0]
2179 ldr pc, [pc, #0]
2180 .word static chain value
2181 .word function's address
2182 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2184 static void
2185 arm_asm_trampoline_template (FILE *f)
2187 if (TARGET_ARM)
2189 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2190 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2192 else if (TARGET_THUMB2)
2194 /* The Thumb-2 trampoline is similar to the arm implementation.
2195 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2196 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2197 STATIC_CHAIN_REGNUM, PC_REGNUM);
2198 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2202 ASM_OUTPUT_ALIGN (f, 2);
2203 fprintf (f, "\t.code\t16\n");
2204 fprintf (f, ".Ltrampoline_start:\n");
2205 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2206 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2207 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2208 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2209 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2210 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2212 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2213 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
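/* Walk-through added for illustration (not from the original sources):
   the ARM-state loads above use offset #0 while arm_trampoline_init
   below writes the chain and function address at offsets 8 and 12.
   In ARM state, reading PC yields the instruction's own address plus
   8, so the first ldr (at offset 0) fetches the word at offset 8 (the
   static chain), and the second (at offset 4) fetches offset 12 (the
   target address, loaded straight into the PC). */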
2216 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2218 static void
2219 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2221 rtx fnaddr, mem, a_tramp;
2223 emit_block_move (m_tramp, assemble_trampoline_template (),
2224 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2226 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2227 emit_move_insn (mem, chain_value);
2229 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2230 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2231 emit_move_insn (mem, fnaddr);
2233 a_tramp = XEXP (m_tramp, 0);
2234 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2235 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2236 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2239 /* Thumb trampolines should be entered in thumb mode, so set
2240 the bottom bit of the address. */
2242 static rtx
2243 arm_trampoline_adjust_address (rtx addr)
2245 if (TARGET_THUMB)
2246 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2247 NULL, 0, OPTAB_LIB_WIDEN);
2248 return addr;
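/* For example (illustration only): if the trampoline lives at 0x8000
   and we are generating Thumb code, the address handed out is 0x8001;
   the set low bit makes a later BX/BLX enter the stub in Thumb state. */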
2251 /* Return 1 if it is possible to return using a single instruction.
2252 If SIBLING is non-null, this is a test for a return before a sibling
2253 call. SIBLING is the call insn, so we can examine its register usage. */
2255 int
2256 use_return_insn (int iscond, rtx sibling)
2258 int regno;
2259 unsigned int func_type;
2260 unsigned long saved_int_regs;
2261 unsigned HOST_WIDE_INT stack_adjust;
2262 arm_stack_offsets *offsets;
2264 /* Never use a return instruction before reload has run. */
2265 if (!reload_completed)
2266 return 0;
2268 func_type = arm_current_func_type ();
2270 /* Naked, volatile and stack alignment functions need special
2271 consideration. */
2272 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2273 return 0;
2275 /* So do interrupt functions that use the frame pointer and Thumb
2276 interrupt functions. */
2277 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2278 return 0;
2280 offsets = arm_get_frame_offsets ();
2281 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2283 /* As do variadic functions. */
2284 if (crtl->args.pretend_args_size
2285 || cfun->machine->uses_anonymous_args
2286 /* Or if the function calls __builtin_eh_return () */
2287 || crtl->calls_eh_return
2288 /* Or if the function calls alloca */
2289 || cfun->calls_alloca
2290 /* Or if there is a stack adjustment. However, if the stack pointer
2291 is saved on the stack, we can use a pre-incrementing stack load. */
2292 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2293 && stack_adjust == 4)))
2294 return 0;
2296 saved_int_regs = offsets->saved_regs_mask;
2298 /* Unfortunately, the insn
2300 ldmib sp, {..., sp, ...}
2302 triggers a bug on most SA-110 based devices, such that the stack
2303 pointer won't be correctly restored if the instruction takes a
2304 page fault. We work around this problem by popping r3 along with
2305 the other registers, since that is never slower than executing
2306 another instruction.
2308 We test for !arm_arch5 here, because code for any architecture
2309 less than this could potentially be run on one of the buggy
2311 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2313 /* Validate that r3 is a call-clobbered register (always true in
2314 the default abi) ... */
2315 if (!call_used_regs[3])
2316 return 0;
2318 /* ... that it isn't being used for a return value ... */
2319 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2320 return 0;
2322 /* ... or for a tail-call argument ... */
2323 if (sibling)
2325 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2327 if (find_regno_fusage (sibling, USE, 3))
2328 return 0;
2331 /* ... and that there are no call-saved registers in r0-r2
2332 (always true in the default ABI). */
2333 if (saved_int_regs & 0x7)
2334 return 0;
2337 /* Can't be done if interworking with Thumb, and any registers have been
2338 stacked. */
2339 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2340 return 0;
2342 /* On StrongARM, conditional returns are expensive if they aren't
2343 taken and multiple registers have been stacked. */
2344 if (iscond && arm_tune_strongarm)
2346 /* Conditional return when just the LR is stored is a simple
2347 conditional-load instruction, that's not expensive. */
2348 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2349 return 0;
2351 if (flag_pic
2352 && arm_pic_register != INVALID_REGNUM
2353 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2354 return 0;
2357 /* If there are saved registers but the LR isn't saved, then we need
2358 two instructions for the return. */
2359 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2360 return 0;
2362 /* Can't be done if any of the FPA regs are pushed,
2363 since this also requires an insn. */
2364 if (TARGET_HARD_FLOAT && TARGET_FPA)
2365 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2366 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2367 return 0;
2369 /* Likewise VFP regs. */
2370 if (TARGET_HARD_FLOAT && TARGET_VFP)
2371 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2372 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2373 return 0;
2375 if (TARGET_REALLY_IWMMXT)
2376 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2377 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2378 return 0;
2380 return 1;
2383 /* Return TRUE if int I is a valid immediate ARM constant. */
2385 int
2386 const_ok_for_arm (HOST_WIDE_INT i)
2388 int lowbit;
2390 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2391 be all zero, or all one. */
2392 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2393 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2394 != ((~(unsigned HOST_WIDE_INT) 0)
2395 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2396 return FALSE;
2398 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2400 /* Fast return for 0 and small values. We must do this for zero, since
2401 the code below can't handle that one case. */
2402 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2403 return TRUE;
2405 /* Get the number of trailing zeros. */
2406 lowbit = ffs((int) i) - 1;
2408 /* Only even shifts are allowed in ARM mode so round down to the
2409 nearest even number. */
2410 if (TARGET_ARM)
2411 lowbit &= ~1;
2413 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2414 return TRUE;
2418 /* Allow rotated constants in ARM mode. */
2419 if (TARGET_ARM
2420 && ((i & ~0xc000003f) == 0
2421 || (i & ~0xf000000f) == 0
2422 || (i & ~0xfc000003) == 0))
2423 return TRUE;
2426 if (TARGET_THUMB2)
2428 HOST_WIDE_INT v;
2429 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2430 v = i & 0xff;
2431 v |= v << 16;
2432 if (i == v || i == (v | (v << 8)))
2433 return TRUE;
2435 /* Allow repeated pattern 0xXY00XY00. */
2436 v = i & 0xff00;
2437 v |= v << 16;
2438 if (i == v)
2439 return TRUE;
2441 return FALSE;
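/* The ARM-mode cases above all reduce to one rule: an immediate is
   encodable when it is an 8-bit value rotated right by an even amount.
   The following self-contained sketch (added for illustration; the
   function name is ours and nothing in GCC uses it) restates that rule
   directly. */

#if 0 /* Illustrative only. */
static int
arm_mode_immediate_ok_sketch (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* X is encodable iff some even left-rotation of it fits in the
	 low 8 bits (rotating X left by ROT undoes an encoded rotate
	 right by ROT).  Guard ROT == 0 to avoid a shift by 32.  */
      unsigned int y = rot ? ((x << rot) | (x >> (32 - rot))) : x;

      if ((y & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif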
2445 /* Return true if I is a valid constant for the operation CODE. */
2446 static int
2447 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2449 if (const_ok_for_arm (i))
2450 return 1;
2452 switch (code)
2454 case SET:
2455 /* See if we can use movw. */
2456 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2457 return 1;
2458 else
2459 /* Otherwise, try mvn. */
2460 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2462 case PLUS:
2463 /* See if we can use addw or subw. */
2464 if (TARGET_THUMB2
2465 && ((i & 0xfffff000) == 0
2466 || ((-i) & 0xfffff000) == 0))
2467 return 1;
2468 /* else fall through. */
2488 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2490 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2491 case XOR:
2492 return 0;
2494 case IOR:
2495 if (TARGET_THUMB2)
2496 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2497 return 0;
2499 case AND:
2500 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2502 default:
2503 gcc_unreachable ();
2507 /* Emit a sequence of insns to handle a large constant.
2508 CODE is the code of the operation required, it can be any of SET, PLUS,
2509 IOR, AND, XOR, MINUS;
2510 MODE is the mode in which the operation is being performed;
2511 VAL is the integer to operate on;
2512 SOURCE is the other operand (a register, or a null-pointer for SET);
2513 SUBTARGETS means it is safe to create scratch registers if that will
2514 either produce a simpler sequence, or we will want to cse the values.
2515 Return value is the number of insns emitted. */
2517 /* ??? Tweak this for thumb2. */
2518 int
2519 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2520 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2522 rtx cond;
2524 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2525 cond = COND_EXEC_TEST (PATTERN (insn));
2526 else
2527 cond = NULL_RTX;
2529 if (subtargets || code == SET
2530 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2531 && REGNO (target) != REGNO (source)))
2533 /* After arm_reorg has been called, we can't fix up expensive
2534 constants by pushing them into memory so we must synthesize
2535 them in-line, regardless of the cost. This is only likely to
2536 be more costly on chips that have load delay slots and we are
2537 compiling without running the scheduler (so no splitting
2538 occurred before the final instruction emission).
2540 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2542 if (!after_arm_reorg
2543 && !cond
2544 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2545 1, 0)
2546 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2547 + (code != SET))))
2549 if (code == SET)
2551 /* Currently SET is the only monadic value for CODE, all
2552 the rest are dyadic. */
2553 if (TARGET_USE_MOVT)
2554 arm_emit_movpair (target, GEN_INT (val));
2555 else
2556 emit_set_insn (target, GEN_INT (val));
2558 return 1;
2560 else
2562 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2564 if (TARGET_USE_MOVT)
2565 arm_emit_movpair (temp, GEN_INT (val));
2566 else
2567 emit_set_insn (temp, GEN_INT (val));
2569 /* For MINUS, the value is subtracted from, since we never
2570 have subtraction of a constant. */
2571 if (code == MINUS)
2572 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2573 else
2574 emit_set_insn (target,
2575 gen_rtx_fmt_ee (code, mode, source, temp));
2577 return 2;
2581 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2582 1);
2585 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2586 ARM/THUMB2 immediates, and add up to VAL.
2587 The function return value gives the number of insns required. */
2588 static int
2589 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2590 struct four_ints *return_sequence)
2592 int best_consecutive_zeros = 0;
2593 int i;
2594 int best_start = 0;
2595 int insns1, insns2;
2596 struct four_ints tmp_sequence;
2598 /* If we aren't targeting ARM, the best place to start is always at
2599 the bottom, otherwise look more closely. */
2600 if (TARGET_ARM)
2602 for (i = 0; i < 32; i += 2)
2604 int consecutive_zeros = 0;
2606 if (!(val & (3 << i)))
2608 while ((i < 32) && !(val & (3 << i)))
2610 consecutive_zeros += 2;
2611 i += 2;
2613 if (consecutive_zeros > best_consecutive_zeros)
2615 best_consecutive_zeros = consecutive_zeros;
2616 best_start = i - consecutive_zeros;
2623 /* So long as it won't require any more insns to do so, it's
2624 desirable to emit a small constant (in bits 0...9) in the last
2625 insn. This way there is more chance that it can be combined with
2626 a later addressing insn to form a pre-indexed load or store
2627 operation. Consider:
2629 *((volatile int *)0xe0000100) = 1;
2630 *((volatile int *)0xe0000110) = 2;
2632 We want this to wind up as:
2634 mov rA, #0xe0000000
2635 mov rB, #1
2636 str rB, [rA, #0x100]
2637 mov rB, #2
2638 str rB, [rA, #0x110]
2640 rather than having to synthesize both large constants from scratch.
2642 Therefore, we calculate how many insns would be required to emit
2643 the constant starting from `best_start', and also starting from
2644 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2645 yield a shorter sequence, we may as well use zero. */
2646 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2648 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2650 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2651 if (insns2 <= insns1)
2653 *return_sequence = tmp_sequence;
2661 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2662 static int
2663 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2664 struct four_ints *return_sequence, int i)
2666 int remainder = val & 0xffffffff;
2667 int insns = 0;
2669 /* Try and find a way of doing the job in either two or three
2672 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2673 location. We start at position I. This may be the MSB, or
2674 optimal_immediate_sequence may have positioned it at the largest block
2675 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2676 wrapping around to the top of the word when we drop off the bottom.
2677 In the worst case this code should produce no more than four insns.
2679 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2680 constants, shifted to any arbitrary location. We should always start
2685 unsigned int b1, b2, b3, b4;
2686 unsigned HOST_WIDE_INT result;
2689 gcc_assert (insns < 4);
2694 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2695 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2698 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2699 /* We can use addw/subw for the last 12 bits. */
2703 /* Use an 8-bit shifted/rotated immediate. */
2707 result = remainder & ((0x0ff << end)
2708 | ((i < end) ? (0xff >> (32 - end))
2715 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2716 arbitrary shifts. */
2717 i -= TARGET_ARM ? 2 : 1;
2721 /* Next, see if we can do a better job with a thumb2 replicated
2724 We do it this way around to catch the cases like 0x01F001E0 where
2725 two 8-bit immediates would work, but a replicated constant would
2728 TODO: 16-bit constants that don't clear all the bits, but still win.
2729 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2732 b1 = (remainder & 0xff000000) >> 24;
2733 b2 = (remainder & 0x00ff0000) >> 16;
2734 b3 = (remainder & 0x0000ff00) >> 8;
2735 b4 = remainder & 0xff;
2739 /* The 8-bit immediate already found clears b1 (and maybe b2),
2740 but must leave b3 and b4 alone. */
2742 /* First try to find a 32-bit replicated constant that clears
2743 almost everything. We can assume that we can't do it in one,
2744 or else we wouldn't be here. */
2745 unsigned int tmp = b1 & b2 & b3 & b4;
2746 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2747 + (tmp << 24);
2748 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2749 + (tmp == b3) + (tmp == b4);
2751 && (matching_bytes >= 3
2752 || (matching_bytes == 2
2753 && const_ok_for_op (remainder & ~tmp2, code))))
2755 /* At least 3 of the bytes match, and the fourth has at
2756 least as many bits set, or two of the bytes match
2757 and it will only require one more insn to finish. */
2765 /* Second, try to find a 16-bit replicated constant that can
2766 leave three of the bytes clear. If b2 or b4 is already
2767 zero, then we can. If the 8-bit from above would not
2768 clear b2 anyway, then we still win. */
2769 else if (b1 == b3 && (!b2 || !b4
2770 || (remainder & 0x00ff0000 & ~result)))
2772 result = remainder & 0xff00ff00;
2778 /* The 8-bit immediate already found clears b2 (and maybe b3)
2779 and we don't get here unless b1 is already clear, but it will
2780 leave b4 unchanged. */
2782 /* If we can clear b2 and b4 at once, then we win, since the
2783 8-bits couldn't possibly reach that far. */
2786 result = remainder & 0x00ff00ff;
2792 return_sequence->i[insns++] = result;
2793 remainder &= ~result;
2795 if (code == SET || code == MINUS)
2796 code = PLUS;
2798 while (remainder);
2800 return insns;
2803 /* Emit an instruction with the indicated PATTERN. If COND is
2804 non-NULL, conditionalize the execution of the instruction on COND
2805 being true. */
2807 static void
2808 emit_constant_insn (rtx cond, rtx pattern)
2810 if (cond)
2811 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2812 emit_insn (pattern);
2815 /* As above, but extra parameter GENERATE which, if clear, suppresses
2816 RTL generation. */
2818 static int
2819 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2820 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2823 int can_invert = 0;
2824 int can_negate = 0;
2825 int final_invert = 0;
2826 int i;
2827 int set_sign_bit_copies = 0;
2828 int clear_sign_bit_copies = 0;
2829 int clear_zero_bit_copies = 0;
2830 int set_zero_bit_copies = 0;
2831 int insns = 0, neg_insns, inv_insns;
2832 unsigned HOST_WIDE_INT temp1, temp2;
2833 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2834 struct four_ints *immediates;
2835 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2837 /* Find out which operations are safe for a given CODE. Also do a quick
2838 check for degenerate cases; these can occur when DImode operations
2851 if (remainder == 0xffffffff)
2854 emit_constant_insn (cond,
2855 gen_rtx_SET (VOIDmode, target,
2856 GEN_INT (ARM_SIGN_EXTEND (val))));
2862 if (reload_completed && rtx_equal_p (target, source))
2863 return 0;
2865 if (generate)
2866 emit_constant_insn (cond,
2867 gen_rtx_SET (VOIDmode, target, source));
2868 return 1;
2875 if (generate)
2876 emit_constant_insn (cond,
2877 gen_rtx_SET (VOIDmode, target, const0_rtx));
2878 return 1;
2880 if (remainder == 0xffffffff)
2882 if (reload_completed && rtx_equal_p (target, source))
2883 return 0;
2884 if (generate)
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target, source));
2887 return 1;
2895 if (reload_completed && rtx_equal_p (target, source))
2896 return 0;
2897 if (generate)
2898 emit_constant_insn (cond,
2899 gen_rtx_SET (VOIDmode, target, source));
2900 return 1;
2903 if (remainder == 0xffffffff)
2905 if (generate)
2906 emit_constant_insn (cond,
2907 gen_rtx_SET (VOIDmode, target,
2908 gen_rtx_NOT (mode, source)));
2909 return 1;
2915 /* We treat MINUS as (val - source), since (source - val) is always
2916 passed as (source + (-val)). */
2920 emit_constant_insn (cond,
2921 gen_rtx_SET (VOIDmode, target,
2922 gen_rtx_NEG (mode, source)));
2925 if (const_ok_for_arm (val))
2928 emit_constant_insn (cond,
2929 gen_rtx_SET (VOIDmode, target,
2930 gen_rtx_MINUS (mode, GEN_INT (val),
2941 /* If we can do it in one insn get out quickly. */
2942 if (const_ok_for_op (val, code))
2945 emit_constant_insn (cond,
2946 gen_rtx_SET (VOIDmode, target,
2948 ? gen_rtx_fmt_ee (code, mode, source,
2954 /* Calculate a few attributes that may be useful for specific
2956 /* Count number of leading zeros. */
2957 for (i = 31; i >= 0; i--)
2959 if ((remainder & (1 << i)) == 0)
2960 clear_sign_bit_copies++;
2961 else
2962 break;
2965 /* Count number of leading 1's. */
2966 for (i = 31; i >= 0; i--)
2968 if ((remainder & (1 << i)) != 0)
2969 set_sign_bit_copies++;
2970 else
2971 break;
2974 /* Count number of trailing zeros. */
2975 for (i = 0; i <= 31; i++)
2977 if ((remainder & (1 << i)) == 0)
2978 clear_zero_bit_copies++;
2979 else
2980 break;
2983 /* Count number of trailing 1's. */
2984 for (i = 0; i <= 31; i++)
2986 if ((remainder & (1 << i)) != 0)
2987 set_zero_bit_copies++;
2988 else
2989 break;
2995 /* See if we can do this by sign_extending a constant that is known
2996 to be negative. This is a good way of doing it, since the shift
2997 may well merge into a subsequent insn. */
2998 if (set_sign_bit_copies > 1)
3000 if (const_ok_for_arm
3001 (temp1 = ARM_SIGN_EXTEND (remainder
3002 << (set_sign_bit_copies - 1))))
3006 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3007 emit_constant_insn (cond,
3008 gen_rtx_SET (VOIDmode, new_src,
3009 GEN_INT (temp1)));
3010 emit_constant_insn (cond,
3011 gen_ashrsi3 (target, new_src,
3012 GEN_INT (set_sign_bit_copies - 1)));
3014 return 2;
3016 /* For an inverted constant, we will need to set the low bits,
3017 these will be shifted out of harm's way. */
3018 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3019 if (const_ok_for_arm (~temp1))
3023 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3024 emit_constant_insn (cond,
3025 gen_rtx_SET (VOIDmode, new_src,
3027 emit_constant_insn (cond,
3028 gen_ashrsi3 (target, new_src,
3029 GEN_INT (set_sign_bit_copies - 1)));
3035 /* See if we can calculate the value as the difference between two
3036 valid immediates. */
3037 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3039 int topshift = clear_sign_bit_copies & ~1;
3041 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3042 & (0xff000000 >> topshift));
3044 /* If temp1 is zero, then that means the 9 most significant
3045 bits of remainder were 1 and we've caused it to overflow.
3046 When topshift is 0 we don't need to do anything since we
3047 can borrow from 'bit 32'. */
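/* Worked example of this computation (added for illustration): for
   remainder 0x00ffff00, topshift is 8, the masked sum collapses to
   zero, and the borrow gives temp1 = 0x80000000 >> 7 = 0x01000000.
   Then temp2 = 0x01000000 - 0x00ffff00 = 0x100; both halves are valid
   immediates, so the value is built in two insns:
       mov  rD, #0x01000000
       sub  rD, rD, #0x100  */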
3048 if (temp1 == 0 && topshift != 0)
3049 temp1 = 0x80000000 >> (topshift - 1);
3051 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3053 if (const_ok_for_arm (temp2))
3055 if (generate)
3057 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3058 emit_constant_insn (cond,
3059 gen_rtx_SET (VOIDmode, new_src,
3060 GEN_INT (temp1)));
3061 emit_constant_insn (cond,
3062 gen_addsi3 (target, new_src,
3063 GEN_INT (-temp2)));
3065 return 2;
3070 /* See if we can generate this by setting the bottom (or the top)
3071 16 bits, and then shifting these into the other half of the
3072 word. We only look for the simplest cases, to do more would cost
3073 too much. Be careful, however, not to generate this when the
3074 alternative would take fewer insns. */
3075 if (val & 0xffff0000)
3077 temp1 = remainder & 0xffff0000;
3078 temp2 = remainder & 0x0000ffff;
3080 /* Overlaps outside this range are best done using other methods. */
3081 for (i = 9; i < 24; i++)
3083 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3084 && !const_ok_for_arm (temp2))
3086 rtx new_src = (subtargets
3087 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3089 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3090 source, subtargets, generate);
3098 gen_rtx_ASHIFT (mode, source,
3105 /* Don't duplicate cases already considered. */
3106 for (i = 17; i < 24; i++)
3108 if (((temp1 | (temp1 >> i)) == remainder)
3109 && !const_ok_for_arm (temp1))
3111 rtx new_src = (subtargets
3112 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3114 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3115 source, subtargets, generate);
3120 gen_rtx_SET (VOIDmode, target,
3123 gen_rtx_LSHIFTRT (mode, source,
3134 /* If we have IOR or XOR, and the constant can be loaded in a
3135 single instruction, and we can find a temporary to put it in,
3136 then this can be done in two instructions instead of 3-4. */
3138 /* TARGET can't be NULL if SUBTARGETS is 0 */
3139 || (reload_completed && !reg_mentioned_p (target, source)))
3141 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3145 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, sub,
3150 emit_constant_insn (cond,
3151 gen_rtx_SET (VOIDmode, target,
3152 gen_rtx_fmt_ee (code, mode,
3163 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3164 and the remainder 0s for e.g. 0xfff00000)
3165 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3167 This can be done in 2 instructions by using shifts with mov or mvn.
3168 e.g. for
3169 x = x | 0xfff00000;
3170 we generate.
3171 mvn r0, r0, asl #12
3172 mvn r0, r0, lsr #12 */
3173 if (set_sign_bit_copies > 8
3174 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3178 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3179 rtx shift = GEN_INT (set_sign_bit_copies);
3183 gen_rtx_SET (VOIDmode, sub,
3185 gen_rtx_ASHIFT (mode,
3190 gen_rtx_SET (VOIDmode, target,
3192 gen_rtx_LSHIFTRT (mode, sub,
3199 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3201 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3203 e.g. for r0 = r0 | 0xfff
3204 mvn r0, r0, lsr #12
3205 mvn r0, r0, asl #12
3206 */
3208 if (set_zero_bit_copies > 8
3209 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3213 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3214 rtx shift = GEN_INT (set_zero_bit_copies);
3218 gen_rtx_SET (VOIDmode, sub,
3220 gen_rtx_LSHIFTRT (mode,
3225 gen_rtx_SET (VOIDmode, target,
3227 gen_rtx_ASHIFT (mode, sub,
3233 /* This will never be reached for Thumb2 because orn is a valid
3234 instruction. This is for Thumb1 and the ARM 32 bit cases.
3236 x = y | constant (such that ~constant is a valid constant)
3238 x = ~(~y & ~constant).
3240 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3244 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3245 emit_constant_insn (cond,
3246 gen_rtx_SET (VOIDmode, sub,
3247 gen_rtx_NOT (mode, source)));
3250 sub = gen_reg_rtx (mode);
3251 emit_constant_insn (cond,
3252 gen_rtx_SET (VOIDmode, sub,
3253 gen_rtx_AND (mode, source,
3255 emit_constant_insn (cond,
3256 gen_rtx_SET (VOIDmode, target,
3257 gen_rtx_NOT (mode, sub)));
3264 /* See if two shifts will do 2 or more insn's worth of work. */
3265 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3267 HOST_WIDE_INT shift_mask = ((0xffffffff
3268 << (32 - clear_sign_bit_copies))
3271 if ((remainder | shift_mask) != 0xffffffff)
3275 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3276 insns = arm_gen_constant (AND, mode, cond,
3277 remainder | shift_mask,
3278 new_src, source, subtargets, 1);
3283 rtx targ = subtargets ? NULL_RTX : target;
3284 insns = arm_gen_constant (AND, mode, cond,
3285 remainder | shift_mask,
3286 targ, source, subtargets, 0);
3292 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3293 rtx shift = GEN_INT (clear_sign_bit_copies);
3295 emit_insn (gen_ashlsi3 (new_src, source, shift));
3296 emit_insn (gen_lshrsi3 (target, new_src, shift));
3302 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3304 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3306 if ((remainder | shift_mask) != 0xffffffff)
3310 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3312 insns = arm_gen_constant (AND, mode, cond,
3313 remainder | shift_mask,
3314 new_src, source, subtargets, 1);
3319 rtx targ = subtargets ? NULL_RTX : target;
3321 insns = arm_gen_constant (AND, mode, cond,
3322 remainder | shift_mask,
3323 targ, source, subtargets, 0);
3329 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3330 rtx shift = GEN_INT (clear_zero_bit_copies);
3332 emit_insn (gen_lshrsi3 (new_src, source, shift));
3333 emit_insn (gen_ashlsi3 (target, new_src, shift));
3336 return 2;
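/* A concrete instance of the two-shift trick above (illustrative):
   for x &= 0x0000ffff, clear_sign_bit_copies is 16 and the mask plus
   remainder covers the whole word, so instead of loading the constant
   we emit
       mov  r0, r0, asl #16
       mov  r0, r0, lsr #16
   i.e. two shifts and no constant-pool entry.  */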
3345 /* Calculate what the instruction sequences would be if we generated it
3346 normally, negated, or inverted. */
3347 if (code == AND)
3348 /* AND cannot be split into multiple insns, so invert and use BIC. */
3349 insns = 99;
3350 else
3351 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3353 if (can_negate)
3354 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3355 &neg_immediates);
3356 else
3357 neg_insns = 99;
3359 if (can_invert || final_invert)
3360 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3361 &inv_immediates);
3362 else
3363 inv_insns = 99;
3365 immediates = &pos_immediates;
3367 /* Is the negated immediate sequence more efficient? */
3368 if (neg_insns < insns && neg_insns <= inv_insns)
3370 insns = neg_insns;
3371 immediates = &neg_immediates;
3373 else
3374 can_negate = 0;
3376 /* Is the inverted immediate sequence more efficient?
3377 We must allow for an extra NOT instruction for XOR operations, although
3378 there is some chance that the final 'mvn' will get optimized later. */
3379 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3381 insns = inv_insns;
3382 immediates = &inv_immediates;
3384 else
3386 can_invert = 0;
3387 final_invert = 0;
3390 /* Now output the chosen sequence as instructions. */
3393 for (i = 0; i < insns; i++)
3395 rtx new_src, temp1_rtx;
3397 temp1 = immediates->i[i];
3399 if (code == SET || code == MINUS)
3400 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3401 else if ((final_invert || i < (insns - 1)) && subtargets)
3402 new_src = gen_reg_rtx (mode);
3403 else
3404 new_src = target;
3406 if (can_invert)
3407 temp1 = ~temp1;
3408 else if (can_negate)
3409 temp1 = -temp1;
3411 temp1 = trunc_int_for_mode (temp1, mode);
3412 temp1_rtx = GEN_INT (temp1);
3416 else if (code == MINUS)
3417 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3419 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3421 emit_constant_insn (cond,
3422 gen_rtx_SET (VOIDmode, new_src,
3423 temp1_rtx));
3424 source = new_src;
3426 if (code == SET)
3428 can_negate = can_invert;
3429 can_invert = 0;
3430 code = PLUS;
3432 else if (code == MINUS)
3433 code = PLUS;
3437 if (final_invert)
3439 if (generate)
3440 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3441 gen_rtx_NOT (mode, source)));
3442 insns++;
3445 return insns;
3448 /* Canonicalize a comparison so that we are more likely to recognize it.
3449 This can be done for a few constant compares, where we can make the
3450 immediate value easier to load. */
3452 enum rtx_code
3453 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3455 enum machine_mode mode;
3456 unsigned HOST_WIDE_INT i, maxval;
3458 mode = GET_MODE (*op0);
3459 if (mode == VOIDmode)
3460 mode = GET_MODE (*op1);
3462 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3464 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3465 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3466 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3467 for GTU/LEU in Thumb mode. */
3472 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3473 available. */
3474 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3477 if (code == GT || code == LE
3478 || (!TARGET_ARM && (code == GTU || code == LEU)))
3480 /* Missing comparison. First try to use an available
3481 comparison. */
3482 if (GET_CODE (*op1) == CONST_INT)
3490 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3492 *op1 = GEN_INT (i + 1);
3493 return code == GT ? GE : LT;
3498 if (i != ~((unsigned HOST_WIDE_INT) 0)
3499 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3501 *op1 = GEN_INT (i + 1);
3502 return code == GTU ? GEU : LTU;
3510 /* If that did not work, reverse the condition. */
3514 return swap_condition (code);
3520 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3521 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3522 to facilitate possible combining with a cmp into 'ands'. */
3523 if (mode == SImode
3524 && GET_CODE (*op0) == ZERO_EXTEND
3525 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3526 && GET_MODE (XEXP (*op0, 0)) == QImode
3527 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3528 && subreg_lowpart_p (XEXP (*op0, 0))
3529 && *op1 == const0_rtx)
3530 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3531 GEN_INT (255));
3533 /* Comparisons smaller than DImode. Only adjust comparisons against
3534 an out-of-range constant. */
3535 if (GET_CODE (*op1) != CONST_INT
3536 || const_ok_for_arm (INTVAL (*op1))
3537 || const_ok_for_arm (- INTVAL (*op1)))
3538 return code;
3551 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3553 *op1 = GEN_INT (i + 1);
3554 return code == GT ? GE : LT;
3561 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3563 *op1 = GEN_INT (i - 1);
3564 return code == GE ? GT : LE;
3570 if (i != ~((unsigned HOST_WIDE_INT) 0)
3571 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3573 *op1 = GEN_INT (i + 1);
3574 return code == GTU ? GEU : LTU;
3581 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3583 *op1 = GEN_INT (i - 1);
3584 return code == GEU ? GTU : LEU;
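/* Example of the adjustment above (added for illustration): for
   (GT x 0x00ffffff), the constant 0x00ffffff is not a valid immediate,
   but 0x01000000 is (0x01 rotated right by 8), so the comparison is
   rewritten as (GE x 0x01000000) and a single cmp suffices.  */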
3596 /* Define how to find the value returned by a function. */
3598 static rtx
3599 arm_function_value (const_tree type, const_tree func,
3600 bool outgoing ATTRIBUTE_UNUSED)
3602 enum machine_mode mode;
3603 int unsignedp ATTRIBUTE_UNUSED;
3604 rtx r ATTRIBUTE_UNUSED;
3606 mode = TYPE_MODE (type);
3608 if (TARGET_AAPCS_BASED)
3609 return aapcs_allocate_return_reg (mode, type, func);
3611 /* Promote integer types. */
3612 if (INTEGRAL_TYPE_P (type))
3613 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3615 /* Promotes small structs returned in a register to full-word size
3616 for big-endian AAPCS. */
3617 if (arm_return_in_msb (type))
3619 HOST_WIDE_INT size = int_size_in_bytes (type);
3620 if (size % UNITS_PER_WORD != 0)
3622 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3623 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3627 return arm_libcall_value_1 (mode);
3630 static int
3631 libcall_eq (const void *p1, const void *p2)
3633 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3636 static hashval_t
3637 libcall_hash (const void *p1)
3639 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3642 static void
3643 add_libcall (htab_t htab, rtx libcall)
3645 *htab_find_slot (htab, libcall, INSERT) = libcall;
3648 static bool
3649 arm_libcall_uses_aapcs_base (const_rtx libcall)
3651 static bool init_done = false;
3652 static htab_t libcall_htab;
3654 if (!init_done)
3656 init_done = true;
3658 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3659 NULL);
3660 add_libcall (libcall_htab,
3661 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3669 add_libcall (libcall_htab,
3670 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3671 add_libcall (libcall_htab,
3672 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3673 add_libcall (libcall_htab,
3674 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3675 add_libcall (libcall_htab,
3676 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3678 add_libcall (libcall_htab,
3679 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3680 add_libcall (libcall_htab,
3681 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3682 add_libcall (libcall_htab,
3683 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3684 add_libcall (libcall_htab,
3685 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3686 add_libcall (libcall_htab,
3687 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3688 add_libcall (libcall_htab,
3689 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3691 /* Values from double-precision helper functions are returned in core
3692 registers if the selected core only supports single-precision
3693 arithmetic, even if we are using the hard-float ABI. The same is
3694 true for single-precision helpers, but we will never be using the
3695 hard-float ABI on a CPU which doesn't support single-precision
3696 operations in hardware. */
3697 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3698 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3699 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3700 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3701 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3702 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3703 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3704 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3705 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3706 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3707 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3708 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3710 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3714 return libcall && htab_find (libcall_htab, libcall) != NULL;
3717 static rtx
3718 arm_libcall_value_1 (enum machine_mode mode)
3720 if (TARGET_AAPCS_BASED)
3721 return aapcs_libcall_value (mode);
3722 else if (TARGET_32BIT
3723 && TARGET_HARD_FLOAT_ABI
3724 && TARGET_FPA
3725 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3726 return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
3727 else if (TARGET_32BIT
3728 && TARGET_HARD_FLOAT_ABI
3729 && TARGET_MAVERICK
3730 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3731 return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3732 else if (TARGET_IWMMXT_ABI
3733 && arm_vector_mode_supported_p (mode))
3734 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3736 return gen_rtx_REG (mode, ARG_REGISTER (1));
3739 /* Define how to find the value returned by a library function
3740 assuming the value has mode MODE. */
3742 rtx
3743 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3745 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3746 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3748 /* The following libcalls return their result in integer registers,
3749 even though they return a floating point value. */
3750 if (arm_libcall_uses_aapcs_base (libcall))
3751 return gen_rtx_REG (mode, ARG_REGISTER(1));
3755 return arm_libcall_value_1 (mode);
3758 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3760 static bool
3761 arm_function_value_regno_p (const unsigned int regno)
3763 if (regno == ARG_REGISTER (1)
3764 || (TARGET_32BIT
3765 && TARGET_AAPCS_BASED
3766 && TARGET_VFP
3767 && TARGET_HARD_FLOAT
3768 && regno == FIRST_VFP_REGNUM)
3769 || (TARGET_32BIT
3770 && TARGET_HARD_FLOAT_ABI
3771 && TARGET_MAVERICK
3772 && regno == FIRST_CIRRUS_FP_REGNUM)
3773 || (TARGET_IWMMXT_ABI
3774 && regno == FIRST_IWMMXT_REGNUM)
3775 || (TARGET_32BIT
3776 && TARGET_HARD_FLOAT_ABI
3777 && TARGET_FPA
3778 && regno == FIRST_FPA_REGNUM))
3779 return true;
3781 return false;
3784 /* Determine the amount of memory needed to store the possible return
3785 registers of an untyped call. */
3786 int
3787 arm_apply_result_size (void)
3789 int size = 16;
3791 if (TARGET_32BIT)
3793 if (TARGET_HARD_FLOAT_ABI)
3795 if (TARGET_VFP)
3796 size += 32;
3797 if (TARGET_FPA)
3798 size += 12;
3799 if (TARGET_MAVERICK)
3800 size += 8;
3802 if (TARGET_IWMMXT_ABI)
3803 size += 8;
3806 return size;
3809 /* Decide whether TYPE should be returned in memory (true)
3810 or in a register (false). FNTYPE is the type of the function making
3811 the call. */
3812 bool
3813 arm_return_in_memory (const_tree type, const_tree fntype)
3815 HOST_WIDE_INT size;
3817 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3819 if (TARGET_AAPCS_BASED)
3821 /* Simple, non-aggregate types (ie not including vectors and
3822 complex) are always returned in a register (or registers).
3823 We don't care about which register here, so we can short-cut
3824 some of the detail. */
3825 if (!AGGREGATE_TYPE_P (type)
3826 && TREE_CODE (type) != VECTOR_TYPE
3827 && TREE_CODE (type) != COMPLEX_TYPE)
3828 return false;
3830 /* Any return value that is no larger than one word can be
3831 returned in r0. */
3832 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3833 return false;
3835 /* Check any available co-processors to see if they accept the
3836 type as a register candidate (VFP, for example, can return
3837 some aggregates in consecutive registers). These aren't
3838 available if the call is variadic. */
3839 if (aapcs_select_return_coproc (type, fntype) >= 0)
3840 return false;
3842 /* Vector values should be returned using ARM registers, not
3843 memory (unless they're over 16 bytes, which will break since
3844 we only have four call-clobbered registers to play with). */
3845 if (TREE_CODE (type) == VECTOR_TYPE)
3846 return (size < 0 || size > (4 * UNITS_PER_WORD));
3848 /* The rest go in memory. */
3849 return true;
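/* Two illustrative data points for the AAPCS rules above (examples
   ours, not from this file): struct { char c[4]; } is one word and
   comes back in r0, while struct { int a, b; } is larger than a word
   and, absent a co-processor candidate such as a VFP homogeneous
   aggregate, is returned in memory.  */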
3852 if (TREE_CODE (type) == VECTOR_TYPE)
3853 return (size < 0 || size > (4 * UNITS_PER_WORD));
3855 if (!AGGREGATE_TYPE_P (type) &&
3856 (TREE_CODE (type) != VECTOR_TYPE))
3857 /* All simple types are returned in registers. */
3858 return false;
3860 if (arm_abi != ARM_ABI_APCS)
3862 /* ATPCS and later return aggregate types in memory only if they are
3863 larger than a word (or are variable size). */
3864 return (size < 0 || size > UNITS_PER_WORD);
3867 /* For the arm-wince targets we choose to be compatible with Microsoft's
3868 ARM and Thumb compilers, which always return aggregates in memory. */
3869 #ifndef ARM_WINCE
3870 /* All structures/unions bigger than one word are returned in memory.
3871 Also catch the case where int_size_in_bytes returns -1. In this case
3872 the aggregate is either huge or of variable size, and in either case
3873 we will want to return it via memory and not in a register. */
3874 if (size < 0 || size > UNITS_PER_WORD)
3875 return true;
3877 if (TREE_CODE (type) == RECORD_TYPE)
3879 tree field;
3881 /* For a struct the APCS says that we only return in a register
3882 if the type is 'integer like' and every addressable element
3883 has an offset of zero. For practical purposes this means
3884 that the structure can have at most one non bit-field element
3885 and that this element must be the first one in the structure. */
3887 /* Find the first field, ignoring non FIELD_DECL things which will
3888 have been created by C++. */
3889 for (field = TYPE_FIELDS (type);
3890 field && TREE_CODE (field) != FIELD_DECL;
3891 field = DECL_CHAIN (field))
3892 continue;
3894 if (field == NULL)
3895 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3897 /* Check that the first field is valid for returning in a register. */
3899 /* ... Floats are not allowed */
3900 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3901 return true;
3903 /* ... Aggregates that are not themselves valid for returning in
3904 a register are not allowed. */
3905 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3906 return true;
3908 /* Now check the remaining fields, if any. Only bitfields are allowed,
3909 since they are not addressable. */
3910 for (field = DECL_CHAIN (field);
3911 field;
3912 field = DECL_CHAIN (field))
3914 if (TREE_CODE (field) != FIELD_DECL)
3915 continue;
3917 if (!DECL_BIT_FIELD_TYPE (field))
3918 return true;
3921 return false;
3924 if (TREE_CODE (type) == UNION_TYPE)
3926 tree field;
3928 /* Unions can be returned in registers if every element is
3929 integral, or can be returned in an integer register. */
3930 for (field = TYPE_FIELDS (type);
3931 field;
3932 field = DECL_CHAIN (field))
3934 if (TREE_CODE (field) != FIELD_DECL)
3935 continue;
3937 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3938 return true;
3940 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3941 return true;
3944 return false;
3946 #endif /* not ARM_WINCE */
3948 /* Return all other types in memory. */
3949 return true;
3952 /* Indicate whether or not words of a double are in big-endian order. */
3954 int
3955 arm_float_words_big_endian (void)
3957 if (TARGET_MAVERICK)
3958 return 0;
3960 /* For FPA, float words are always big-endian. For VFP, float words
3961 follow the memory system mode. */
3962 if (TARGET_FPA)
3963 return 1;
3966 if (TARGET_VFP)
3969 return (TARGET_BIG_END ? 1 : 0);
3971 return 1;
3974 const struct pcs_attribute_arg
3975 {
3976 const char *arg;
3977 enum arm_pcs value;
3978 } pcs_attribute_args[] =
3979 {
3980 {"aapcs", ARM_PCS_AAPCS},
3981 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3982 #if 0
3983 /* We could recognize these, but changes would be needed elsewhere
3984 to implement them. */
3985 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3986 {"atpcs", ARM_PCS_ATPCS},
3987 {"apcs", ARM_PCS_APCS},
3988 #endif
3989 {NULL, ARM_PCS_UNKNOWN}
3990 };
3992 static enum arm_pcs
3993 arm_pcs_from_attribute (tree attr)
3995 const struct pcs_attribute_arg *ptr;
3996 const char *arg;
3998 /* Get the value of the argument. */
3999 if (TREE_VALUE (attr) == NULL_TREE
4000 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4001 return ARM_PCS_UNKNOWN;
4003 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4005 /* Check it against the list of known arguments. */
4006 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4007 if (streq (arg, ptr->arg))
4008 return ptr->value;
4010 /* An unrecognized PCS variant. */
4011 return ARM_PCS_UNKNOWN;
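/* Typical user-level spelling that reaches this function (a usage
   sketch, not from this file):

     void f (float) __attribute__ ((pcs ("aapcs-vfp")));

   Only the "aapcs" and "aapcs-vfp" strings are accepted while the
   entries under "#if 0" above remain disabled.  */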
4014 /* Get the PCS variant to use for this call. TYPE is the function's type
4015 specification, DECL is the specific declaration. DECL may be null if
4016 the call could be indirect or if this is a library call. */
4017 static enum arm_pcs
4018 arm_get_pcs_model (const_tree type, const_tree decl)
4020 bool user_convention = false;
4021 enum arm_pcs user_pcs = arm_pcs_default;
4026 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4027 if (attr)
4029 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4030 user_convention = true;
4033 if (TARGET_AAPCS_BASED)
4035 /* Detect varargs functions. These always use the base rules
4036 (no argument is ever a candidate for a co-processor
4038 bool base_rules = stdarg_p (type);
4040 if (user_convention)
4042 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4043 sorry ("non-AAPCS derived PCS variant");
4044 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4045 error ("variadic functions must use the base AAPCS variant");
4049 return ARM_PCS_AAPCS;
4050 else if (user_convention)
4052 else if (decl && flag_unit_at_a_time)
4054 /* Local functions never leak outside this compilation unit,
4055 so we are free to use whatever conventions are appropriate. */
4057 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4058 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4060 return ARM_PCS_AAPCS_LOCAL;
4063 else if (user_convention && user_pcs != arm_pcs_default)
4064 sorry ("PCS variant");
4066 /* For everything else we use the target's default. */
4067 return arm_pcs_default;
4072 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4073 const_tree fntype ATTRIBUTE_UNUSED,
4074 rtx libcall ATTRIBUTE_UNUSED,
4075 const_tree fndecl ATTRIBUTE_UNUSED)
4077 /* Record the unallocated VFP registers. */
4078 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4079 pcum->aapcs_vfp_reg_alloc = 0;
4082 /* Walk down the type tree of TYPE counting consecutive base elements.
4083 If *MODEP is VOIDmode, then set it to the first valid floating point
4084 type. If a non-floating point type is found, or if a floating point
4085 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4086 otherwise return the count in the sub-tree. */
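/* For example (an invented type), walking

     struct hfa { float x, y, z; };

   sets *MODEP to SFmode and returns 3, making the struct a homogeneous
   floating-point candidate for the VFP registers; adding, say, an int
   member would make the walk return -1 instead. */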
4088 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4090 enum machine_mode mode;
4093 switch (TREE_CODE (type))
4096 mode = TYPE_MODE (type);
4097 if (mode != DFmode && mode != SFmode)
4100 if (*modep == VOIDmode)
4109 mode = TYPE_MODE (TREE_TYPE (type));
4110 if (mode != DFmode && mode != SFmode)
4113 if (*modep == VOIDmode)
4122 /* Use V2SImode and V4SImode as representatives of all 64-bit
4123 and 128-bit vector types, whether or not those modes are
4124 supported with the present options. */
4125 size = int_size_in_bytes (type);
4138 if (*modep == VOIDmode)
4141 /* Vector modes are considered to be opaque: two vectors are
4142 equivalent for the purposes of being homogeneous aggregates
4143 if they are the same size. */
4152 tree index = TYPE_DOMAIN (type);
4154 /* Can't handle incomplete types. */
4155 if (!COMPLETE_TYPE_P(type))
4158 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4161 || !TYPE_MAX_VALUE (index)
4162 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4163 || !TYPE_MIN_VALUE (index)
4164 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4168 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4169 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4171 /* There must be no padding. */
4172 if (!host_integerp (TYPE_SIZE (type), 1)
4173 || (tree_low_cst (TYPE_SIZE (type), 1)
4174 != count * GET_MODE_BITSIZE (*modep)))
4186 /* Can't handle incomplete types. */
4187 if (!COMPLETE_TYPE_P(type))
4190 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4192 if (TREE_CODE (field) != FIELD_DECL)
4195 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4201 /* There must be no padding. */
4202 if (!host_integerp (TYPE_SIZE (type), 1)
4203 || (tree_low_cst (TYPE_SIZE (type), 1)
4204 != count * GET_MODE_BITSIZE (*modep)))
4211 case QUAL_UNION_TYPE:
4213 /* These aren't very interesting except in a degenerate case. */
4218 /* Can't handle incomplete types. */
4219 if (!COMPLETE_TYPE_P(type))
4222 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4224 if (TREE_CODE (field) != FIELD_DECL)
4227 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4230 count = count > sub_count ? count : sub_count;
4233 /* There must be no padding. */
4234 if (!host_integerp (TYPE_SIZE (type), 1)
4235 || (tree_low_cst (TYPE_SIZE (type), 1)
4236 != count * GET_MODE_BITSIZE (*modep)))
4249 /* Return true if PCS_VARIANT should use VFP registers. */
4251 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4253 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4255 static bool seen_thumb1_vfp = false;
4257 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4259 sorry ("Thumb-1 hard-float VFP ABI");
4260 /* sorry() is not immediately fatal, so only display this once. */
4261 seen_thumb1_vfp = true;
4267 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4270 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4271 && (TARGET_VFP_DOUBLE || !is_double));
4275 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4276 enum machine_mode mode, const_tree type,
4277 enum machine_mode *base_mode, int *count)
4279 enum machine_mode new_mode = VOIDmode;
4281 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4282 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4283 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4288 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4291 new_mode = (mode == DCmode ? DFmode : SFmode);
4293 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4295 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4297 if (ag_count > 0 && ag_count <= 4)
4306 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4309 *base_mode = new_mode;
4314 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4315 enum machine_mode mode, const_tree type)
4317 int count ATTRIBUTE_UNUSED;
4318 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4320 if (!use_vfp_abi (pcs_variant, false))
4322 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4327 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4330 if (!use_vfp_abi (pcum->pcs_variant, false))
4333 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4334 &pcum->aapcs_vfp_rmode,
4335 &pcum->aapcs_vfp_rcount);
4339 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4340 const_tree type ATTRIBUTE_UNUSED)
4342 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4343 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4346 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4347 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4349 pcum->aapcs_vfp_reg_alloc = mask << regno;
4350 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4353 int rcount = pcum->aapcs_vfp_rcount;
4355 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4359 /* Avoid using unsupported vector modes. */
4360 if (rmode == V2SImode)
4362 else if (rmode == V4SImode)
4369 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4370 for (i = 0; i < rcount; i++)
4372 rtx tmp = gen_rtx_REG (rmode,
4373 FIRST_VFP_REGNUM + regno + i * rshift);
4374 tmp = gen_rtx_EXPR_LIST
4376 GEN_INT (i * GET_MODE_SIZE (rmode)));
4377 XVECEXP (par, 0, i) = tmp;
4380 pcum->aapcs_reg = par;
4383 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4390 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4391 enum machine_mode mode,
4392 const_tree type ATTRIBUTE_UNUSED)
4394 if (!use_vfp_abi (pcs_variant, false))
4397 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4400 enum machine_mode ag_mode;
4405 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4410 if (ag_mode == V2SImode)
4412 else if (ag_mode == V4SImode)
4418 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4419 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4420 for (i = 0; i < count; i++)
4422 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4423 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4424 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4425 XVECEXP (par, 0, i) = tmp;
4431 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4435 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4436 enum machine_mode mode ATTRIBUTE_UNUSED,
4437 const_tree type ATTRIBUTE_UNUSED)
4439 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4440 pcum->aapcs_vfp_reg_alloc = 0;
4444 #define AAPCS_CP(X) \
4446 aapcs_ ## X ## _cum_init, \
4447 aapcs_ ## X ## _is_call_candidate, \
4448 aapcs_ ## X ## _allocate, \
4449 aapcs_ ## X ## _is_return_candidate, \
4450 aapcs_ ## X ## _allocate_return_reg, \
4451 aapcs_ ## X ## _advance \
4454 /* Table of co-processors that can be used to pass arguments in
4455 registers. Ideally no argument should be a candidate for more than
4456 one co-processor table entry, but the table is processed in order
4457 and stops after the first match. If that entry then fails to put
4458 the argument into a co-processor register, the argument will go on the stack. */
4462 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4463 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4465 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4466 BLKmode) is a candidate for this co-processor's registers; this
4467 function should ignore any position-dependent state in
4468 CUMULATIVE_ARGS and only use call-type dependent information. */
4469 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4471 /* Return true if the argument does get a co-processor register; it
4472 should set aapcs_reg to an RTX of the register allocated as is
4473 required for a return from FUNCTION_ARG. */
4474 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4476 /* Return true if a result of mode MODE (or type TYPE if MODE is
4477 BLKmode) can be returned in this co-processor's registers. */
4478 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4480 /* Allocate and return an RTX element to hold the return type of a
4481 call. This routine must not fail and will only be called if
4482 is_return_candidate returned true with the same parameters. */
4483 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4485 /* Finish processing this argument and prepare to start processing the next one. */
4487 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4488 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4496 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4501 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4502 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4509 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4511 /* We aren't passed a decl, so we can't check that a call is local.
4512 However, it isn't clear that that would be a win anyway, since it
4513 might limit some tail-calling opportunities. */
4514 enum arm_pcs pcs_variant;
4518 const_tree fndecl = NULL_TREE;
4520 if (TREE_CODE (fntype) == FUNCTION_DECL)
4523 fntype = TREE_TYPE (fntype);
4526 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4529 pcs_variant = arm_pcs_default;
4531 if (pcs_variant != ARM_PCS_AAPCS)
4535 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4536 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4545 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4548 /* We aren't passed a decl, so we can't check that a call is local.
4549 However, it isn't clear that that would be a win anyway, since it
4550 might limit some tail-calling opportunities. */
4551 enum arm_pcs pcs_variant;
4552 int unsignedp ATTRIBUTE_UNUSED;
4556 const_tree fndecl = NULL_TREE;
4558 if (TREE_CODE (fntype) == FUNCTION_DECL)
4561 fntype = TREE_TYPE (fntype);
4564 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4567 pcs_variant = arm_pcs_default;
4569 /* Promote integer types. */
4570 if (type && INTEGRAL_TYPE_P (type))
4571 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4573 if (pcs_variant != ARM_PCS_AAPCS)
4577 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4578 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4580 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4584 /* Promotes small structs returned in a register to full-word size
4585 for big-endian AAPCS. */
4586 if (type && arm_return_in_msb (type))
4588 HOST_WIDE_INT size = int_size_in_bytes (type);
4589 if (size % UNITS_PER_WORD != 0)
4591 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4592 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4596 return gen_rtx_REG (mode, R0_REGNUM);
4600 aapcs_libcall_value (enum machine_mode mode)
4602 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4603 && GET_MODE_SIZE (mode) <= 4)
4606 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4609 /* Lay out a function argument using the AAPCS rules. The rule
4610 numbers referred to here are those in the AAPCS. */
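/* As a sketch (AAPCS base variant, integer registers r0-r3): given

     void f (int a, double b);

   A is allocated to r0 by rule C4; B needs doubleword alignment, so
   rule C3 rounds the NCRN up to r2 and B then occupies r2/r3. */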
4612 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4613 const_tree type, bool named)
4618 /* We only need to do this once per argument. */
4619 if (pcum->aapcs_arg_processed)
4622 pcum->aapcs_arg_processed = true;
4624 /* Special case: if named is false then we are handling an incoming
4625 anonymous argument which is on the stack. */
4629 /* Is this a potential co-processor register candidate? */
4630 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4632 int slot = aapcs_select_call_coproc (pcum, mode, type);
4633 pcum->aapcs_cprc_slot = slot;
4635 /* We don't have to apply any of the rules from part B of the
4636 preparation phase; these are handled elsewhere in the compiler. */
4641 /* A Co-processor register candidate goes either in its own
4642 class of registers or on the stack. */
4643 if (!pcum->aapcs_cprc_failed[slot])
4645 /* C1.cp - Try to allocate the argument to co-processor registers. */
4647 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4650 /* C2.cp - Put the argument on the stack and note that we
4651 can't assign any more candidates in this slot. We also
4652 need to note that we have allocated stack space, so that
4653 we won't later try to split a non-cprc candidate between
4654 core registers and the stack. */
4655 pcum->aapcs_cprc_failed[slot] = true;
4656 pcum->can_split = false;
4659 /* We didn't get a register, so this argument goes on the stack. */
4661 gcc_assert (pcum->can_split == false);
4666 /* C3 - For double-word aligned arguments, round the NCRN up to the
4667 next even number. */
4668 ncrn = pcum->aapcs_ncrn;
4669 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4672 nregs = ARM_NUM_REGS2(mode, type);
4674 /* Sigh, this test should really assert that nregs > 0, but a GCC
4675 extension allows empty structs and then gives them empty size; it
4676 then allows such a structure to be passed by value. For some of
4677 the code below we have to pretend that such an argument has
4678 non-zero size so that we 'locate' it correctly either in
4679 registers or on the stack. */
4680 gcc_assert (nregs >= 0);
4682 nregs2 = nregs ? nregs : 1;
4684 /* C4 - Argument fits entirely in core registers. */
4685 if (ncrn + nregs2 <= NUM_ARG_REGS)
4687 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4688 pcum->aapcs_next_ncrn = ncrn + nregs;
4692 /* C5 - Some core registers left and there are no arguments already
4693 on the stack: split this argument between the remaining core
4694 registers and the stack. */
4695 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4697 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4698 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4699 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4703 /* C6 - NCRN is set to 4. */
4704 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4706 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4710 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4711 for a call to a function whose data type is FNTYPE.
4712 For a library call, FNTYPE is NULL. */
4714 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4716 tree fndecl)
4718 /* Select the calling convention (PCS variant) to use. */
4720 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4722 pcum->pcs_variant = arm_pcs_default;
4724 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4726 if (arm_libcall_uses_aapcs_base (libname))
4727 pcum->pcs_variant = ARM_PCS_AAPCS;
4729 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4730 pcum->aapcs_reg = NULL_RTX;
4731 pcum->aapcs_partial = 0;
4732 pcum->aapcs_arg_processed = false;
4733 pcum->aapcs_cprc_slot = -1;
4734 pcum->can_split = true;
4736 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4740 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4742 pcum->aapcs_cprc_failed[i] = false;
4743 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4751 /* On the ARM, the offset starts at 0. */
4753 pcum->iwmmxt_nregs = 0;
4754 pcum->can_split = true;
4756 /* Varargs vectors are treated the same as long long.
4757 named_count avoids having to change the way arm handles 'named'. */
4758 pcum->named_count = 0;
4761 if (TARGET_REALLY_IWMMXT && fntype)
4765 for (fn_arg = TYPE_ARG_TYPES (fntype);
4767 fn_arg = TREE_CHAIN (fn_arg))
4768 pcum->named_count += 1;
4770 if (! pcum->named_count)
4771 pcum->named_count = INT_MAX;
4776 /* Return true if mode/type need doubleword alignment. */
4778 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4780 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4781 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
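/* For example (illustrative): a 'long long' argument (DImode, 8-byte
   aligned) or

     struct a8 { int i; } __attribute__((aligned (8)));

   needs doubleword alignment here, while a plain 'int' does not. */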
4785 /* Determine where to put an argument to a function.
4786 Value is zero to push the argument on the stack,
4787 or a hard register in which to store the argument.
4789 MODE is the argument's machine mode.
4790 TYPE is the data type of the argument (as a tree).
4791 This is null for libcalls where that information may not be available.
4793 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4794 the preceding args and about the function being called.
4795 NAMED is nonzero if this argument is a named parameter
4796 (otherwise it is an extra parameter matching an ellipsis).
4798 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4799 other arguments are passed on the stack. If (NAMED == 0) (which happens
4800 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4801 defined), say it is passed on the stack (function_prologue will
4802 indeed make it pass on the stack if necessary). */
4805 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4806 const_tree type, bool named)
4808 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4811 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4812 a call insn (op3 of a call_value insn). */
4813 if (mode == VOIDmode)
4816 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4818 aapcs_layout_arg (pcum, mode, type, named);
4819 return pcum->aapcs_reg;
4822 /* Varargs vectors are treated the same as long long.
4823 named_count avoids having to change the way arm handles 'named'. */
4824 if (TARGET_IWMMXT_ABI
4825 && arm_vector_mode_supported_p (mode)
4826 && pcum->named_count > pcum->nargs + 1)
4828 if (pcum->iwmmxt_nregs <= 9)
4829 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4832 pcum->can_split = false;
4837 /* Put doubleword aligned quantities in even register pairs. */
4839 && ARM_DOUBLEWORD_ALIGN
4840 && arm_needs_doubleword_align (mode, type))
4843 /* Only allow splitting an arg between regs and memory if all preceding
4844 args were allocated to regs. For args passed by reference we only count
4845 the reference pointer. */
4846 if (pcum->can_split)
4849 nregs = ARM_NUM_REGS2 (mode, type);
4851 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4854 return gen_rtx_REG (mode, pcum->nregs);
4858 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4860 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4861 ? DOUBLEWORD_ALIGNMENT
4866 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4867 tree type, bool named)
4869 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4870 int nregs = pcum->nregs;
4872 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4874 aapcs_layout_arg (pcum, mode, type, named);
4875 return pcum->aapcs_partial;
4878 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4881 if (NUM_ARG_REGS > nregs
4882 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4884 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4889 /* Update the data in PCUM to advance over an argument
4890 of mode MODE and data type TYPE.
4891 (TYPE is null for libcalls where that information may not be available.) */
4894 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4895 const_tree type, bool named)
4897 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4899 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4901 aapcs_layout_arg (pcum, mode, type, named);
4903 if (pcum->aapcs_cprc_slot >= 0)
4905 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4907 pcum->aapcs_cprc_slot = -1;
4910 /* Generic stuff. */
4911 pcum->aapcs_arg_processed = false;
4912 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4913 pcum->aapcs_reg = NULL_RTX;
4914 pcum->aapcs_partial = 0;
4919 if (arm_vector_mode_supported_p (mode)
4920 && pcum->named_count > pcum->nargs
4921 && TARGET_IWMMXT_ABI)
4922 pcum->iwmmxt_nregs += 1;
4924 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4928 /* Variable sized types are passed by reference. This is a GCC
4929 extension to the ARM ABI. */
4932 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4933 enum machine_mode mode ATTRIBUTE_UNUSED,
4934 const_tree type, bool named ATTRIBUTE_UNUSED)
4936 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4939 /* Encode the current state of the #pragma [no_]long_calls. */
4942 OFF, /* No #pragma [no_]long_calls is in effect. */
4943 LONG, /* #pragma long_calls is in effect. */
4944 SHORT /* #pragma no_long_calls is in effect. */
4947 static arm_pragma_enum arm_pragma_long_calls = OFF;
4950 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4952 arm_pragma_long_calls = LONG;
4956 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4958 arm_pragma_long_calls = SHORT;
4962 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4964 arm_pragma_long_calls = OFF;
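/* Typical usage of these pragmas (an illustrative sketch):

     #pragma long_calls
     void far_func (void);    -- calls use the long-call sequence
     #pragma no_long_calls
     void near_func (void);   -- calls may use a plain BL
     #pragma long_calls_off
     void other_func (void);  -- back to the command-line default */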
4967 /* Handle an attribute requiring a FUNCTION_DECL;
4968 arguments as in struct attribute_spec.handler. */
4970 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4971 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4973 if (TREE_CODE (*node) != FUNCTION_DECL)
4975 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4977 *no_add_attrs = true;
4983 /* Handle an "interrupt" or "isr" attribute;
4984 arguments as in struct attribute_spec.handler. */
4986 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4991 if (TREE_CODE (*node) != FUNCTION_DECL)
4993 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4995 *no_add_attrs = true;
4997 /* FIXME: the argument, if any, is checked for type attributes;
4998 should it be checked for decl ones? */
5002 if (TREE_CODE (*node) == FUNCTION_TYPE
5003 || TREE_CODE (*node) == METHOD_TYPE)
5005 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5007 warning (OPT_Wattributes, "%qE attribute ignored",
5009 *no_add_attrs = true;
5012 else if (TREE_CODE (*node) == POINTER_TYPE
5013 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5014 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5015 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5017 *node = build_variant_type_copy (*node);
5018 TREE_TYPE (*node) = build_type_attribute_variant
5020 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5021 *no_add_attrs = true;
5025 /* Possibly pass this attribute on from the type to a decl. */
5026 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5027 | (int) ATTR_FLAG_FUNCTION_NEXT
5028 | (int) ATTR_FLAG_ARRAY_NEXT))
5030 *no_add_attrs = true;
5031 return tree_cons (name, args, NULL_TREE);
5035 warning (OPT_Wattributes, "%qE attribute ignored",
5044 /* Handle a "pcs" attribute; arguments as in struct
5045 attribute_spec.handler. */
5047 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5048 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5050 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5052 warning (OPT_Wattributes, "%qE attribute ignored", name);
5053 *no_add_attrs = true;
5058 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5059 /* Handle the "notshared" attribute. This attribute is another way of
5060 requesting hidden visibility. ARM's compiler supports
5061 "__declspec(notshared)"; we support the same thing via an attribute. */
5065 arm_handle_notshared_attribute (tree *node,
5066 tree name ATTRIBUTE_UNUSED,
5067 tree args ATTRIBUTE_UNUSED,
5068 int flags ATTRIBUTE_UNUSED,
5071 tree decl = TYPE_NAME (*node);
5075 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5076 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5077 *no_add_attrs = false;
5083 /* Return 0 if the attributes for two types are incompatible, 1 if they
5084 are compatible, and 2 if they are nearly compatible (which causes a
5085 warning to be generated). */
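/* For instance (hypothetical typedefs):

     typedef void lfn (void) __attribute__((long_call));
     typedef void sfn (void) __attribute__((short_call));

   lfn and sfn carry mixed long_call/short_call attributes, so they
   compare incompatible here and assigning an 'sfn *' to an 'lfn *'
   is diagnosed. */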
5087 arm_comp_type_attributes (const_tree type1, const_tree type2)
5091 /* Check for mismatch of non-default calling convention. */
5092 if (TREE_CODE (type1) != FUNCTION_TYPE)
5095 /* Check for mismatched call attributes. */
5096 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5097 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5098 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5099 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5101 /* Only bother to check if an attribute is defined. */
5102 if (l1 | l2 | s1 | s2)
5104 /* If one type has an attribute, the other must have the same attribute. */
5105 if ((l1 != l2) || (s1 != s2))
5108 /* Disallow mixed attributes. */
5109 if ((l1 & s2) || (l2 & s1))
5113 /* Check for mismatched ISR attribute. */
5114 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5116 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5117 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5119 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5126 /* Assign default attributes to a newly defined type. This is used to
5127 set short_call/long_call attributes for function types of
5128 functions defined inside corresponding #pragma scopes. */
5130 arm_set_default_type_attributes (tree type)
5132 /* Add __attribute__ ((long_call)) to all functions when inside
5133 #pragma long_calls, or __attribute__ ((short_call)) when inside
5134 #pragma no_long_calls. */
5135 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5137 tree type_attr_list, attr_name;
5138 type_attr_list = TYPE_ATTRIBUTES (type);
5140 if (arm_pragma_long_calls == LONG)
5141 attr_name = get_identifier ("long_call");
5142 else if (arm_pragma_long_calls == SHORT)
5143 attr_name = get_identifier ("short_call");
5147 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5148 TYPE_ATTRIBUTES (type) = type_attr_list;
5152 /* Return true if DECL is known to be linked into section SECTION. */
5155 arm_function_in_section_p (tree decl, section *section)
5157 /* We can only be certain about functions defined in the same
5158 compilation unit. */
5159 if (!TREE_STATIC (decl))
5162 /* Make sure that SYMBOL always binds to the definition in this
5163 compilation unit. */
5164 if (!targetm.binds_local_p (decl))
5167 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5168 if (!DECL_SECTION_NAME (decl))
5170 /* Make sure that we will not create a unique section for DECL. */
5171 if (flag_function_sections || DECL_ONE_ONLY (decl))
5175 return function_section (decl) == section;
5178 /* Return nonzero if a 32-bit "long_call" should be generated for
5179 a call from the current function to DECL. We generate a long_call
5182 a. has an __attribute__((long_call))
5183 or b. is within the scope of a #pragma long_calls
5184 or c. the -mlong-calls command line switch has been specified
5186 However we do not generate a long call if the function:
5188 d. has an __attribute__ ((short_call))
5189 or e. is inside the scope of a #pragma no_long_calls
5190 or f. is defined in the same section as the current function. */
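/* So, for example, even with -mlong-calls a declaration such as

     void leaf_fn (void) __attribute__((short_call));

   (hypothetical) keeps calls to leaf_fn as plain BL instructions,
   because rule d takes precedence over rule c. */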
5193 arm_is_long_call_p (tree decl)
5198 return TARGET_LONG_CALLS;
5200 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5201 if (lookup_attribute ("short_call", attrs))
5204 /* For "f", be conservative, and only cater for cases in which the
5205 whole of the current function is placed in the same section. */
5206 if (!flag_reorder_blocks_and_partition
5207 && TREE_CODE (decl) == FUNCTION_DECL
5208 && arm_function_in_section_p (decl, current_function_section ()))
5211 if (lookup_attribute ("long_call", attrs))
5214 return TARGET_LONG_CALLS;
5217 /* Return nonzero if it is ok to make a tail-call to DECL. */
5219 arm_function_ok_for_sibcall (tree decl, tree exp)
5221 unsigned long func_type;
5223 if (cfun->machine->sibcall_blocked)
5226 /* Never tailcall something for which we have no decl, or if we
5227 are generating code for Thumb-1. */
5228 if (decl == NULL || TARGET_THUMB1)
5231 /* The PIC register is live on entry to VxWorks PLT entries, so we
5232 must make the call before restoring the PIC register. */
5233 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5236 /* Cannot tail-call to long calls, since these are out of range of
5237 a branch instruction. */
5238 if (arm_is_long_call_p (decl))
5241 /* If we are interworking and the function is not declared static
5242 then we can't tail-call it unless we know that it exists in this
5243 compilation unit (since it might be a Thumb routine). */
5244 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5247 func_type = arm_current_func_type ();
5248 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5249 if (IS_INTERRUPT (func_type))
5252 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5254 /* Check that the return value locations are the same. For
5255 example, that we aren't returning a value from the sibling in
5256 a VFP register but then need to transfer it to a core register. */
5260 a = arm_function_value (TREE_TYPE (exp), decl, false);
5261 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5263 if (!rtx_equal_p (a, b))
5267 /* Never tailcall if function may be called with a misaligned SP. */
5268 if (IS_STACKALIGN (func_type))
5271 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5272 references should become a NOP. Don't convert such calls into
5274 if (TARGET_AAPCS_BASED
5275 && arm_abi == ARM_ABI_AAPCS
5276 && DECL_WEAK (decl))
5279 /* Everything else is ok. */
5284 /* Addressing mode support functions. */
5286 /* Return nonzero if X is a legitimate immediate operand when compiling
5287 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5289 legitimate_pic_operand_p (rtx x)
5291 if (GET_CODE (x) == SYMBOL_REF
5292 || (GET_CODE (x) == CONST
5293 && GET_CODE (XEXP (x, 0)) == PLUS
5294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5300 /* Record that the current function needs a PIC register. Initialize
5301 cfun->machine->pic_reg if we have not already done so. */
5304 require_pic_register (void)
5306 /* A lot of the logic here is made obscure by the fact that this
5307 routine gets called as part of the rtx cost estimation process.
5308 We don't want those calls to affect any assumptions about the real
5309 function; and further, we can't call entry_of_function() until we
5310 start the real expansion process. */
5311 if (!crtl->uses_pic_offset_table)
5313 gcc_assert (can_create_pseudo_p ());
5314 if (arm_pic_register != INVALID_REGNUM)
5316 if (!cfun->machine->pic_reg)
5317 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5319 /* Play games to avoid marking the function as needing pic
5320 if we are being called as part of the cost-estimation process. */
5322 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5323 crtl->uses_pic_offset_table = 1;
5329 if (!cfun->machine->pic_reg)
5330 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5332 /* Play games to avoid marking the function as needing pic
5333 if we are being called as part of the cost-estimation process. */
5335 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5337 crtl->uses_pic_offset_table = 1;
5340 arm_load_pic_register (0UL);
5345 for (insn = seq; insn; insn = NEXT_INSN (insn))
5347 INSN_LOCATOR (insn) = prologue_locator;
5349 /* We can be called during expansion of PHI nodes, where
5350 we can't yet emit instructions directly in the final
5351 insn stream. Queue the insns on the entry edge; they will
5352 be committed after everything else is expanded. */
5353 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5360 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5362 if (GET_CODE (orig) == SYMBOL_REF
5363 || GET_CODE (orig) == LABEL_REF)
5369 gcc_assert (can_create_pseudo_p ());
5370 reg = gen_reg_rtx (Pmode);
5373 /* VxWorks does not impose a fixed gap between segments; the run-time
5374 gap can be different from the object-file gap. We therefore can't
5375 use GOTOFF unless we are absolutely sure that the symbol is in the
5376 same segment as the GOT. Unfortunately, the flexibility of linker
5377 scripts means that we can't be sure of that in general, so assume
5378 that GOTOFF is never valid on VxWorks. */
5379 if ((GET_CODE (orig) == LABEL_REF
5380 || (GET_CODE (orig) == SYMBOL_REF &&
5381 SYMBOL_REF_LOCAL_P (orig)))
5383 && !TARGET_VXWORKS_RTP)
5384 insn = arm_pic_static_addr (orig, reg);
5390 /* If this function doesn't have a pic register, create one now. */
5391 require_pic_register ();
5393 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5395 /* Make the MEM as close to a constant as possible. */
5396 mem = SET_SRC (pat);
5397 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5398 MEM_READONLY_P (mem) = 1;
5399 MEM_NOTRAP_P (mem) = 1;
5401 insn = emit_insn (pat);
5404 /* Put a REG_EQUAL note on this insn, so that it can be optimized by the loop pass. */
5406 set_unique_reg_note (insn, REG_EQUAL, orig);
5410 else if (GET_CODE (orig) == CONST)
5414 if (GET_CODE (XEXP (orig, 0)) == PLUS
5415 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5418 /* Handle the case where we have: const (UNSPEC_TLS). */
5419 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5420 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5423 /* Handle the case where we have:
5424 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5426 if (GET_CODE (XEXP (orig, 0)) == PLUS
5427 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5428 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5430 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5436 gcc_assert (can_create_pseudo_p ());
5437 reg = gen_reg_rtx (Pmode);
5440 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5442 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5443 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5444 base == reg ? 0 : reg);
5446 if (GET_CODE (offset) == CONST_INT)
5448 /* The base register doesn't really matter; we only want to
5449 test the index for the appropriate mode. */
5450 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5452 gcc_assert (can_create_pseudo_p ());
5453 offset = force_reg (Pmode, offset);
5456 if (GET_CODE (offset) == CONST_INT)
5457 return plus_constant (base, INTVAL (offset));
5460 if (GET_MODE_SIZE (mode) > 4
5461 && (GET_MODE_CLASS (mode) == MODE_INT
5462 || TARGET_SOFT_FLOAT))
5464 emit_insn (gen_addsi3 (reg, base, offset));
5468 return gen_rtx_PLUS (Pmode, base, offset);
5475 /* Find a spare register to use during the prolog of a function. */
5478 thumb_find_work_register (unsigned long pushed_regs_mask)
5482 /* Check the argument registers first as these are call-used. The
5483 register allocation order means that sometimes r3 might be used
5484 but earlier argument registers might not, so check them all. */
5485 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5486 if (!df_regs_ever_live_p (reg))
5489 /* Before going on to check the call-saved registers we can try a couple
5490 more ways of deducing that r3 is available. The first is when we are
5491 pushing anonymous arguments onto the stack and we have less than 4
5492 registers worth of fixed arguments(*). In this case r3 will be part of
5493 the variable argument list and so we can be sure that it will be
5494 pushed right at the start of the function. Hence it will be available
5495 for the rest of the prologue.
5496 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5497 if (cfun->machine->uses_anonymous_args
5498 && crtl->args.pretend_args_size > 0)
5499 return LAST_ARG_REGNUM;
5501 /* The other case is when we have fixed arguments but less than 4 registers
5502 worth. In this case r3 might be used in the body of the function, but
5503 it is not being used to convey an argument into the function. In theory
5504 we could just check crtl->args.size to see how many bytes are
5505 being passed in argument registers, but it seems that it is unreliable.
5506 Sometimes it will have the value 0 when in fact arguments are being
5507 passed. (See testcase execute/20021111-1.c for an example). So we also
5508 check the args_info.nregs field as well. The problem with this field is
5509 that it makes no allowances for arguments that are passed to the
5510 function but which are not used. Hence we could miss an opportunity
5511 when a function has an unused argument in r3. But it is better to be
5512 safe than to be sorry. */
5513 if (! cfun->machine->uses_anonymous_args
5514 && crtl->args.size >= 0
5515 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5516 && crtl->args.info.nregs < 4)
5517 return LAST_ARG_REGNUM;
5519 /* Otherwise look for a call-saved register that is going to be pushed. */
5520 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5521 if (pushed_regs_mask & (1 << reg))
5526 /* Thumb-2 can use high regs. */
5527 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5528 if (pushed_regs_mask & (1 << reg))
5531 /* Something went wrong - thumb_compute_save_reg_mask()
5532 should have arranged for a suitable register to be pushed. */
5536 static GTY(()) int pic_labelno;
5538 /* Generate code to load the PIC register. In Thumb mode SCRATCH is a low register. */
5542 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5544 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5546 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5549 gcc_assert (flag_pic);
5551 pic_reg = cfun->machine->pic_reg;
5552 if (TARGET_VXWORKS_RTP)
5554 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5555 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5556 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5558 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5560 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5561 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5565 /* We use an UNSPEC rather than a LABEL_REF because this label
5566 never appears in the code stream. */
5568 labelno = GEN_INT (pic_labelno++);
5569 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5570 l1 = gen_rtx_CONST (VOIDmode, l1);
5572 /* On the ARM the PC register contains 'dot + 8' at the time of the
5573 addition; on the Thumb it is 'dot + 4'. */
5574 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5575 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5577 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5581 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5583 else /* TARGET_THUMB1 */
5585 if (arm_pic_register != INVALID_REGNUM
5586 && REGNO (pic_reg) > LAST_LO_REGNUM)
5588 /* We will have pushed the pic register, so we should always be
5589 able to find a work register. */
5590 pic_tmp = gen_rtx_REG (SImode,
5591 thumb_find_work_register (saved_regs));
5592 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5593 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5594 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5597 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5601 /* Need to emit this whether or not we obey regdecls,
5602 since setjmp/longjmp can cause life info to screw up. */
5606 /* Generate code to load the address of a static var when flag_pic is set. */
5608 arm_pic_static_addr (rtx orig, rtx reg)
5610 rtx l1, labelno, offset_rtx, insn;
5612 gcc_assert (flag_pic);
5614 /* We use an UNSPEC rather than a LABEL_REF because this label
5615 never appears in the code stream. */
5616 labelno = GEN_INT (pic_labelno++);
5617 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5618 l1 = gen_rtx_CONST (VOIDmode, l1);
5620 /* On the ARM the PC register contains 'dot + 8' at the time of the
5621 addition; on the Thumb it is 'dot + 4'. */
5622 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5623 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5624 UNSPEC_SYMBOL_OFFSET);
5625 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5627 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5631 /* Return nonzero if X is valid as an ARM state addressing register. */
5633 arm_address_register_rtx_p (rtx x, int strict_p)
5637 if (GET_CODE (x) != REG)
5643 return ARM_REGNO_OK_FOR_BASE_P (regno);
5645 return (regno <= LAST_ARM_REGNUM
5646 || regno >= FIRST_PSEUDO_REGISTER
5647 || regno == FRAME_POINTER_REGNUM
5648 || regno == ARG_POINTER_REGNUM);
5651 /* Return TRUE if this rtx is the difference of a symbol and a label,
5652 and will reduce to a PC-relative relocation in the object file.
5653 Expressions like this can be left alone when generating PIC, rather
5654 than forced through the GOT. */
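/* For instance, a constant of the form

     (minus (symbol_ref ("x")) (label_ref L))

   (illustrative RTL) satisfies this test and needs no GOT entry when
   generating PIC. */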
5656 pcrel_constant_p (rtx x)
5658 if (GET_CODE (x) == MINUS)
5659 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5664 /* Return true if X will surely end up in an index register after the next splitting pass. */
5667 will_be_in_index_register (const_rtx x)
5669 /* arm.md: calculate_pic_address will split this into a register. */
5670 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5673 /* Return nonzero if X is a valid ARM state address operand. */
5675 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5679 enum rtx_code code = GET_CODE (x);
5681 if (arm_address_register_rtx_p (x, strict_p))
5684 use_ldrd = (TARGET_LDRD
5686 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5688 if (code == POST_INC || code == PRE_DEC
5689 || ((code == PRE_INC || code == POST_DEC)
5690 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5691 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5693 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5694 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5695 && GET_CODE (XEXP (x, 1)) == PLUS
5696 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5698 rtx addend = XEXP (XEXP (x, 1), 1);
5700 /* Don't allow ldrd post increment by register because it's hard
5701 to fix up invalid register choices. */
5703 && GET_CODE (x) == POST_MODIFY
5704 && GET_CODE (addend) == REG)
5707 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5708 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5711 /* After reload constants split into minipools will have addresses
5712 from a LABEL_REF. */
5713 else if (reload_completed
5714 && (code == LABEL_REF
5716 && GET_CODE (XEXP (x, 0)) == PLUS
5717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5718 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5721 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5724 else if (code == PLUS)
5726 rtx xop0 = XEXP (x, 0);
5727 rtx xop1 = XEXP (x, 1);
5729 return ((arm_address_register_rtx_p (xop0, strict_p)
5730 && ((GET_CODE(xop1) == CONST_INT
5731 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5732 || (!strict_p && will_be_in_index_register (xop1))))
5733 || (arm_address_register_rtx_p (xop1, strict_p)
5734 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5738 /* Reload currently can't handle MINUS, so disable this for now. */
5739 else if (GET_CODE (x) == MINUS)
5741 rtx xop0 = XEXP (x, 0);
5742 rtx xop1 = XEXP (x, 1);
5744 return (arm_address_register_rtx_p (xop0, strict_p)
5745 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5749 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5750 && code == SYMBOL_REF
5751 && CONSTANT_POOL_ADDRESS_P (x)
5753 && symbol_mentioned_p (get_pool_constant (x))
5754 && ! pcrel_constant_p (get_pool_constant (x))))
5760 /* Return nonzero if X is a valid Thumb-2 address operand. */
5762 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5765 enum rtx_code code = GET_CODE (x);
5767 if (arm_address_register_rtx_p (x, strict_p))
5770 use_ldrd = (TARGET_LDRD
5772 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5774 if (code == POST_INC || code == PRE_DEC
5775 || ((code == PRE_INC || code == POST_DEC)
5776 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5777 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5779 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5780 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5781 && GET_CODE (XEXP (x, 1)) == PLUS
5782 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5784 /* Thumb-2 only has autoincrement by constant. */
5785 rtx addend = XEXP (XEXP (x, 1), 1);
5786 HOST_WIDE_INT offset;
5788 if (GET_CODE (addend) != CONST_INT)
5791 offset = INTVAL(addend);
5792 if (GET_MODE_SIZE (mode) <= 4)
5793 return (offset > -256 && offset < 256);
5795 return (use_ldrd && offset > -1024 && offset < 1024
5796 && (offset & 3) == 0);
5799 /* After reload constants split into minipools will have addresses
5800 from a LABEL_REF. */
5801 else if (reload_completed
5802 && (code == LABEL_REF
5804 && GET_CODE (XEXP (x, 0)) == PLUS
5805 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5809 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5812 else if (code == PLUS)
5814 rtx xop0 = XEXP (x, 0);
5815 rtx xop1 = XEXP (x, 1);
5817 return ((arm_address_register_rtx_p (xop0, strict_p)
5818 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5819 || (!strict_p && will_be_in_index_register (xop1))))
5820 || (arm_address_register_rtx_p (xop1, strict_p)
5821 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5824 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5825 && code == SYMBOL_REF
5826 && CONSTANT_POOL_ADDRESS_P (x)
5828 && symbol_mentioned_p (get_pool_constant (x))
5829 && ! pcrel_constant_p (get_pool_constant (x))))
5835 /* Return nonzero if INDEX is valid for an address index operand in
5838 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5841 HOST_WIDE_INT range;
5842 enum rtx_code code = GET_CODE (index);
5844 /* Standard coprocessor addressing modes. */
5845 if (TARGET_HARD_FLOAT
5846 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5847 && (mode == SFmode || mode == DFmode
5848 || (TARGET_MAVERICK && mode == DImode)))
5849 return (code == CONST_INT && INTVAL (index) < 1024
5850 && INTVAL (index) > -1024
5851 && (INTVAL (index) & 3) == 0);
5853 /* For quad modes, we restrict the constant offset to be slightly less
5854 than what the instruction format permits. We do this because for
5855 quad mode moves, we will actually decompose them into two separate
5856 double-mode reads or writes. INDEX must therefore be a valid
5857 (double-mode) offset and so should INDEX+8. */
5858 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5859 return (code == CONST_INT
5860 && INTVAL (index) < 1016
5861 && INTVAL (index) > -1024
5862 && (INTVAL (index) & 3) == 0);
5864 /* We have no such constraint on double mode offsets, so we permit the
5865 full range of the instruction format. */
5866 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5867 return (code == CONST_INT
5868 && INTVAL (index) < 1024
5869 && INTVAL (index) > -1024
5870 && (INTVAL (index) & 3) == 0);
5872 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5873 return (code == CONST_INT
5874 && INTVAL (index) < 1024
5875 && INTVAL (index) > -1024
5876 && (INTVAL (index) & 3) == 0);
5878 if (arm_address_register_rtx_p (index, strict_p)
5879 && (GET_MODE_SIZE (mode) <= 4))
5882 if (mode == DImode || mode == DFmode)
5884 if (code == CONST_INT)
5886 HOST_WIDE_INT val = INTVAL (index);
5889 return val > -256 && val < 256;
5891 return val > -4096 && val < 4092;
5894 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5897 if (GET_MODE_SIZE (mode) <= 4
5901 || (mode == QImode && outer == SIGN_EXTEND))))
5905 rtx xiop0 = XEXP (index, 0);
5906 rtx xiop1 = XEXP (index, 1);
5908 return ((arm_address_register_rtx_p (xiop0, strict_p)
5909 && power_of_two_operand (xiop1, SImode))
5910 || (arm_address_register_rtx_p (xiop1, strict_p)
5911 && power_of_two_operand (xiop0, SImode)));
5913 else if (code == LSHIFTRT || code == ASHIFTRT
5914 || code == ASHIFT || code == ROTATERT)
5916 rtx op = XEXP (index, 1);
5918 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5919 && GET_CODE (op) == CONST_INT
5921 && INTVAL (op) <= 31);
5925 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5931 || (outer == SIGN_EXTEND && mode == QImode))
5937 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5939 return (code == CONST_INT
5940 && INTVAL (index) < range
5941 && INTVAL (index) > -range);
5944 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5945 index operand, i.e. 1, 2, 4 or 8. */
5947 thumb2_index_mul_operand (rtx op)
5951 if (GET_CODE(op) != CONST_INT)
5955 return (val == 1 || val == 2 || val == 4 || val == 8);
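/* Such factors arise from scaled-index addresses like [Rn, Rm, LSL #2],
   which reach here in the canonical form
     (plus (reg Rn) (mult (reg Rm) (const_int 4)))
   (an illustrative sketch). */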
5958 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5960 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5962 enum rtx_code code = GET_CODE (index);
5964 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5965 /* Standard coprocessor addressing modes. */
5966 if (TARGET_HARD_FLOAT
5967 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5968 && (mode == SFmode || mode == DFmode
5969 || (TARGET_MAVERICK && mode == DImode)))
5970 return (code == CONST_INT && INTVAL (index) < 1024
5971 /* Thumb-2 allows only > -256 index range for its core register
5972 load/stores. Since we allow SF/DF in core registers, we have
5973 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5975 && INTVAL (index) > -256
5976 && (INTVAL (index) & 3) == 0);
5978 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5980 /* For DImode assume values will usually live in core regs
5981 and only allow LDRD addressing modes. */
5982 if (!TARGET_LDRD || mode != DImode)
5983 return (code == CONST_INT
5984 && INTVAL (index) < 1024
5985 && INTVAL (index) > -1024
5986 && (INTVAL (index) & 3) == 0);
5989 /* For quad modes, we restrict the constant offset to be slightly less
5990 than what the instruction format permits. We do this because for
5991 quad mode moves, we will actually decompose them into two separate
5992 double-mode reads or writes. INDEX must therefore be a valid
5993 (double-mode) offset and so should INDEX+8. */
5994 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5995 return (code == CONST_INT
5996 && INTVAL (index) < 1016
5997 && INTVAL (index) > -1024
5998 && (INTVAL (index) & 3) == 0);
6000 /* We have no such constraint on double mode offsets, so we permit the
6001 full range of the instruction format. */
6002 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6003 return (code == CONST_INT
6004 && INTVAL (index) < 1024
6005 && INTVAL (index) > -1024
6006 && (INTVAL (index) & 3) == 0);
6008 if (arm_address_register_rtx_p (index, strict_p)
6009 && (GET_MODE_SIZE (mode) <= 4))
6012 if (mode == DImode || mode == DFmode)
6014 if (code == CONST_INT)
6016 HOST_WIDE_INT val = INTVAL (index);
6017 /* ??? Can we assume ldrd for thumb2? */
6018 /* Thumb-2 ldrd only has reg+const addressing modes. */
6019 /* ldrd supports offsets of +-1020.
6020 However the ldr fallback does not. */
6021 return val > -256 && val < 256 && (val & 3) == 0;
6029 rtx xiop0 = XEXP (index, 0);
6030 rtx xiop1 = XEXP (index, 1);
6032 return ((arm_address_register_rtx_p (xiop0, strict_p)
6033 && thumb2_index_mul_operand (xiop1))
6034 || (arm_address_register_rtx_p (xiop1, strict_p)
6035 && thumb2_index_mul_operand (xiop0)));
6037 else if (code == ASHIFT)
6039 rtx op = XEXP (index, 1);
6041 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6042 && GET_CODE (op) == CONST_INT
6044 && INTVAL (op) <= 3);
6047 return (code == CONST_INT
6048 && INTVAL (index) < 4096
6049 && INTVAL (index) > -256);
6052 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6054 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6058 if (GET_CODE (x) != REG)
6064 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6066 return (regno <= LAST_LO_REGNUM
6067 || regno > LAST_VIRTUAL_REGISTER
6068 || regno == FRAME_POINTER_REGNUM
6069 || (GET_MODE_SIZE (mode) >= 4
6070 && (regno == STACK_POINTER_REGNUM
6071 || regno >= FIRST_PSEUDO_REGISTER
6072 || x == hard_frame_pointer_rtx
6073 || x == arg_pointer_rtx)));
6076 /* Return nonzero if x is a legitimate index register. This is the case
6077 for any base register that can access a QImode object. */
6079 thumb1_index_register_rtx_p (rtx x, int strict_p)
6081 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6084 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6086 The AP may be eliminated to either the SP or the FP, so we use the
6087 least common denominator, e.g. SImode, and offsets from 0 to 64.
6089 ??? Verify whether the above is the right approach.
6091 ??? Also, the FP may be eliminated to the SP, so perhaps that
6092 needs special handling also.
6094 ??? Look at how the mips16 port solves this problem. It probably uses
6095 better ways to solve some of these problems.
6097 Although it is not incorrect, we don't accept QImode and HImode
6098 addresses based on the frame pointer or arg pointer until the
6099 reload pass starts. This is so that eliminating such addresses
6100 into stack based ones won't produce impossible code. */
6102 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6104 /* ??? Not clear if this is right. Experiment. */
6105 if (GET_MODE_SIZE (mode) < 4
6106 && !(reload_in_progress || reload_completed)
6107 && (reg_mentioned_p (frame_pointer_rtx, x)
6108 || reg_mentioned_p (arg_pointer_rtx, x)
6109 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6110 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6111 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6112 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6115 /* Accept any base register. SP only in SImode or larger. */
6116 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6119 /* This is PC relative data before arm_reorg runs. */
6120 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6121 && GET_CODE (x) == SYMBOL_REF
6122 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6125 /* This is PC relative data after arm_reorg runs. */
6126 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6128 && (GET_CODE (x) == LABEL_REF
6129 || (GET_CODE (x) == CONST
6130 && GET_CODE (XEXP (x, 0)) == PLUS
6131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6132 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6135 /* Post-inc indexing only supported for SImode and larger. */
6136 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6137 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6140 else if (GET_CODE (x) == PLUS)
6142 /* REG+REG address can be any two index registers. */
6143 /* We disallow FRAME+REG addressing since we know that FRAME
6144 will be replaced with STACK, and SP relative addressing only
6145 permits SP+OFFSET. */
6146 if (GET_MODE_SIZE (mode) <= 4
6147 && XEXP (x, 0) != frame_pointer_rtx
6148 && XEXP (x, 1) != frame_pointer_rtx
6149 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6150 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6151 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6154 /* REG+const has 5-7 bit offset for non-SP registers. */
6155 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6156 || XEXP (x, 0) == arg_pointer_rtx)
6157 && GET_CODE (XEXP (x, 1)) == CONST_INT
6158 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6161 /* REG+const has 10-bit offset for SP, but only SImode and
6162 larger are supported. */
6163 /* ??? Should probably check for DI/DFmode overflow here
6164 just like GO_IF_LEGITIMATE_OFFSET does. */
6165 else if (GET_CODE (XEXP (x, 0)) == REG
6166 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6167 && GET_MODE_SIZE (mode) >= 4
6168 && GET_CODE (XEXP (x, 1)) == CONST_INT
6169 && INTVAL (XEXP (x, 1)) >= 0
6170 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6171 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6174 else if (GET_CODE (XEXP (x, 0)) == REG
6175 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6176 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6177 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6178 && REGNO (XEXP (x, 0))
6179 <= LAST_VIRTUAL_POINTER_REGISTER))
6180 && GET_MODE_SIZE (mode) >= 4
6181 && GET_CODE (XEXP (x, 1)) == CONST_INT
6182 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6186 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6187 && GET_MODE_SIZE (mode) == 4
6188 && GET_CODE (x) == SYMBOL_REF
6189 && CONSTANT_POOL_ADDRESS_P (x)
6191 && symbol_mentioned_p (get_pool_constant (x))
6192 && ! pcrel_constant_p (get_pool_constant (x))))
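/* To make the cases above concrete, these are representative addresses
   that pass the checks (an illustrative summary, not an exhaustive list):

     (reg r3)                          any base register; SP only in
                                       SImode or larger
     (plus (reg r2) (reg r3))          mode size <= 4, neither side the FP
     (plus (reg r3) (const_int 60))    SImode: 60 is in [0,124], 4-aligned
     (plus (reg sp) (const_int 1020))  SImode or larger, 10-bit SP offset
     (post_inc (reg r3))               SImode or larger only

   matching the Thumb-1 [rN], [rN, rM], [rN, #imm5], [sp, #imm8] and
   ldmia-style addressing forms. */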
6198 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6199 instruction of mode MODE. */
6201 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
switch (GET_MODE_SIZE (mode))
  {
  case 1:
    return val >= 0 && val < 32;

  case 2:
    return val >= 0 && val < 64 && (val & 1) == 0;

  default:
    return (val >= 0
            && (val + GET_MODE_SIZE (mode)) <= 128
            && (val & 3) == 0);
  }
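/* Concretely, the ranges checked above come down to (illustrative):
     size 1 (QImode): val in [0, 31]              ldrb rD, [rN, #31]
     size 2 (HImode): even val in [0, 62]         ldrh rD, [rN, #62]
     size 4 (SImode): 4-aligned val in [0, 124]   ldr  rD, [rN, #124]  */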
6219 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6222 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6223 else if (TARGET_THUMB2)
6224 return thumb2_legitimate_address_p (mode, x, strict_p);
6225 else /* if (TARGET_THUMB1) */
6226 return thumb1_legitimate_address_p (mode, x, strict_p);
6229 /* Build the SYMBOL_REF for __tls_get_addr. */
6231 static GTY(()) rtx tls_get_addr_libfunc;
6234 get_tls_get_addr (void)
6236 if (!tls_get_addr_libfunc)
6237 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6238 return tls_get_addr_libfunc;
6242 arm_load_tp (rtx target)
6245 target = gen_reg_rtx (SImode);
6249 /* Can return in any reg. */
6250 emit_insn (gen_load_tp_hard (target));
6254 /* Always returned in r0. Immediately copy the result into a pseudo,
6255 otherwise other uses of r0 (e.g. setting up function arguments) may
6256 clobber the value. */
6260 emit_insn (gen_load_tp_soft ());
6262 tmp = gen_rtx_REG (SImode, 0);
6263 emit_move_insn (target, tmp);
6269 load_tls_operand (rtx x, rtx reg)
6273 if (reg == NULL_RTX)
6274 reg = gen_reg_rtx (SImode);
6276 tmp = gen_rtx_CONST (SImode, x);
6278 emit_move_insn (reg, tmp);
6284 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6286 rtx insns, label, labelno, sum;
6288 gcc_assert (reloc != TLS_DESCSEQ);
6291 labelno = GEN_INT (pic_labelno++);
6292 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6293 label = gen_rtx_CONST (VOIDmode, label);
6295 sum = gen_rtx_UNSPEC (Pmode,
6296 gen_rtvec (4, x, GEN_INT (reloc), label,
6297 GEN_INT (TARGET_ARM ? 8 : 4)),
6299 reg = load_tls_operand (sum, reg);
6302 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6304 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6306 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6307 LCT_PURE, /* LCT_CONST? */
6308 Pmode, 1, reg, Pmode);
6310 insns = get_insns ();
6317 arm_tls_descseq_addr (rtx x, rtx reg)
6319 rtx labelno = GEN_INT (pic_labelno++);
6320 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6321 rtx sum = gen_rtx_UNSPEC (Pmode,
6322 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6323 gen_rtx_CONST (VOIDmode, label),
6324 GEN_INT (!TARGET_ARM)),
6326 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6328 emit_insn (gen_tlscall (x, labelno));
6330 reg = gen_reg_rtx (SImode);
6332 gcc_assert (REGNO (reg) != 0);
6334 emit_move_insn (reg, reg0);
6340 legitimize_tls_address (rtx x, rtx reg)
6342 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6343 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6347 case TLS_MODEL_GLOBAL_DYNAMIC:
6348 if (TARGET_GNU2_TLS)
6350 reg = arm_tls_descseq_addr (x, reg);
6352 tp = arm_load_tp (NULL_RTX);
6354 dest = gen_rtx_PLUS (Pmode, tp, reg);
6358 /* Original scheme */
6359 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6360 dest = gen_reg_rtx (Pmode);
6361 emit_libcall_block (insns, dest, ret, x);
6365 case TLS_MODEL_LOCAL_DYNAMIC:
6366 if (TARGET_GNU2_TLS)
6368 reg = arm_tls_descseq_addr (x, reg);
6370 tp = arm_load_tp (NULL_RTX);
6372 dest = gen_rtx_PLUS (Pmode, tp, reg);
6376 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6378 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6379 share the LDM result with other LD model accesses. */
6380 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6382 dest = gen_reg_rtx (Pmode);
6383 emit_libcall_block (insns, dest, ret, eqv);
6385 /* Load the addend. */
6386 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6387 GEN_INT (TLS_LDO32)),
6389 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6390 dest = gen_rtx_PLUS (Pmode, dest, addend);
6394 case TLS_MODEL_INITIAL_EXEC:
6395 labelno = GEN_INT (pic_labelno++);
6396 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6397 label = gen_rtx_CONST (VOIDmode, label);
6398 sum = gen_rtx_UNSPEC (Pmode,
6399 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6400 GEN_INT (TARGET_ARM ? 8 : 4)),
6402 reg = load_tls_operand (sum, reg);
6405 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6406 else if (TARGET_THUMB2)
6407 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6410 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6411 emit_move_insn (reg, gen_const_mem (SImode, reg));
6414 tp = arm_load_tp (NULL_RTX);
6416 return gen_rtx_PLUS (Pmode, tp, reg);
6418 case TLS_MODEL_LOCAL_EXEC:
6419 tp = arm_load_tp (NULL_RTX);
6421 reg = gen_rtx_UNSPEC (Pmode,
6422 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6424 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6426 return gen_rtx_PLUS (Pmode, tp, reg);
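/* In summary, the expansions produced by the cases above are (a sketch;
   the exact relocations come from the UNSPEC_TLS flavours used):

     global/local dynamic:  call __tls_get_addr on x's argument block
                            (or, for TARGET_GNU2_TLS, the inline TLS
                            descriptor sequence emitted via gen_tlscall)
     initial exec:          reg = [pc-relative slot holding tp offset of x];
                            dest = tp + reg
     local exec:            dest = tp + tp-offset of x

   where tp is the thread pointer obtained by arm_load_tp above. */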
6433 /* Try machine-dependent ways of modifying an illegitimate address
6434 to be legitimate. If we find one, return the new, valid address. */
6436 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6440 /* TODO: legitimize_address for Thumb2. */
6443 return thumb_legitimize_address (x, orig_x, mode);
6446 if (arm_tls_symbol_p (x))
6447 return legitimize_tls_address (x, NULL_RTX);
6449 if (GET_CODE (x) == PLUS)
6451 rtx xop0 = XEXP (x, 0);
6452 rtx xop1 = XEXP (x, 1);
6454 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6455 xop0 = force_reg (SImode, xop0);
6457 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6458 xop1 = force_reg (SImode, xop1);
6460 if (ARM_BASE_REGISTER_RTX_P (xop0)
6461 && GET_CODE (xop1) == CONST_INT)
6463 HOST_WIDE_INT n, low_n;
6467 /* VFP addressing modes actually allow greater offsets, but for
6468 now we just stick with the lowest common denominator. */
6470 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6482 low_n = ((mode) == TImode ? 0
6483 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6487 base_reg = gen_reg_rtx (SImode);
6488 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6489 emit_move_insn (base_reg, val);
6490 x = plus_constant (base_reg, low_n);
6492 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6493 x = gen_rtx_PLUS (SImode, xop0, xop1);
6496 /* XXX We don't allow MINUS any more -- see comment in
6497 arm_legitimate_address_outer_p (). */
6498 else if (GET_CODE (x) == MINUS)
6500 rtx xop0 = XEXP (x, 0);
6501 rtx xop1 = XEXP (x, 1);
6503 if (CONSTANT_P (xop0))
6504 xop0 = force_reg (SImode, xop0);
6506 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6507 xop1 = force_reg (SImode, xop1);
6509 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6510 x = gen_rtx_MINUS (SImode, xop0, xop1);
/* Make sure to take full advantage of the pre-indexed addressing mode
   with absolute addresses which often allows for the base register to
   be factorized for multiple adjacent memory references, and it might
   even allow for the mini pool to be avoided entirely. */
6517 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6520 HOST_WIDE_INT mask, base, index;
/* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
   use an 8-bit index. So let's use a 12-bit index for SImode only and
   hope that arm_gen_constant will enable ldrb to use more bits. */
6526 bits = (mode == SImode) ? 12 : 8;
6527 mask = (1 << bits) - 1;
6528 base = INTVAL (x) & ~mask;
6529 index = INTVAL (x) & mask;
6530 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6532 /* It'll most probably be more efficient to generate the base
6533 with more bits set and use a negative index instead. */
6537 base_reg = force_reg (SImode, GEN_INT (base));
6538 x = plus_constant (base_reg, index);
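/* A worked example (values invented for illustration): a SImode load
   from absolute address 0x1234 uses bits = 12, so mask = 0xfff,
   base = 0x1000 and index = 0x234. bit_count (0x1000) is 1, which does
   not exceed (32 - 12)/2 = 10, so the positive index is kept and we get:
       mov r0, #0x1000        @ via arm_gen_constant
       ldr r1, [r0, #0x234]
   letting nearby accesses to 0x1000..0x1fff share the base register. */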
/* We need to find and carefully transform any SYMBOL and LABEL
   references, so go back to the original address expression. */
6545 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6547 if (new_x != orig_x)
6555 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6556 to be legitimate. If we find one, return the new, valid address. */
6558 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6560 if (arm_tls_symbol_p (x))
6561 return legitimize_tls_address (x, NULL_RTX);
6563 if (GET_CODE (x) == PLUS
6564 && GET_CODE (XEXP (x, 1)) == CONST_INT
6565 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6566 || INTVAL (XEXP (x, 1)) < 0))
6568 rtx xop0 = XEXP (x, 0);
6569 rtx xop1 = XEXP (x, 1);
6570 HOST_WIDE_INT offset = INTVAL (xop1);
6572 /* Try and fold the offset into a biasing of the base register and
6573 then offsetting that. Don't do this when optimizing for space
6574 since it can cause too many CSEs. */
6575 if (optimize_size && offset >= 0
6576 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6578 HOST_WIDE_INT delta;
6581 delta = offset - (256 - GET_MODE_SIZE (mode));
6582 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6583 delta = 31 * GET_MODE_SIZE (mode);
6585 delta = offset & (~31 * GET_MODE_SIZE (mode));
6587 xop0 = force_operand (plus_constant (xop0, offset - delta),
6589 x = plus_constant (xop0, delta);
6591 else if (offset < 0 && offset > -256)
/* Small negative offsets are best done with a subtract before the
   dereference, since forcing these into a register normally takes two
   instructions. */
6595 x = force_operand (x, NULL_RTX);
6598 /* For the remaining cases, force the constant into a register. */
6599 xop1 = force_reg (SImode, xop1);
6600 x = gen_rtx_PLUS (SImode, xop0, xop1);
6603 else if (GET_CODE (x) == PLUS
6604 && s_register_operand (XEXP (x, 1), SImode)
6605 && !s_register_operand (XEXP (x, 0), SImode))
6607 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6609 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
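/* As a worked example of the offset-biasing case above (illustrative
   numbers): for a SImode access at offset 300 (>= 256),
   delta = 300 - (256 - 4) = 48, so the base register is biased by 252
   and the access becomes [rT, #48]; both the bias (an 8-bit add
   immediate) and the word offset fit their Thumb-1 encodings. */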
/* We need to find and carefully transform any SYMBOL and LABEL
   references, so go back to the original address expression. */
6616 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6618 if (new_x != orig_x)
6626 arm_legitimize_reload_address (rtx *p,
6627 enum machine_mode mode,
6628 int opnum, int type,
6629 int ind_levels ATTRIBUTE_UNUSED)
6631 /* We must recognize output that we have already generated ourselves. */
6632 if (GET_CODE (*p) == PLUS
6633 && GET_CODE (XEXP (*p, 0)) == PLUS
6634 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6635 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6636 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6638 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6639 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6640 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6644 if (GET_CODE (*p) == PLUS
6645 && GET_CODE (XEXP (*p, 0)) == REG
6646 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6647 /* If the base register is equivalent to a constant, let the generic
6648 code handle it. Otherwise we will run into problems if a future
6649 reload pass decides to rematerialize the constant. */
6650 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6651 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6653 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6654 HOST_WIDE_INT low, high;
6656 /* Detect coprocessor load/stores. */
6657 bool coproc_p = ((TARGET_HARD_FLOAT
6658 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6659 && (mode == SFmode || mode == DFmode
6660 || (mode == DImode && TARGET_MAVERICK)))
6661 || (TARGET_REALLY_IWMMXT
6662 && VALID_IWMMXT_REG_MODE (mode))
6664 && (VALID_NEON_DREG_MODE (mode)
6665 || VALID_NEON_QREG_MODE (mode))));
6667 /* For some conditions, bail out when lower two bits are unaligned. */
6668 if ((val & 0x3) != 0
6669 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6671 /* For DI, and DF under soft-float: */
6672 || ((mode == DImode || mode == DFmode)
/* Without ldrd, we use stm/ldm, which does not
   fare well with unaligned bits. */
6676 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6677 || TARGET_THUMB2))))
/* When breaking down a [reg+index] reload address into [(reg+high)+low],
   where (reg+high) gets turned into a reload add insn,
   we try to decompose the index into high/low values that can often
   also lead to better reload CSE.
   For example:
           ldr r0, [r2, #4100]   // Offset too large
           ldr r1, [r2, #4104]   // Offset too large

   is best reloaded as:
           add t1, r2, #4096
           ldr r0, [t1, #4]
           add t2, r2, #4096
           ldr r1, [t2, #8]

   which post-reload CSE can simplify in most cases to eliminate the
   second add instruction:
           add t1, r2, #4096
           ldr r0, [t1, #4]
           ldr r1, [t1, #8]
6700 The idea here is that we want to split out the bits of the constant
6701 as a mask, rather than as subtracting the maximum offset that the
6702 respective type of load/store used can handle.
When encountering negative offsets, we can still utilize them even if
the overall offset is positive; sometimes this may lead to an immediate
that can be constructed with fewer instructions. For example:
        ldr r0, [r2, #0x3FFFFC]
6710 This is best reloaded as:
        add t1, r2, #0x400000
        ldr r0, [t1, #-4]
6714 The trick for spotting this for a load insn with N bits of offset
(i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6716 negative offset that is going to make bit N and all the bits below
6717 it become zero in the remainder part.
6719 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6720 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6721 used in most cases of ARM load/store instructions. */
6723 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6724 (((VAL) & ((1 << (N)) - 1)) \
? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
: 0)
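/* A quick worked check of the macro (illustrative, matching the
   0x3FFFFC example above): for VAL = 0x3FFFFC and N = 12,
       VAL & 0xfff      = 0xffc     (nonzero, so take the first arm)
       VAL & 0x1fff     = 0x1ffc
       0x1ffc ^ 0x1000  = 0x0ffc
       0x0ffc - 0x1000  = -4
   so low = -4 and high = VAL - low = 0x400000, which is exactly the
   add #0x400000 / ldr [t1, #-4] split shown above. */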
6730 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6732 /* NEON quad-word load/stores are made of two double-word accesses,
so the valid index range is reduced by 8. Treat as 9-bit range if
we go over it. */
6735 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6736 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
else if (GET_MODE_SIZE (mode) == 8)
  {
    if (TARGET_LDRD)
      low = (TARGET_THUMB2
             ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
             : SIGN_MAG_LOW_ADDR_BITS (val, 8));
    else
      /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
         to access doublewords. The supported load/store offsets are
         -8, -4, and 4, which we try to produce here. */
      low = ((val & 0xf) ^ 0x8) - 0x8;
  }
6750 else if (GET_MODE_SIZE (mode) < 8)
6752 /* NEON element load/stores do not have an offset. */
if (TARGET_NEON_FP16 && mode == HFmode)
  return false;

if (TARGET_THUMB2)
  {
    /* Thumb-2 has an asymmetrical index range of (-256,4096).
       Try the wider 12-bit range first, and re-try if the result
       is out of range. */
    low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
    if (low < -255)
      low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
  }
else
  {
    if (mode == HImode || mode == HFmode)
      {
        if (arm_arch4)
          low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
        else
          {
            /* The storehi/movhi_bytes fallbacks can use only
               [-4094,+4094] of the full ldrb/strb index range. */
            low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            if (low == 4095 || low == -4095)
              return false;
          }
      }
    else
      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
  }
6787 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6788 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6789 - (unsigned HOST_WIDE_INT) 0x80000000);
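/* The XOR/subtract pair above is the usual trick for sign-extending a
   32-bit value held in a wider HOST_WIDE_INT. For example (illustrative):
   if (val - low) masks to 0xfffff000, then
       0xfffff000 ^ 0x80000000 = 0x7ffff000
       0x7ffff000 - 0x80000000 = -0x1000
   recovering the intended negative high part. */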
6790 /* Check for overflow or zero */
6791 if (low == 0 || high == 0 || (high + low != val))
/* Reload the high part into a base reg; leave the low part
   in the mem. */
6796 *p = gen_rtx_PLUS (GET_MODE (*p),
6797 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6800 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6801 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6802 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6810 thumb_legitimize_reload_address (rtx *x_p,
6811 enum machine_mode mode,
6812 int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
6818 && GET_MODE_SIZE (mode) < 4
6819 && REG_P (XEXP (x, 0))
6820 && XEXP (x, 0) == stack_pointer_rtx
6821 && GET_CODE (XEXP (x, 1)) == CONST_INT
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6828 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6832 /* If both registers are hi-regs, then it's better to reload the
6833 entire expression rather than each register individually. That
6834 only requires one reload register rather than two. */
6835 if (GET_CODE (x) == PLUS
6836 && REG_P (XEXP (x, 0))
6837 && REG_P (XEXP (x, 1))
6838 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6845 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6852 /* Test for various thread-local symbols. */
6854 /* Return TRUE if X is a thread-local symbol. */
6857 arm_tls_symbol_p (rtx x)
6859 if (! TARGET_HAVE_TLS)
6862 if (GET_CODE (x) != SYMBOL_REF)
6865 return SYMBOL_REF_TLS_MODEL (x) != 0;
6868 /* Helper for arm_tls_referenced_p. */
6871 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6873 if (GET_CODE (*x) == SYMBOL_REF)
6874 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6876 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6877 TLS offsets, not real symbol references. */
6878 if (GET_CODE (*x) == UNSPEC
    && XINT (*x, 1) == UNSPEC_TLS)
  return -1;

return 0;
6885 /* Return TRUE if X contains any TLS symbol references. */
6888 arm_tls_referenced_p (rtx x)
6890 if (! TARGET_HAVE_TLS)
6893 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6896 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6898 On the ARM, allow any integer (invalid ones are removed later by insn
patterns), nice doubles and symbol_refs which refer to the function's
constant pool.

When generating pic allow anything. */
6905 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6907 /* At present, we have no support for Neon structure constants, so forbid
them here. It might be possible to handle simple cases like 0 and -1
in future. */
if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
  return false;

return flag_pic || !label_mentioned_p (x);
6917 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6919 return (GET_CODE (x) == CONST_INT
6920 || GET_CODE (x) == CONST_DOUBLE
        || CONSTANT_ADDRESS_P (x)
        || flag_pic);
6926 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
return (!arm_cannot_force_const_mem (mode, x)
        && (TARGET_32BIT
            ? arm_legitimate_constant_p_1 (mode, x)
6931 : thumb_legitimate_constant_p (mode, x)));
6934 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6937 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6941 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6943 split_const (x, &base, &offset);
6944 if (GET_CODE (base) == SYMBOL_REF
6945 && !offset_within_block_p (base, INTVAL (offset)))
6948 return arm_tls_referenced_p (x);
6951 #define REG_OR_SUBREG_REG(X) \
6952 (GET_CODE (X) == REG \
6953 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6955 #define REG_OR_SUBREG_RTX(X) \
6956 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6959 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6961 enum machine_mode mode = GET_MODE (x);
6975 return COSTS_N_INSNS (1);
6978 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
  int cycles = 0;
  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return COSTS_N_INSNS (2) + cycles;
}
6990 return COSTS_N_INSNS (1) + 16;
return (COSTS_N_INSNS (1)
        + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
               + (GET_CODE (SET_DEST (x)) == MEM)));
if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
  return 0;
if (thumb_shiftable_const (INTVAL (x)))
7003 return COSTS_N_INSNS (2);
7004 return COSTS_N_INSNS (3);
7006 else if ((outer == PLUS || outer == COMPARE)
         && INTVAL (x) < 256 && INTVAL (x) > -256)
  return 0;
7009 else if ((outer == IOR || outer == XOR || outer == AND)
7010 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7011 return COSTS_N_INSNS (1);
7012 else if (outer == AND)
7015 /* This duplicates the tests in the andsi3 expander. */
7016 for (i = 9; i <= 31; i++)
7017 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7018 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7019 return COSTS_N_INSNS (2);
7021 else if (outer == ASHIFT || outer == ASHIFTRT
         || outer == LSHIFTRT)
  return 0;

return COSTS_N_INSNS (2);
7030 return COSTS_N_INSNS (3);
7048 /* XXX another guess. */
7049 /* Memory costs quite a lot for the first word, but subsequent words
7050 load at the equivalent of a single insn each. */
7051 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
   ? 4 : 0));
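/* For instance (illustrative, with UNITS_PER_WORD == 4): a plain SImode
   load costs 10, a DImode load 10 + 4 = 14, and loading a constant-pool
   entry adds a further 4 so that literal-pool references are not seen
   as free. */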
7057 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7063 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

if (mode == SImode)
  return total;

if (arm_arch6)
  return total + COSTS_N_INSNS (1);
7072 /* Assume a two-shift sequence. Increase the cost slightly so
7073 we prefer actual shifts over an extend operation. */
7074 return total + 1 + COSTS_N_INSNS (2);
7082 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7084 enum machine_mode mode = GET_MODE (x);
7085 enum rtx_code subcode;
7087 enum rtx_code code = GET_CODE (x);
7093 /* Memory costs quite a lot for the first word, but subsequent words
7094 load at the equivalent of a single insn each. */
7095 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7102 if (TARGET_HARD_FLOAT && mode == SFmode)
7103 *total = COSTS_N_INSNS (2);
7104 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7105 *total = COSTS_N_INSNS (4);
7107 *total = COSTS_N_INSNS (20);
7111 if (GET_CODE (XEXP (x, 1)) == REG)
7112 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7113 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7114 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7120 *total += COSTS_N_INSNS (4);
7125 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
*total += rtx_cost (XEXP (x, 0), code, 0, speed);
if (mode == DImode)
  {
    *total += COSTS_N_INSNS (3);
    return true;
  }

*total += COSTS_N_INSNS (1);
7134 /* Increase the cost of complex shifts because they aren't any faster,
7135 and reduce dual issue opportunities. */
7136 if (arm_tune_cortex_a9
7137 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7145 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7146 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7147 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7149 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7153 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7154 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7156 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7163 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7165 if (TARGET_HARD_FLOAT
7167 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7169 *total = COSTS_N_INSNS (1);
7170 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7171 && arm_const_double_rtx (XEXP (x, 0)))
7173 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7177 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7178 && arm_const_double_rtx (XEXP (x, 1)))
7180 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7186 *total = COSTS_N_INSNS (20);
7190 *total = COSTS_N_INSNS (1);
7191 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7192 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7194 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7198 subcode = GET_CODE (XEXP (x, 1));
7199 if (subcode == ASHIFT || subcode == ASHIFTRT
7200 || subcode == LSHIFTRT
7201 || subcode == ROTATE || subcode == ROTATERT)
7203 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7204 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7208 /* A shift as a part of RSB costs no more than RSB itself. */
7209 if (GET_CODE (XEXP (x, 0)) == MULT
7210 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7212 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7213 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7218 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7220 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7221 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7225 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7226 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7228 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7229 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7230 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7231 *total += COSTS_N_INSNS (1);
7239 if (code == PLUS && arm_arch6 && mode == SImode
7240 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7241 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7243 *total = COSTS_N_INSNS (1);
7244 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7246 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7250 /* MLA: All arguments must be registers. We filter out
multiplication by a power of two, so that we fall down into
the code below. */
7253 if (GET_CODE (XEXP (x, 0)) == MULT
7254 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7256 /* The cost comes from the cost of the multiply. */
7260 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7262 if (TARGET_HARD_FLOAT
7264 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7266 *total = COSTS_N_INSNS (1);
7267 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7268 && arm_const_double_rtx (XEXP (x, 1)))
7270 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7277 *total = COSTS_N_INSNS (20);
7281 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7282 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7284 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7285 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7286 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7287 *total += COSTS_N_INSNS (1);
7293 case AND: case XOR: case IOR:
7295 /* Normally the frame registers will be spilt into reg+const during
7296 reload, so it is a bad idea to combine them with other instructions,
7297 since then they might not be moved outside of loops. As a compromise
we allow integration with ops that have a constant as their second
operand. */
7300 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7301 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7302 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7303 *total = COSTS_N_INSNS (1);
7307 *total += COSTS_N_INSNS (2);
7308 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7309 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7311 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7318 *total += COSTS_N_INSNS (1);
7319 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7320 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7322 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7325 subcode = GET_CODE (XEXP (x, 0));
7326 if (subcode == ASHIFT || subcode == ASHIFTRT
7327 || subcode == LSHIFTRT
7328 || subcode == ROTATE || subcode == ROTATERT)
7330 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7331 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7336 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7338 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7339 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7343 if (subcode == UMIN || subcode == UMAX
7344 || subcode == SMIN || subcode == SMAX)
7346 *total = COSTS_N_INSNS (3);
7353 /* This should have been handled by the CPU specific routines. */
7357 if (arm_arch3m && mode == SImode
7358 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7360 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7361 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7362 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7363 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7365 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7368 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7372 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7374 if (TARGET_HARD_FLOAT
7376 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7378 *total = COSTS_N_INSNS (1);
7381 *total = COSTS_N_INSNS (2);
7387 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7388 if (mode == SImode && code == NOT)
7390 subcode = GET_CODE (XEXP (x, 0));
7391 if (subcode == ASHIFT || subcode == ASHIFTRT
7392 || subcode == LSHIFTRT
7393 || subcode == ROTATE || subcode == ROTATERT
7395 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7397 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7398 /* Register shifts cost an extra cycle. */
7399 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7400 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7409 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7411 *total = COSTS_N_INSNS (4);
7415 operand = XEXP (x, 0);
7417 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7418 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7419 && GET_CODE (XEXP (operand, 0)) == REG
7420 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7421 *total += COSTS_N_INSNS (1);
7422 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7423 + rtx_cost (XEXP (x, 2), code, 2, speed));
7427 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7429 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7435 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7436 && mode == SImode && XEXP (x, 1) == const0_rtx)
7438 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7444 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7445 && mode == SImode && XEXP (x, 1) == const0_rtx)
7447 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
/* SCC insns. In the case where the comparison has already been
   performed, they cost 2 instructions. Otherwise they need
   an additional comparison before them. */
7470 *total = COSTS_N_INSNS (2);
7471 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7478 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7484 *total += COSTS_N_INSNS (1);
7485 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7486 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7488 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7492 subcode = GET_CODE (XEXP (x, 0));
7493 if (subcode == ASHIFT || subcode == ASHIFTRT
7494 || subcode == LSHIFTRT
7495 || subcode == ROTATE || subcode == ROTATERT)
7497 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7498 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7503 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7505 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7506 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7516 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7517 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7518 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7519 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7523 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7525 if (TARGET_HARD_FLOAT
7527 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7529 *total = COSTS_N_INSNS (1);
7532 *total = COSTS_N_INSNS (20);
7535 *total = COSTS_N_INSNS (1);
7537 *total += COSTS_N_INSNS (3);
7543 if (GET_MODE_CLASS (mode) == MODE_INT)
7545 rtx op = XEXP (x, 0);
7546 enum machine_mode opmode = GET_MODE (op);
7549 *total += COSTS_N_INSNS (1);
7551 if (opmode != SImode)
7555 /* If !arm_arch4, we use one of the extendhisi2_mem
7556 or movhi_bytes patterns for HImode. For a QImode
7557 sign extension, we first zero-extend from memory
7558 and then perform a shift sequence. */
7559 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7560 *total += COSTS_N_INSNS (2);
7563 *total += COSTS_N_INSNS (1);
/* We don't have the necessary insn, so we need to perform some
   other operation. */
7567 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7568 /* An and with constant 255. */
7569 *total += COSTS_N_INSNS (1);
7571 /* A shift sequence. Increase costs slightly to avoid
7572 combining two shifts into an extend operation. */
7573 *total += COSTS_N_INSNS (2) + 1;
7579 switch (GET_MODE (XEXP (x, 0)))
7586 *total = COSTS_N_INSNS (1);
7596 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7600 if (const_ok_for_arm (INTVAL (x))
7601 || const_ok_for_arm (~INTVAL (x)))
7602 *total = COSTS_N_INSNS (1);
7604 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7605 INTVAL (x), NULL_RTX,
7612 *total = COSTS_N_INSNS (3);
7616 *total = COSTS_N_INSNS (1);
7620 *total = COSTS_N_INSNS (1);
7621 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7625 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7626 && (mode == SFmode || !TARGET_VFP_SINGLE))
7627 *total = COSTS_N_INSNS (1);
7629 *total = COSTS_N_INSNS (4);
7636 /* We cost this as high as our memory costs to allow this to
7637 be hoisted from loops. */
7638 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7640 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7645 *total = COSTS_N_INSNS (4);
7650 /* Estimates the size cost of thumb1 instructions.
7651 For now most of the code is copied from thumb1_rtx_costs. We need more
7652 fine grain tuning when we have more related test cases. */
7654 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7656 enum machine_mode mode = GET_MODE (x);
7669 return COSTS_N_INSNS (1);
7672 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
/* The Thumb-1 mul instruction cannot operate on constants, so we must
   load them into a register first. */
7676 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7677 return COSTS_N_INSNS (1) + const_size;
7679 return COSTS_N_INSNS (1);
return (COSTS_N_INSNS (1)
        + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
               + (GET_CODE (SET_DEST (x)) == MEM)));
7689 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7690 return COSTS_N_INSNS (1);
7691 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7692 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7693 return COSTS_N_INSNS (2);
7694 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7695 if (thumb_shiftable_const (INTVAL (x)))
7696 return COSTS_N_INSNS (2);
7697 return COSTS_N_INSNS (3);
7699 else if ((outer == PLUS || outer == COMPARE)
         && INTVAL (x) < 256 && INTVAL (x) > -256)
  return 0;
7702 else if ((outer == IOR || outer == XOR || outer == AND)
7703 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7704 return COSTS_N_INSNS (1);
7705 else if (outer == AND)
7708 /* This duplicates the tests in the andsi3 expander. */
7709 for (i = 9; i <= 31; i++)
7710 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7711 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7712 return COSTS_N_INSNS (2);
7714 else if (outer == ASHIFT || outer == ASHIFTRT
         || outer == LSHIFTRT)
  return 0;

return COSTS_N_INSNS (2);
7723 return COSTS_N_INSNS (3);
7741 /* XXX another guess. */
7742 /* Memory costs quite a lot for the first word, but subsequent words
7743 load at the equivalent of a single insn each. */
7744 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
   ? 4 : 0));
7750 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7755 /* XXX still guessing. */
7756 switch (GET_MODE (XEXP (x, 0)))
7759 return (1 + (mode == DImode ? 4 : 0)
7760 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7763 return (4 + (mode == DImode ? 4 : 0)
7764 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7767 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7778 /* RTX costs when optimizing for size. */
7780 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7783 enum machine_mode mode = GET_MODE (x);
7786 *total = thumb1_size_rtx_costs (x, code, outer_code);
7790 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7794 /* A memory access costs 1 insn if the mode is small, or the address is
7795 a single register, otherwise it costs one insn per word. */
7796 if (REG_P (XEXP (x, 0)))
  *total = COSTS_N_INSNS (1);
else if (flag_pic
         && GET_CODE (XEXP (x, 0)) == PLUS
7800 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7801 /* This will be split into two instructions.
7802 See arm.md:calculate_pic_address. */
7803 *total = COSTS_N_INSNS (2);
7805 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7812 /* Needs a libcall, so it costs about this. */
7813 *total = COSTS_N_INSNS (2);
7817 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7819 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7827 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7829 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7832 else if (mode == SImode)
7834 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7835 /* Slightly disparage register shifts, but not by much. */
7836 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7837 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7841 /* Needs a libcall. */
7842 *total = COSTS_N_INSNS (2);
7846 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7847 && (mode == SFmode || !TARGET_VFP_SINGLE))
7849 *total = COSTS_N_INSNS (1);
7855 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7856 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7858 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7859 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7860 || subcode1 == ROTATE || subcode1 == ROTATERT
7861 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7862 || subcode1 == ASHIFTRT)
7864 /* It's just the cost of the two operands. */
7869 *total = COSTS_N_INSNS (1);
7873 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7877 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7878 && (mode == SFmode || !TARGET_VFP_SINGLE))
7880 *total = COSTS_N_INSNS (1);
7884 /* A shift as a part of ADD costs nothing. */
7885 if (GET_CODE (XEXP (x, 0)) == MULT
7886 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7888 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7889 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7890 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7895 case AND: case XOR: case IOR:
7898 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7900 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7901 || subcode == LSHIFTRT || subcode == ASHIFTRT
7902 || (code == AND && subcode == NOT))
7904 /* It's just the cost of the two operands. */
7910 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7914 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7918 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7919 && (mode == SFmode || !TARGET_VFP_SINGLE))
7921 *total = COSTS_N_INSNS (1);
7927 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7936 if (cc_register (XEXP (x, 0), VOIDmode))
7939 *total = COSTS_N_INSNS (1);
7943 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7944 && (mode == SFmode || !TARGET_VFP_SINGLE))
7945 *total = COSTS_N_INSNS (1);
7947 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7952 return arm_rtx_costs_1 (x, outer_code, total, 0);
7955 if (const_ok_for_arm (INTVAL (x)))
7956 /* A multiplication by a constant requires another instruction
7957 to load the constant to a register. */
7958 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7960 else if (const_ok_for_arm (~INTVAL (x)))
7961 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7962 else if (const_ok_for_arm (-INTVAL (x)))
7964 if (outer_code == COMPARE || outer_code == PLUS
7965 || outer_code == MINUS)
7968 *total = COSTS_N_INSNS (1);
7971 *total = COSTS_N_INSNS (2);
7977 *total = COSTS_N_INSNS (2);
7981 *total = COSTS_N_INSNS (4);
7986 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7987 cost of these slightly. */
7988 *total = COSTS_N_INSNS (1) + 1;
7995 if (mode != VOIDmode)
7996 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
*total = COSTS_N_INSNS (4); /* Who knows? */
/* RTX costs entry point: dispatch to the size-based routine when
   optimizing for size, otherwise to the per-core speed routine. */
8005 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8006 int *total, bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
                               (enum rtx_code) outer_code, total);
  else
    return current_tune->rtx_costs (x, (enum rtx_code) code,
8013 (enum rtx_code) outer_code,
8017 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8018 supported on any "slowmul" cores, so it can be ignored. */
8021 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8022 int *total, bool speed)
8024 enum machine_mode mode = GET_MODE (x);
8028 *total = thumb1_rtx_costs (x, code, outer_code);
8035 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8038 *total = COSTS_N_INSNS (20);
8042 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8044 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8045 & (unsigned HOST_WIDE_INT) 0xffffffff);
8046 int cost, const_ok = const_ok_for_arm (i);
8047 int j, booth_unit_size;
8049 /* Tune as appropriate. */
8050 cost = const_ok ? 4 : 8;
8051 booth_unit_size = 2;
for (j = 0; i && j < 32; j += booth_unit_size)
  {
    i >>= booth_unit_size;
    cost++;
  }
8058 *total = COSTS_N_INSNS (cost);
8059 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8063 *total = COSTS_N_INSNS (20);
return arm_rtx_costs_1 (x, outer_code, total, speed);
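/* A worked example of the Booth-based estimate above (illustrative):
   multiplying by 0x55 starts at cost 4 (const_ok_for_arm) and shifts i
   right two bits per iteration: 0x55 -> 0x15 -> 0x5 -> 0x1 -> 0, four
   iterations, giving COSTS_N_INSNS (8). A constant such as 0x12345,
   which needs a separate load, would start at 8 instead. */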
8072 /* RTX cost for cores with a fast multiply unit (M variants). */
8075 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8076 int *total, bool speed)
8078 enum machine_mode mode = GET_MODE (x);
8082 *total = thumb1_rtx_costs (x, code, outer_code);
8086 /* ??? should thumb2 use different costs? */
8090 /* There is no point basing this on the tuning, since it is always the
8091 fast variant if it exists at all. */
8093 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8094 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8095 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8097 *total = COSTS_N_INSNS(2);
8104 *total = COSTS_N_INSNS (5);
8108 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8110 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8111 & (unsigned HOST_WIDE_INT) 0xffffffff);
8112 int cost, const_ok = const_ok_for_arm (i);
8113 int j, booth_unit_size;
8115 /* Tune as appropriate. */
8116 cost = const_ok ? 4 : 8;
8117 booth_unit_size = 8;
for (j = 0; i && j < 32; j += booth_unit_size)
  {
    i >>= booth_unit_size;
    cost++;
  }

*total = COSTS_N_INSNS (cost);
8130 *total = COSTS_N_INSNS (4);
8134 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8136 if (TARGET_HARD_FLOAT
8138 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8140 *total = COSTS_N_INSNS (1);
8145 /* Requires a lib call */
8146 *total = COSTS_N_INSNS (20);
8150 return arm_rtx_costs_1 (x, outer_code, total, speed);
8155 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8156 so it can be ignored. */
8159 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8160 int *total, bool speed)
8162 enum machine_mode mode = GET_MODE (x);
8166 *total = thumb1_rtx_costs (x, code, outer_code);
8173 if (GET_CODE (XEXP (x, 0)) != MULT)
8174 return arm_rtx_costs_1 (x, outer_code, total, speed);
8176 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8177 will stall until the multiplication is complete. */
8178 *total = COSTS_N_INSNS (3);
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8185 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8186 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8189 *total = COSTS_N_INSNS (2);
8196 *total = COSTS_N_INSNS (5);
8200 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8202 /* If operand 1 is a constant we can more accurately
8203 calculate the cost of the multiply. The multiplier can
8204 retire 15 bits on the first cycle and a further 12 on the
8205 second. We do, of course, have to load the constant into
8206 a register first. */
8207 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
/* There's a general overhead of one cycle. */
int cost = 1;
unsigned HOST_WIDE_INT masked_const;

if (i & 0x80000000)
  i = ~i;

i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8217 masked_const = i & 0xffff8000;
if (masked_const != 0)
  {
    cost++;
    masked_const = i & 0xf8000000;
    if (masked_const != 0)
      cost++;
  }
*total = COSTS_N_INSNS (cost);
8231 *total = COSTS_N_INSNS (3);
8235 /* Requires a lib call */
8236 *total = COSTS_N_INSNS (20);
8240 return arm_rtx_costs_1 (x, outer_code, total, speed);
8245 /* RTX costs for 9e (and later) cores. */
8248 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8249 int *total, bool speed)
8251 enum machine_mode mode = GET_MODE (x);
8258 *total = COSTS_N_INSNS (3);
8262 *total = thumb1_rtx_costs (x, code, outer_code);
8270 /* There is no point basing this on the tuning, since it is always the
8271 fast variant if it exists at all. */
8273 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8274 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8275 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8277 *total = COSTS_N_INSNS (2);
8284 *total = COSTS_N_INSNS (5);
8290 *total = COSTS_N_INSNS (2);
8294 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8296 if (TARGET_HARD_FLOAT
8298 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8300 *total = COSTS_N_INSNS (1);
8305 *total = COSTS_N_INSNS (20);
8309 return arm_rtx_costs_1 (x, outer_code, total, speed);
8312 /* All address computations that can be done are free, but rtx cost returns
8313 the same for practically all of them. So we weight the different types
8314 of address here in the order (most pref first):
8315 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8317 arm_arm_address_cost (rtx x)
8319 enum rtx_code c = GET_CODE (x);
8321 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8323 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8328 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8331 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8341 arm_thumb_address_cost (rtx x)
8343 enum rtx_code c = GET_CODE (x);
8348 && GET_CODE (XEXP (x, 0)) == REG
8349 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8356 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8358 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8361 /* Adjust cost hook for XScale. */
8363 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8365 /* Some true dependencies can have a higher cost depending
8366 on precisely how certain input operands are used. */
8367 if (REG_NOTE_KIND(link) == 0
8368 && recog_memoized (insn) >= 0
8369 && recog_memoized (dep) >= 0)
8371 int shift_opnum = get_attr_shift (insn);
8372 enum attr_type attr_type = get_attr_type (dep);
8374 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8375 operand for INSN. If we have a shifted input operand and the
8376 instruction we depend on is another ALU instruction, then we may
8377 have to account for an additional stall. */
8378 if (shift_opnum != 0
8379 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8381 rtx shifted_operand;
8384 /* Get the shifted operand. */
8385 extract_insn (insn);
8386 shifted_operand = recog_data.operand[shift_opnum];
/* Iterate over all the operands in DEP. If we write an operand
   that overlaps with SHIFTED_OPERAND, then we have to increase the
   cost of this dependency. */
8392 preprocess_constraints ();
8393 for (opno = 0; opno < recog_data.n_operands; opno++)
8395 /* We can ignore strict inputs. */
8396 if (recog_data.operand_type[opno] == OP_IN)
8399 if (reg_overlap_mentioned_p (recog_data.operand[opno],
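/* An illustrative sequence (not emitted by this file): if
       add r1, r2, r3, asl #2    @ alu_shift insn, writes r1
   is followed by
       orr r4, r5, r1, asl #1    @ r1 is INSN's shifted operand
   then r1 is needed by the shifter earlier in the pipeline than a plain
   ALU operand would be, and the true dependency costs an extra stall,
   which is what the code above accounts for. */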
8411 /* Adjust cost hook for Cortex A9. */
8413 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8415 switch (REG_NOTE_KIND (link))
8422 case REG_DEP_OUTPUT:
8423 if (recog_memoized (insn) >= 0
8424 && recog_memoized (dep) >= 0)
if (GET_CODE (PATTERN (insn)) == SET)
  {
    if (GET_MODE_CLASS
        (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
        || GET_MODE_CLASS
        (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8433 enum attr_type attr_type_insn = get_attr_type (insn);
8434 enum attr_type attr_type_dep = get_attr_type (dep);
/* By default all dependencies of the form
     s0 = s0 <op> s1
     s0 = s0 <op> s2
   have an extra latency of 1 cycle because
   of the input and output dependency in this
   case. However this gets modeled as a true
   dependency and hence all these checks. */
8443 if (REG_P (SET_DEST (PATTERN (insn)))
8444 && REG_P (SET_DEST (PATTERN (dep)))
8445 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8446 SET_DEST (PATTERN (dep))))
/* FMACS is a special case where the dependent
   instruction can be issued 3 cycles before
   the normal latency in case of an output
   dependency. */
8452 if ((attr_type_insn == TYPE_FMACS
8453 || attr_type_insn == TYPE_FMACD)
8454 && (attr_type_dep == TYPE_FMACS
8455 || attr_type_dep == TYPE_FMACD))
8457 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8458 *cost = insn_default_latency (dep) - 3;
8460 *cost = insn_default_latency (dep);
8465 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8466 *cost = insn_default_latency (dep) + 1;
8468 *cost = insn_default_latency (dep);
8484 /* Adjust cost hook for FA726TE. */
8486 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
/* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
   followed by a predicated one) carries a penalty of 3 cycles. */
8490 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8491 && recog_memoized (insn) >= 0
8492 && recog_memoized (dep) >= 0
8493 && get_attr_conds (dep) == CONDS_SET)
8495 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8496 if (get_attr_conds (insn) == CONDS_USE
8497 && get_attr_type (insn) != TYPE_BRANCH)
8503 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8504 || get_attr_conds (insn) == CONDS_USE)
8514 /* Implement TARGET_REGISTER_MOVE_COST.
8516 Moves between FPA_REGS and GENERAL_REGS are two memory insns.
8517 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8518 it is typically more expensive than a single memory access. We set
8519 the cost to less than two memory accesses so that floating
8520 point to integer conversion does not go through memory. */
8523 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8524 reg_class_t from, reg_class_t to)
8528 if ((from == FPA_REGS && to != FPA_REGS)
8529 || (from != FPA_REGS && to == FPA_REGS))
8531 else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8532 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8534 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8535 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8537 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8539 else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
8540 || (from != CIRRUS_REGS && to == CIRRUS_REGS))
8547 if (from == HI_REGS || to == HI_REGS)
8554 /* Implement TARGET_MEMORY_MOVE_COST. */
8557 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8558 bool in ATTRIBUTE_UNUSED)
8564 if (GET_MODE_SIZE (mode) < 4)
8567 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8571 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8572 It corrects the value of COST based on the relationship between
8573 INSN and DEP through the dependence LINK. It returns the new
8574 value. There is a per-core adjust_cost hook to adjust scheduler costs
8575 and the per-core hook can choose to completely override the generic
8576 adjust_cost function. Only put bits of code into arm_adjust_cost that
8577 are common across all cores. */
8579 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8583 /* When generating Thumb-1 code, we want to place flag-setting operations
8584 close to a conditional branch which depends on them, so that we can
8585 omit the comparison. */
8587 && REG_NOTE_KIND (link) == 0
8588 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8589 && recog_memoized (dep) >= 0
8590 && get_attr_conds (dep) == CONDS_SET)
8593 if (current_tune->sched_adjust_cost != NULL)
8595 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8599 /* XXX This is not strictly true for the FPA. */
8600 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8601 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8604 /* Call insns don't incur a stall, even if they follow a load. */
8605 if (REG_NOTE_KIND (link) == 0
8606 && GET_CODE (insn) == CALL_INSN)
8609 if ((i_pat = single_set (insn)) != NULL
8610 && GET_CODE (SET_SRC (i_pat)) == MEM
8611 && (d_pat = single_set (dep)) != NULL
8612 && GET_CODE (SET_DEST (d_pat)) == MEM)
8614 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
/* This is a load after a store; there is no conflict if the load reads
   from a cached area. Assume that loads from the stack, and from the
   constant pool, are cached, and that others will miss. This is a
   hack. */
8620 if ((GET_CODE (src_mem) == SYMBOL_REF
8621 && CONSTANT_POOL_ADDRESS_P (src_mem))
8622 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8623 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8624 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8632 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8635 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8637 return (optimize > 0) ? 2 : 0;
8641 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8643 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8646 static int fp_consts_inited = 0;
8648 /* Only zero is valid for VFP. Other values are also valid for FPA. */
static const char * const strings_fp[8] =
{
  "0", "1", "2", "3",
  "4", "5", "0.5", "10"
};
8655 static REAL_VALUE_TYPE values_fp[8];
8658 init_fp_table (void)
if (TARGET_VFP)
  fp_consts_inited = 1;
else
  fp_consts_inited = 8;
8668 for (i = 0; i < fp_consts_inited; i++)
8670 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8675 /* Return TRUE if rtx X is a valid immediate FP constant. */
8677 arm_const_double_rtx (rtx x)
8682 if (!fp_consts_inited)
8685 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8686 if (REAL_VALUE_MINUS_ZERO (r))
8689 for (i = 0; i < fp_consts_inited; i++)
8690 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8696 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8698 neg_const_double_rtx_ok_for_fpa (rtx x)
8703 if (!fp_consts_inited)
8706 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8707 r = real_value_negate (&r);
8708 if (REAL_VALUE_MINUS_ZERO (r))
8711 for (i = 0; i < 8; i++)
8712 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8719 /* VFPv3 has a fairly wide range of representable immediates, formed from
8720 "quarter-precision" floating-point values. These can be evaluated using this
formula (with ^ for exponentiation):

  (-1)^s * n * 2^-r

Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8726 16 <= n <= 31 and 0 <= r <= 7.
8728 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8730 - A (most-significant) is the sign bit.
8731 - BCD are the exponent (encoded as r XOR 3).
8732 - EFGH are the mantissa (encoded as n - 16).
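/* Worked example (illustrative): +1.0 = (-1)^0 * 16 * 2^-4, i.e.
   s = 0, n = 16, r = 4, which encodes as
       A    (sign)     = 0
       BCD  (exponent) = r XOR 3 = 7   (binary 111)
       EFGH (mantissa) = n - 16 = 0    (binary 0000)
   giving the 8-bit immediate 0x70; vfp3_const_double_index below
   returns exactly this value for a CONST_DOUBLE of 1.0. */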
8735 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8736 fconst[sd] instruction, or -1 if X isn't suitable. */
8738 vfp3_const_double_index (rtx x)
8740 REAL_VALUE_TYPE r, m;
8742 unsigned HOST_WIDE_INT mantissa, mant_hi;
8743 unsigned HOST_WIDE_INT mask;
8744 HOST_WIDE_INT m1, m2;
8745 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8747 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8752 /* We can't represent these things, so detect them first. */
8753 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8756 /* Extract sign, exponent and mantissa. */
8757 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8758 r = real_value_abs (&r);
8759 exponent = REAL_EXP (&r);
8760 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8761 highest (sign) bit, with a fixed binary point at bit point_pos.
8762 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8763 bits for the mantissa, this may fail (low bits would be lost). */
8764 real_ldexp (&m, &r, point_pos - exponent);
REAL_VALUE_TO_INT (&m1, &m2, m);
mantissa = m1;
mant_hi = m2;

/* If there are bits set in the low part of the mantissa, we can't
   represent this value. */
if (mantissa != 0)
  return -1;

/* Now make it so that mantissa contains the most-significant bits, and move
   the point_pos to indicate that the least-significant bits have been
   discarded. */
point_pos -= HOST_BITS_PER_WIDE_INT;
mantissa = mant_hi;
8780 /* We can permit four significant bits of mantissa only, plus a high bit
8781 which is always 1. */
8782 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8783 if ((mantissa & mask) != 0)
8786 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8787 mantissa >>= point_pos - 5;
8789 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8790 floating-point immediate zero with Neon using an integer-zero load, but
that case is handled elsewhere.) */
if (mantissa == 0)
  return -1;

gcc_assert (mantissa >= 16 && mantissa <= 31);
8797 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8798 normalized significands are in the range [1, 2). (Our mantissa is shifted
8799 left 4 places at this point relative to normalized IEEE754 values). GCC
8800 internally uses [0.5, 1) (see real.c), so the exponent returned from
8801 REAL_EXP must be altered. */
8802 exponent = 5 - exponent;
8804 if (exponent < 0 || exponent > 7)
8807 /* Sign, mantissa and exponent are now in the correct form to plug into the
8808 formula described in the comment above. */
8809 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8812 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8814 vfp3_const_double_rtx (rtx x)
8819 return vfp3_const_double_index (x) != -1;
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
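/* Illustrative sketch (not part of the original file): how the table above is
   matched.  The constant is first splatted into a byte image of the whole
   vector; variant 0, for instance, then requires every 4-byte group to be
   "abcdefgh 00 00 00" in memory order.  Hypothetical helper for exposition.  */
#if 0
static int
matches_variant0_example (const unsigned char *bytes, unsigned nbytes)
{
  unsigned i;

  for (i = 0; i < nbytes; i += 4)
    if (bytes[i] != bytes[0] || bytes[i + 1] != 0
	|| bytes[i + 2] != 0 || bytes[i + 3] != 0)
      return 0;
  return 1;
}
#endif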
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0))
	return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
	{
	  rtx elt = CONST_VECTOR_ELT (op, i);
	  REAL_VALUE_TYPE re;

	  REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

	  if (!REAL_VALUES_EQUAL (r0, re))
	    return -1;
	}

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */
int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left shift,
   because left and right shifts accept different immediate ranges.  */
int
neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (GET_CODE (el) == CONST_INT)
	elpart = INTVAL (el);
      else if (GET_CODE (el) == CONST_DOUBLE)
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
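/* Illustrative sketch (not part of the original file): the asymmetric
   immediate ranges checked above, on plain integers.  For 16-bit elements a
   left-shift immediate must be 0..15, while a right-shift immediate must be
   1..16.  The helper name is hypothetical.  */
#if 0
static int
shift_imm_ok_example (unsigned elt_bits, unsigned imm, int isleftshift)
{
  return isleftshift ? imm < elt_bits	      /* vshl: 0 .. size-1.  */
		     : (imm >= 1 && imm <= elt_bits);  /* vshr: 1 .. size.  */
}
#endif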
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */
char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     enum machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}

/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps.  Although all
   vector elements of the result but the first are ignored, we actually
   calculate the same result in each of the elements.  An alternative such as
   initially loading a vector with zero to use as each of the second operands
   would use up an additional register and take an extra instruction, for no
   particular gain.  */
void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
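/* Illustrative sketch (not part of the original file): the same log2-step
   pairwise reduction on a plain array, summing adjacent pairs until one
   value remains.  Assumes a power-of-two element count; the helper name is
   hypothetical.  */
#if 0
static int
pairwise_reduce_example (int *v, unsigned n)
{
  unsigned i, half;

  for (half = n / 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] = v[2 * i] + v[2 * i + 1];	/* One vpadd-style step.  */

  return v[0];
}
#endif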
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}

/* Initialize vector TARGET to VALS.  */
void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */
static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (GET_CODE (operand) == CONST_INT);

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */
void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */
void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

/* Return the element size of a vector mode in bits, or the full mode size
   for DImode.  */
HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
int
cirrus_memory_offset (rtx op)
{
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return 0;

  if (GET_CODE (op) == MEM)
    {
      rtx ind;

      ind = XEXP (op, 0);

      /* Match: (mem (reg)).  */
      if (GET_CODE (ind) == REG)
	return 1;

      /* Match:
	 (mem (plus (reg)
		    (const))).  */
      if (GET_CODE (ind) == PLUS
	  && GET_CODE (XEXP (ind, 0)) == REG
	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
	return 1;
    }

  return 0;
}

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */
int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1016
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
bool
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}

/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */
enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}

/* Values which must be returned in the most-significant end of the return
   register.  */
static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code which has to work around
   a hardware bug triggered by such instructions.  */
static bool
arm_memory_load_p (rtx insn)
{
  rtx body, lhs, rhs;

  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
    return false;

  body = PATTERN (insn);

  if (GET_CODE (body) != SET)
    return false;

  lhs = XEXP (body, 0);
  rhs = XEXP (body, 1);

  lhs = REG_OR_SUBREG_RTX (lhs);

  /* If the destination is not a general purpose
     register we do not have to worry.  */
  if (GET_CODE (lhs) != REG
      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
    return false;

  /* As well as loads from memory we also have to react
     to loads of invalid constants which will be turned
     into loads from the minipool.  */
  return (GET_CODE (rhs) == MEM
	  || GET_CODE (rhs) == SYMBOL_REF
	  || note_invalid_constants (insn, -1, false));
}

/* Return TRUE if INSN is a Cirrus instruction.  */
static bool
arm_cirrus_insn_p (rtx insn)
{
  enum attr_cirrus attr;

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!insn
      || GET_CODE (insn) != INSN
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  attr = get_attr_cirrus (insn);

  return attr != CIRRUS_NOT;
}
/* Cirrus reorg for invalid instruction combinations.  */
static void
cirrus_reorg (rtx first)
{
  enum attr_cirrus attr;
  rtx body = PATTERN (first);
  rtx t;
  int nops;

  /* Any branch must be followed by 2 non Cirrus instructions.  */
  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
    {
      nops = 0;
      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))
	++ nops;

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
	++ nops;

      while (nops --)
	emit_insn_after (gen_nop (), first);

      return;
    }

  /* (float (blah)) is in parallel with a clobber.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (GET_CODE (body) == SET)
    {
      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);

      /* cfldrd, cfldr64, cfstrd, cfstr64 must
	 be followed by a non Cirrus insn.  */
      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
	{
	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
	    emit_insn_after (gen_nop (), first);

	  return;
	}
      else if (arm_memory_load_p (first))
	{
	  unsigned int arm_regno;

	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
	     ldr/cfmv64hr combination where the Rd field is the same
	     in both instructions must be split with a non Cirrus
	     insn.  */

	  /* Get Arm register number for ldr insn.  */
	  if (GET_CODE (lhs) == REG)
	    arm_regno = REGNO (lhs);
	  else
	    {
	      gcc_assert (GET_CODE (rhs) == REG);
	      arm_regno = REGNO (rhs);
	    }

	  /* Next insn.  */
	  first = next_nonnote_insn (first);

	  if (! arm_cirrus_insn_p (first))
	    return;

	  body = PATTERN (first);

	  /* (float (blah)) is in parallel with a clobber.  */
	  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
	    body = XVECEXP (body, 0, 0);

	  if (GET_CODE (body) == FLOAT)
	    body = XEXP (body, 0);

	  if (get_attr_cirrus (first) == CIRRUS_MOVE
	      && GET_CODE (XEXP (body, 1)) == REG
	      && arm_regno == REGNO (XEXP (body, 1)))
	    emit_insn_after (gen_nop (), first);

	  return;
	}
    }

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!first
      || GET_CODE (first) != INSN
      || GET_CODE (PATTERN (first)) == USE
      || GET_CODE (PATTERN (first)) == CLOBBER)
    return;

  attr = get_attr_cirrus (first);

  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
     must be followed by a non-coprocessor instruction.  */
  if (attr == CIRRUS_COMPARE)
    {
      nops = 0;

      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))
	++ nops;

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
	++ nops;

      while (nops --)
	emit_insn_after (gen_nop (), first);

      return;
    }
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}

/* Must not copy any rtx that uses a pc-relative address.  */
static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && (XINT (*x, 1) == UNSPEC_PIC_BASE
	  || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
    return 1;

  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}

/* Return the comparison code used to implement the MIN or MAX operation X.  */
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((GET_CODE (XEXP (a, 0)) == REG
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
      && (GET_CODE (XEXP (b, 0)) == REG
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
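/* Illustrative sketch (not part of the original file): the adjacency test
   above reduces to "same base register, offsets differing by exactly 4",
   shown here on plain integers.  The helper name is hypothetical.  */
#if 0
static int
adjacent_offsets_example (int regno0, long val0, int regno1, long val1)
{
  long diff = val1 - val0;	/* Word-sized accesses 4 bytes apart.  */
  return regno0 == regno1 && (diff == 4 || diff == -4);
}
#endif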
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

 	ldr	rd1, [rbase + offset]
 	ldr	rd2, [rbase + offset + 4]

     to

 	add	rd1, rbase, offset
 	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}

/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
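/* Illustrative sketch (not part of the original file): the same ordering
   check on plain arrays.  offsets = {4, 0, 8} with order[0] = 1 yields
   order = {1, 0, 2}; a gap or a duplicate offset makes it fail.  Names are
   hypothetical.  */
#if 0
static int
offset_order_example (int n, const long *offsets, int *order)
{
  int i, j;

  for (i = 1; i < n; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < n; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    if (order[i] != order[i - 1])
	      return 0;		/* A second match: duplicate offset.  */
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return 0;		/* No successor offset found: gap.  */
    }
  return 1;
}
#endif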
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (GET_CODE (operands[i]) == REG
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
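/* Illustrative sketch (not part of the original file): the addressing-mode
   classification above, on a plain first/last offset pair.  Names are
   hypothetical; the "arm"/"t32" flags stand in for TARGET_ARM/TARGET_32BIT.  */
#if 0
static int
ldm_case_example (long first, long last, int arm, int t32)
{
  if (first == 0)
    return 1;		/* ldmia: base points at the first word.  */
  if (arm && first == 4)
    return 2;		/* ldmib: base just below the first word.  */
  if (arm && last == 0)
    return 3;		/* ldmda: base points at the last word.  */
  if (t32 && last == -4)
    return 4;		/* ldmdb: base just above the last word.  */
  return 5;		/* Otherwise: add the offset to the base first.  */
}
#endif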
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}

/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}

/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	{
	  base_reg = regs[0];
	  base_reg_rtx = newbase;
	}
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}

/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
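/* Illustrative sketch (not part of the original file): the interleaving
   schedule described above, modelled as a plain C copy loop.  With a factor
   of 2 the emitted pattern is load/load/store/store per block, which gives
   each load time to complete before its value is consumed.  The helper name
   is hypothetical.  */
#if 0
static void
interleaved_copy_example (unsigned char *dst, const unsigned char *src,
			  unsigned long len)
{
  enum { FACTOR = 2, WORD = 4 };	/* Two 4-byte words per block.  */
  unsigned char tmp[FACTOR][WORD];
  unsigned long i;
  int j, k;

  for (i = 0; i + FACTOR * WORD <= len; i += FACTOR * WORD)
    {
      for (j = 0; j < FACTOR; j++)	/* All loads first...  */
	for (k = 0; k < WORD; k++)
	  tmp[j][k] = src[i + j * WORD + k];
      for (j = 0; j < FACTOR; j++)	/* ...then all stores.  */
	for (k = 0; k < WORD; k++)
	  dst[i + j * WORD + k] = tmp[j][k];
    }
  for (; i < len; i++)			/* Halfword/byte tail.  */
    dst[i] = src[i];
}
#endif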
static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
					 - src_autoinc);
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
					 - dst_autoinc);
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);
11097 addr = plus_constant (src, srcoffset - src_autoinc);
11098 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11099 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11101 if (interleave_factor == 1)
11103 addr = plus_constant (dst, dstoffset - dst_autoinc);
11104 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11105 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11114 /* Store last halfword if we haven't done so already. */
11118 addr = plus_constant (dst, dstoffset - dst_autoinc);
11119 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11120 emit_insn (gen_unaligned_storehi (mem,
11121 gen_lowpart (HImode, halfword_tmp)));
11125 /* Likewise for last byte. */
11129 addr = plus_constant (dst, dstoffset - dst_autoinc);
11130 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11131 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11135 gcc_assert (remaining == 0 && srcoffset == dstoffset);
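/* Illustrative sketch (added; a hypothetical plain-C helper, not
   compiler code): the shape of the copy the expander above emits for
   INTERLEAVE_FACTOR == 2.  Both loads are issued before the stores so
   the second load can hide the latency of the first; the real expander
   emits ldm/stm on the aligned side and unaligned ldr/str otherwise,
   plus the halfword/byte tail.  */
static void
copy_interleave_2_sketch (unsigned char *dst, const unsigned char *src,
                          unsigned long n)
{
  unsigned int r0, r1;
  while (n >= 8)
    {
      __builtin_memcpy (&r0, src, 4);       /* load  */
      __builtin_memcpy (&r1, src + 4, 4);   /* load  */
      __builtin_memcpy (dst, &r0, 4);       /* store */
      __builtin_memcpy (dst + 4, &r1, 4);   /* store */
      src += 8, dst += 8, n -= 8;
    }
  while (n--)                               /* tail bytes */
    *dst++ = *src++;
}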
11138 /* From mips_adjust_block_mem:
11140 Helper function for doing a loop-based block operation on memory
11141 reference MEM. Each iteration of the loop will operate on LENGTH
11142 bytes of MEM.
11144 Create a new base register for use within the loop and point it to
11145 the start of MEM. Create a new memory reference that uses this
11146 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11149 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11152 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11154 /* Although the new mem does not refer to a known location,
11155 it does keep up to LENGTH bytes of alignment. */
11156 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11157 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11160 /* From mips_block_move_loop:
11162 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11163 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11164 the memory regions do not overlap. */
11167 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11168 unsigned int interleave_factor,
11169 HOST_WIDE_INT bytes_per_iter)
11171 rtx label, src_reg, dest_reg, final_src, test;
11172 HOST_WIDE_INT leftover;
11174 leftover = length % bytes_per_iter;
11175 length -= leftover;
11177 /* Create registers and memory references for use within the loop. */
11178 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11179 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11181 /* Calculate the value that SRC_REG should have after the last iteration of
11182    the loop. */
11183 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11184 0, 0, OPTAB_WIDEN);
11186 /* Emit the start of the loop. */
11187 label = gen_label_rtx ();
11188 emit_label (label);
11190 /* Emit the loop body. */
11191 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11192 interleave_factor);
11194 /* Move on to the next block. */
11195 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11196 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11198 /* Emit the loop condition. */
11199 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11200 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11202 /* Mop up any left-over bytes. */
11204 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
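/* Illustrative sketch (added; hypothetical plain-C helper): the
   structure of the loop built above, assuming BYTES_PER_ITER > 0.
   FINAL_SRC is precomputed so the loop condition is a single pointer
   comparison, and the LEFTOVER tail is handled by one more
   straight-line copy after the loop, exactly as in the RTL version.  */
static void
block_move_loop_sketch (unsigned char *dst, const unsigned char *src,
                        unsigned long length, unsigned long bytes_per_iter)
{
  unsigned long leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);
  while (src != final_src)
    {
      unsigned long i;
      for (i = 0; i < bytes_per_iter; i++)      /* loop body */
        dst[i] = src[i];
      src += bytes_per_iter;                    /* next block */
      dst += bytes_per_iter;
    }
  while (leftover--)                            /* mop up the tail */
    *dst++ = *src++;
}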
11207 /* Emit a block move when either the source or destination is unaligned (not
11208 aligned to a four-byte boundary). This may need further tuning depending on
11209 core type, optimize_size setting, etc. */
11212 arm_movmemqi_unaligned (rtx *operands)
11214 HOST_WIDE_INT length = INTVAL (operands[2]);
11218 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11219 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11220 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11221 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11222 or dst_aligned though: allow more interleaving in those cases since the
11223 resulting code can be smaller. */
11224 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11225 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11228 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11229 interleave_factor, bytes_per_iter);
11231 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11232 interleave_factor);
11236 /* Note that the loop created by arm_block_move_unaligned_loop may be
11237 subject to loop unrolling, which makes tuning this condition a little
11238 awkward. */
11240 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11242 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11249 arm_gen_movmemqi (rtx *operands)
11251 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11252 HOST_WIDE_INT srcoffset, dstoffset;
11254 rtx src, dst, srcbase, dstbase;
11255 rtx part_bytes_reg = NULL;
11258 if (GET_CODE (operands[2]) != CONST_INT
11259 || GET_CODE (operands[3]) != CONST_INT
11260 || INTVAL (operands[2]) > 64)
11263 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11264 return arm_movmemqi_unaligned (operands);
11266 if (INTVAL (operands[3]) & 3)
11269 dstbase = operands[0];
11270 srcbase = operands[1];
11272 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11273 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11275 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11276 out_words_to_go = INTVAL (operands[2]) / 4;
11277 last_bytes = INTVAL (operands[2]) & 3;
11278 dstoffset = srcoffset = 0;
11280 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11281 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11283 for (i = 0; in_words_to_go >= 2; i+=4)
11285 if (in_words_to_go > 4)
11286 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11287 TRUE, srcbase, &srcoffset));
11289 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11290 src, FALSE, srcbase,
11293 if (out_words_to_go)
11295 if (out_words_to_go > 4)
11296 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11297 TRUE, dstbase, &dstoffset));
11298 else if (out_words_to_go != 1)
11299 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11300 out_words_to_go, dst,
11303 dstbase, &dstoffset));
11306 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11307 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11308 if (last_bytes != 0)
11310 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11316 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11317 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11320 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11321 if (out_words_to_go)
11325 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11326 sreg = copy_to_reg (mem);
11328 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11329 emit_move_insn (mem, sreg);
11332 gcc_assert (!in_words_to_go); /* Sanity check */
11335 if (in_words_to_go)
11337 gcc_assert (in_words_to_go > 0);
11339 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11340 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11343 gcc_assert (!last_bytes || part_bytes_reg);
11345 if (BYTES_BIG_ENDIAN && last_bytes)
11347 rtx tmp = gen_reg_rtx (SImode);
11349 /* The bytes we want are in the top end of the word. */
11350 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11351 GEN_INT (8 * (4 - last_bytes))));
11352 part_bytes_reg = tmp;
11356 mem = adjust_automodify_address (dstbase, QImode,
11357 plus_constant (dst, last_bytes - 1),
11358 dstoffset + last_bytes - 1);
11359 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11363 tmp = gen_reg_rtx (SImode);
11364 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11365 part_bytes_reg = tmp;
11372 if (last_bytes > 1)
11374 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11375 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11379 rtx tmp = gen_reg_rtx (SImode);
11380 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11381 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11382 part_bytes_reg = tmp;
11389 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11390 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
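/* Illustrative sketch (added; hypothetical plain-C helper,
   little-endian case): how the tail bytes are written from the word
   held in PART_BYTES_REG above -- store a halfword, shift right by 16
   to expose the next byte, then store it.  Big-endian instead
   pre-shifts the wanted bytes down from the top of the word, as in the
   lshrsi3 above.  */
static void
store_tail_sketch (unsigned char *dst, unsigned int word, int last_bytes)
{
  if (last_bytes > 1)
    {
      dst[0] = word & 0xff;             /* low halfword first */
      dst[1] = (word >> 8) & 0xff;
      word >>= 16;                      /* expose the final byte */
      dst += 2;
      last_bytes -= 2;
    }
  if (last_bytes)
    dst[0] = word & 0xff;
}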
11397 /* Select a dominance comparison mode if possible for a test of the general
11398 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11399 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11400 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11401 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11402 In all cases OP will be either EQ or NE, but we don't need to know which
11403 here. If we are unable to support a dominance comparison we return
11404 CC mode. This will then fail to match for the RTL expressions that
11405 generate this call. */
11407 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11409 enum rtx_code cond1, cond2;
11412 /* Currently we will probably get the wrong result if the individual
11413 comparisons are not simple. This also ensures that it is safe to
11414 reverse a comparison if necessary. */
11415 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11417 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11421 /* The if_then_else variant of this tests the second condition if the
11422 first passes, but is true if the first fails. Reverse the first
11423 condition to get a true "inclusive-or" expression. */
11424 if (cond_or == DOM_CC_NX_OR_Y)
11425 cond1 = reverse_condition (cond1);
11427 /* If the comparisons are not equal, and one doesn't dominate the other,
11428 then we can't do this. */
11430 && !comparison_dominates_p (cond1, cond2)
11431 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11436 enum rtx_code temp = cond1;
11444 if (cond_or == DOM_CC_X_AND_Y)
11449 case EQ: return CC_DEQmode;
11450 case LE: return CC_DLEmode;
11451 case LEU: return CC_DLEUmode;
11452 case GE: return CC_DGEmode;
11453 case GEU: return CC_DGEUmode;
11454 default: gcc_unreachable ();
11458 if (cond_or == DOM_CC_X_AND_Y)
11470 gcc_unreachable ();
11474 if (cond_or == DOM_CC_X_AND_Y)
11486 gcc_unreachable ();
11490 if (cond_or == DOM_CC_X_AND_Y)
11491 return CC_DLTUmode;
11496 return CC_DLTUmode;
11498 return CC_DLEUmode;
11502 gcc_unreachable ();
11506 if (cond_or == DOM_CC_X_AND_Y)
11507 return CC_DGTUmode;
11512 return CC_DGTUmode;
11514 return CC_DGEUmode;
11518 gcc_unreachable ();
11521 /* The remaining cases only occur when both comparisons are the
11522    same. */
11523 case NE:
11524 gcc_assert (cond1 == cond2);
11528 gcc_assert (cond1 == cond2);
11532 gcc_assert (cond1 == cond2);
11536 gcc_assert (cond1 == cond2);
11537 return CC_DLEUmode;
11540 gcc_assert (cond1 == cond2);
11541 return CC_DGEUmode;
11544 gcc_unreachable ();
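/* Worked example (added illustration): for (x == 0 || x < 0) the EQ
   condition is dominated by LE, so the pair collapses to the single
   signed comparison x <= 0; dually, (x <= 0 && x == 0) under
   DOM_CC_X_AND_Y is just x == 0.  The CC_D*modes returned above encode
   exactly these collapsed conditions.  */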
11549 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11551 /* All floating point compares return CCFP if it is an equality
11552 comparison, and CCFPE otherwise. */
11553 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11573 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11578 gcc_unreachable ();
11582 /* A compare with a shifted operand. Because of canonicalization, the
11583 comparison will have to be swapped when we emit the assembler. */
11584 if (GET_MODE (y) == SImode
11585 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11586 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11587 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11588 || GET_CODE (x) == ROTATERT))
11591 /* This operation is performed swapped, but since we only rely on the Z
11592 flag we don't need an additional mode. */
11593 if (GET_MODE (y) == SImode
11594 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11595 && GET_CODE (x) == NEG
11596 && (op == EQ || op == NE))
11599 /* This is a special case that is used by combine to allow a
11600 comparison of a shifted byte load to be split into a zero-extend
11601 followed by a comparison of the shifted integer (only valid for
11602 equalities and unsigned inequalities). */
11603 if (GET_MODE (x) == SImode
11604 && GET_CODE (x) == ASHIFT
11605 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11606 && GET_CODE (XEXP (x, 0)) == SUBREG
11607 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11608 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11609 && (op == EQ || op == NE
11610 || op == GEU || op == GTU || op == LTU || op == LEU)
11611 && GET_CODE (y) == CONST_INT)
11614 /* A construct for a conditional compare, if the false arm contains
11615 0, then both conditions must be true, otherwise either condition
11616 must be true. Not all conditions are possible, so CCmode is
11617 returned if it can't be done. */
11618 if (GET_CODE (x) == IF_THEN_ELSE
11619 && (XEXP (x, 2) == const0_rtx
11620 || XEXP (x, 2) == const1_rtx)
11621 && COMPARISON_P (XEXP (x, 0))
11622 && COMPARISON_P (XEXP (x, 1)))
11623 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11624 INTVAL (XEXP (x, 2)));
11626 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11627 if (GET_CODE (x) == AND
11628 && (op == EQ || op == NE)
11629 && COMPARISON_P (XEXP (x, 0))
11630 && COMPARISON_P (XEXP (x, 1)))
11631 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11634 if (GET_CODE (x) == IOR
11635 && (op == EQ || op == NE)
11636 && COMPARISON_P (XEXP (x, 0))
11637 && COMPARISON_P (XEXP (x, 1)))
11638 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11641 /* An operation (on Thumb) where we want to test for a single bit.
11642 This is done by shifting that bit up into the top bit of a
11643 scratch register; we can then branch on the sign bit. */
11645 && GET_MODE (x) == SImode
11646 && (op == EQ || op == NE)
11647 && GET_CODE (x) == ZERO_EXTRACT
11648 && XEXP (x, 1) == const1_rtx)
11651 /* An operation that sets the condition codes as a side-effect, the
11652 V flag is not set correctly, so we can only use comparisons where
11653 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11654 conditions). */
11655 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11656 if (GET_MODE (x) == SImode
11658 && (op == EQ || op == NE || op == LT || op == GE)
11659 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11660 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11661 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11662 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11663 || GET_CODE (x) == LSHIFTRT
11664 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11665 || GET_CODE (x) == ROTATERT
11666 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11667 return CC_NOOVmode;
11669 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11672 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11673 && GET_CODE (x) == PLUS
11674 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11677 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11679 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11680    available. */
11681 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11688 /* A DImode comparison against zero can be implemented by
11689 or'ing the two halves together. */
11690 if (y == const0_rtx)
11693 /* We can do an equality test in three Thumb instructions. */
11703 /* DImode unsigned comparisons can be implemented by cmp +
11704 cmpeq without a scratch register. Not worth doing in
11705 Thumb-2. */
11715 /* DImode signed and unsigned comparisons can be implemented
11716 by cmp + sbcs with a scratch register, but that does not
11717 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11718 gcc_assert (op != EQ && op != NE);
11722 gcc_unreachable ();
11729 /* X and Y are two things to compare using CODE. Emit the compare insn and
11730 return the rtx for register 0 in the proper mode. FP means this is a
11731 floating point compare: I don't think that it is needed on the arm. */
11733 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11735 enum machine_mode mode;
11737 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11739 /* We might have X as a constant, Y as a register because of the predicates
11740 used for cmpdi. If so, force X to a register here. */
11741 if (dimode_comparison && !REG_P (x))
11742 x = force_reg (DImode, x);
11744 mode = SELECT_CC_MODE (code, x, y);
11745 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11747 if (dimode_comparison
11748 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11749 && mode != CC_CZmode)
11753 /* To compare two non-zero values for equality, XOR them and
11754 then compare against zero. Not used for ARM mode; there
11755 CC_CZmode is cheaper. */
11756 if (mode == CC_Zmode && y != const0_rtx)
11758 gcc_assert (!reload_completed);
11759 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11763 /* A scratch register is required. */
11764 if (reload_completed)
11765 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11767 scratch = gen_rtx_SCRATCH (SImode);
11769 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11770 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11771 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11774 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
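/* Illustrative sketch (added; hypothetical plain-C helper): the DImode
   equality idiom used above -- X == Y iff (X ^ Y) == 0, and a 64-bit
   value is zero iff the OR of its two 32-bit halves is zero, so one
   ORR feeds a single test of the Z flag.  */
static int
di_equal_sketch (unsigned long long x, unsigned long long y)
{
  unsigned long long t = x ^ y;                 /* CC_Zmode path */
  unsigned int lo = (unsigned int) t;
  unsigned int hi = (unsigned int) (t >> 32);
  return (lo | hi) == 0;                        /* single Z-flag test */
}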
11779 /* Generate a sequence of insns that will generate the correct return
11780 address mask depending on the physical architecture that the program
11781 is running on. */
11783 arm_gen_return_addr_mask (void)
11785 rtx reg = gen_reg_rtx (Pmode);
11787 emit_insn (gen_return_addr_mask (reg));
11792 arm_reload_in_hi (rtx *operands)
11794 rtx ref = operands[1];
11796 HOST_WIDE_INT offset = 0;
11798 if (GET_CODE (ref) == SUBREG)
11800 offset = SUBREG_BYTE (ref);
11801 ref = SUBREG_REG (ref);
11804 if (GET_CODE (ref) == REG)
11806 /* We have a pseudo which has been spilt onto the stack; there
11807 are two cases here: the first where there is a simple
11808 stack-slot replacement and a second where the stack-slot is
11809 out of range, or is used as a subreg. */
11810 if (reg_equiv_mem (REGNO (ref)))
11812 ref = reg_equiv_mem (REGNO (ref));
11813 base = find_replacement (&XEXP (ref, 0));
11816 /* The slot is out of range, or was dressed up in a SUBREG. */
11817 base = reg_equiv_address (REGNO (ref));
11820 base = find_replacement (&XEXP (ref, 0));
11822 /* Handle the case where the address is too complex to be offset by 1. */
11823 if (GET_CODE (base) == MINUS
11824 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11826 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11828 emit_set_insn (base_plus, base);
11831 else if (GET_CODE (base) == PLUS)
11833 /* The addend must be CONST_INT, or we would have dealt with it above. */
11834 HOST_WIDE_INT hi, lo;
11836 offset += INTVAL (XEXP (base, 1));
11837 base = XEXP (base, 0);
11839 /* Rework the address into a legal sequence of insns. */
11840 /* Valid range for lo is -4095 -> 4095 */
11843 : -((-offset) & 0xfff));
11845 /* Corner case, if lo is the max offset then we would be out of range
11846 once we have added the additional 1 below, so bump the msb into the
11847 pre-loading insn(s). */
11851 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11852 ^ (HOST_WIDE_INT) 0x80000000)
11853 - (HOST_WIDE_INT) 0x80000000);
11855 gcc_assert (hi + lo == offset);
11859 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11861 /* Get the base address; addsi3 knows how to handle constants
11862 that require more than one insn. */
11863 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11869 /* Operands[2] may overlap operands[0] (though it won't overlap
11870 operands[1]), that's why we asked for a DImode reg -- so we can
11871 use the bit that does not overlap. */
11872 if (REGNO (operands[2]) == REGNO (operands[0]))
11873 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11875 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11877 emit_insn (gen_zero_extendqisi2 (scratch,
11878 gen_rtx_MEM (QImode,
11879 plus_constant (base,
11881 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11882 gen_rtx_MEM (QImode,
11883 plus_constant (base,
11885 if (!BYTES_BIG_ENDIAN)
11886 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11887 gen_rtx_IOR (SImode,
11890 gen_rtx_SUBREG (SImode, operands[0], 0),
11894 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11895 gen_rtx_IOR (SImode,
11896 gen_rtx_ASHIFT (SImode, scratch,
11898 gen_rtx_SUBREG (SImode, operands[0], 0)));
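/* Illustrative sketch (added; hypothetical plain-C helper): the two
   byte loads emitted above combine into a halfword with a shift and an
   OR; which byte lands in the high half depends on endianness.  */
static unsigned int
load_hi_sketch (const unsigned char *p, int big_endian)
{
  unsigned int b0 = p[0], b1 = p[1];
  return big_endian ? (b0 << 8) | b1 : (b1 << 8) | b0;
}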
11901 /* Handle storing a half-word to memory during reload by synthesizing as two
11902 byte stores. Take care not to clobber the input values until after we
11903 have moved them somewhere safe. This code assumes that if the DImode
11904 scratch in operands[2] overlaps either the input value or output address
11905 in some way, then that value must die in this insn (we absolutely need
11906 two scratch registers for some corner cases). */
11908 arm_reload_out_hi (rtx *operands)
11910 rtx ref = operands[0];
11911 rtx outval = operands[1];
11913 HOST_WIDE_INT offset = 0;
11915 if (GET_CODE (ref) == SUBREG)
11917 offset = SUBREG_BYTE (ref);
11918 ref = SUBREG_REG (ref);
11921 if (GET_CODE (ref) == REG)
11923 /* We have a pseudo which has been spilt onto the stack; there
11924 are two cases here: the first where there is a simple
11925 stack-slot replacement and a second where the stack-slot is
11926 out of range, or is used as a subreg. */
11927 if (reg_equiv_mem (REGNO (ref)))
11929 ref = reg_equiv_mem (REGNO (ref));
11930 base = find_replacement (&XEXP (ref, 0));
11933 /* The slot is out of range, or was dressed up in a SUBREG. */
11934 base = reg_equiv_address (REGNO (ref));
11937 base = find_replacement (&XEXP (ref, 0));
11939 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11941 /* Handle the case where the address is too complex to be offset by 1. */
11942 if (GET_CODE (base) == MINUS
11943 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11945 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11947 /* Be careful not to destroy OUTVAL. */
11948 if (reg_overlap_mentioned_p (base_plus, outval))
11950 /* Updating base_plus might destroy outval, see if we can
11951 swap the scratch and base_plus. */
11952 if (!reg_overlap_mentioned_p (scratch, outval))
11955 scratch = base_plus;
11960 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11962 /* Be conservative and copy OUTVAL into the scratch now,
11963 this should only be necessary if outval is a subreg
11964 of something larger than a word. */
11965 /* XXX Might this clobber base? I can't see how it can,
11966 since scratch is known to overlap with OUTVAL, and
11967 must be wider than a word. */
11968 emit_insn (gen_movhi (scratch_hi, outval));
11969 outval = scratch_hi;
11973 emit_set_insn (base_plus, base);
11976 else if (GET_CODE (base) == PLUS)
11978 /* The addend must be CONST_INT, or we would have dealt with it above. */
11979 HOST_WIDE_INT hi, lo;
11981 offset += INTVAL (XEXP (base, 1));
11982 base = XEXP (base, 0);
11984 /* Rework the address into a legal sequence of insns. */
11985 /* Valid range for lo is -4095 -> 4095 */
11988 : -((-offset) & 0xfff));
11990 /* Corner case, if lo is the max offset then we would be out of range
11991 once we have added the additional 1 below, so bump the msb into the
11992 pre-loading insn(s). */
11996 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11997 ^ (HOST_WIDE_INT) 0x80000000)
11998 - (HOST_WIDE_INT) 0x80000000);
12000 gcc_assert (hi + lo == offset);
12004 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12006 /* Be careful not to destroy OUTVAL. */
12007 if (reg_overlap_mentioned_p (base_plus, outval))
12009 /* Updating base_plus might destroy outval, see if we
12010 can swap the scratch and base_plus. */
12011 if (!reg_overlap_mentioned_p (scratch, outval))
12014 scratch = base_plus;
12019 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12021 /* Be conservative and copy outval into scratch now,
12022 this should only be necessary if outval is a
12023 subreg of something larger than a word. */
12024 /* XXX Might this clobber base? I can't see how it
12025 can, since scratch is known to overlap with
12026 OUTVAL. */
12027 emit_insn (gen_movhi (scratch_hi, outval));
12028 outval = scratch_hi;
12032 /* Get the base address; addsi3 knows how to handle constants
12033 that require more than one insn. */
12034 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12040 if (BYTES_BIG_ENDIAN)
12042 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12043 plus_constant (base, offset + 1)),
12044 gen_lowpart (QImode, outval)));
12045 emit_insn (gen_lshrsi3 (scratch,
12046 gen_rtx_SUBREG (SImode, outval, 0),
12048 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12049 gen_lowpart (QImode, scratch)));
12053 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12054 gen_lowpart (QImode, outval)));
12055 emit_insn (gen_lshrsi3 (scratch,
12056 gen_rtx_SUBREG (SImode, outval, 0),
12058 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12059 plus_constant (base, offset + 1)),
12060 gen_lowpart (QImode, scratch)));
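/* Illustrative sketch (added; hypothetical plain-C helper): the store
   case is the mirror image -- write one byte, shift the value right by
   8, write the other, with the order of addresses chosen by
   endianness, as in the big-/little-endian arms above.  */
static void
store_hi_sketch (unsigned char *p, unsigned int val, int big_endian)
{
  if (big_endian)
    {
      p[1] = val & 0xff;            /* low byte at the high address */
      p[0] = (val >> 8) & 0xff;
    }
  else
    {
      p[0] = val & 0xff;
      p[1] = (val >> 8) & 0xff;
    }
}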
12064 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12065 (padded to the size of a word) should be passed in a register. */
12068 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12070 if (TARGET_AAPCS_BASED)
12071 return must_pass_in_stack_var_size (mode, type);
12073 return must_pass_in_stack_var_size_or_pad (mode, type);
12077 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12078 Return true if an argument passed on the stack should be padded upwards,
12079 i.e. if the least-significant byte has useful data.
12080 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12081 aggregate types are placed in the lowest memory address. */
12084 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12086 if (!TARGET_AAPCS_BASED)
12087 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12089 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12096 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12097 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12098 register has useful data, and return the opposite if the most
12099 significant byte does. */
12102 arm_pad_reg_upward (enum machine_mode mode,
12103 tree type, int first ATTRIBUTE_UNUSED)
12105 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12107 /* For AAPCS, small aggregates, small fixed-point types,
12108 and small complex types are always padded upwards. */
12111 if ((AGGREGATE_TYPE_P (type)
12112 || TREE_CODE (type) == COMPLEX_TYPE
12113 || FIXED_POINT_TYPE_P (type))
12114 && int_size_in_bytes (type) <= 4)
12119 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12120 && GET_MODE_SIZE (mode) <= 4)
12125 /* Otherwise, use default padding. */
12126 return !BYTES_BIG_ENDIAN;
12130 /* Print a symbolic form of X to the debug file, F. */
12132 arm_print_value (FILE *f, rtx x)
12134 switch (GET_CODE (x))
12137 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12141 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12149 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12151 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12152 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12160 fprintf (f, "\"%s\"", XSTR (x, 0));
12164 fprintf (f, "`%s'", XSTR (x, 0));
12168 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12172 arm_print_value (f, XEXP (x, 0));
12176 arm_print_value (f, XEXP (x, 0));
12178 arm_print_value (f, XEXP (x, 1));
12186 fprintf (f, "????");
12191 /* Routines for manipulation of the constant pool. */
12193 /* Arm instructions cannot load a large constant directly into a
12194 register; they have to come from a pc relative load. The constant
12195 must therefore be placed in the addressable range of the pc
12196 relative load. Depending on the precise pc relative load
12197 instruction the range is somewhere between 256 bytes and 4k. This
12198 means that we often have to dump a constant inside a function, and
12199 generate code to branch around it.
12201 It is important to minimize this, since the branches will slow
12202 things down and make the code larger.
12204 Normally we can hide the table after an existing unconditional
12205 branch so that there is no interruption of the flow, but in the
12206 worst case the code looks like this:
12207
12208 ldr rn, L1
12209 ...
12210 b L2
12211 align
12212 L1: .long value
12213 L2:
12214 ...
12215
12216 ldr rn, L3
12217 ...
12218 b L4
12219 align
12220 L3: .long value
12221 L4:
12222 ...
12223
12224 We fix this by performing a scan after scheduling, which notices
12225 which instructions need to have their operands fetched from the
12226 constant table and builds the table.
12228 The algorithm starts by building a table of all the constants that
12229 need fixing up and all the natural barriers in the function (places
12230 where a constant table can be dropped without breaking the flow).
12231 For each fixup we note how far the pc-relative replacement will be
12232 able to reach and the offset of the instruction into the function.
12234 Having built the table we then group the fixes together to form
12235 tables that are as large as possible (subject to addressing
12236 constraints) and emit each table of constants after the last
12237 barrier that is within range of all the instructions in the group.
12238 If a group does not contain a barrier, then we forcibly create one
12239 by inserting a jump instruction into the flow. Once the table has
12240 been inserted, the insns are then modified to reference the
12241 relevant entry in the pool.
12243 Possible enhancements to the algorithm (not implemented) are:
12245 1) For some processors and object formats, there may be benefit in
12246 aligning the pools to the start of cache lines; this alignment
12247 would need to be taken into account when calculating addressability
12248 of a pool. */
12250 /* These typedefs are located at the start of this file, so that
12251 they can be used in the prototypes there. This comment is to
12252 remind readers of that fact so that the following structures
12253 can be understood more easily.
12255 typedef struct minipool_node Mnode;
12256 typedef struct minipool_fixup Mfix; */
12258 struct minipool_node
12259 {
12260 /* Doubly linked chain of entries. */
12261 Mnode * next;
12262 Mnode * prev;
12263 /* The maximum offset into the code that this entry can be placed. While
12264 pushing fixes for forward references, all entries are sorted in order
12265 of increasing max_address. */
12266 HOST_WIDE_INT max_address;
12267 /* Similarly for an entry inserted for a backwards ref. */
12268 HOST_WIDE_INT min_address;
12269 /* The number of fixes referencing this entry. This can become zero
12270 if we "unpush" an entry. In this case we ignore the entry when we
12271 come to emit the code. */
12272 int refcount;
12273 /* The offset from the start of the minipool. */
12274 HOST_WIDE_INT offset;
12275 /* The value in the table. */
12276 rtx value;
12277 /* The mode of the value. */
12278 enum machine_mode mode;
12279 /* The size of the value. With iWMMXt enabled
12280 sizes > 4 also imply an alignment of 8 bytes. */
12281 int fix_size;
12282 };
12284 struct minipool_fixup
12285 {
12286 Mfix * next;
12287 rtx insn;
12288 HOST_WIDE_INT address;
12289 rtx * loc;
12290 enum machine_mode mode;
12291 int fix_size;
12292 rtx value;
12293 Mnode * minipool;
12294 HOST_WIDE_INT forwards;
12295 HOST_WIDE_INT backwards;
12296 };
12298 /* Fixes less than a word need padding out to a word boundary. */
12299 #define MINIPOOL_FIX_SIZE(mode) \
12300 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
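/* For example, an HImode (2-byte) fix still occupies 4 bytes in the
   pool, while SImode and DImode fixes occupy 4 and 8 bytes
   respectively.  (Added illustration.)  */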
12302 static Mnode * minipool_vector_head;
12303 static Mnode * minipool_vector_tail;
12304 static rtx minipool_vector_label;
12305 static int minipool_pad;
12307 /* The linked list of all minipool fixes required for this function. */
12308 Mfix * minipool_fix_head;
12309 Mfix * minipool_fix_tail;
12310 /* The fix entry for the current minipool, once it has been placed. */
12311 Mfix * minipool_barrier;
12313 /* Determines if INSN is the start of a jump table. Returns the end
12314 of the TABLE or NULL_RTX. */
12316 is_jump_table (rtx insn)
12320 if (jump_to_label_p (insn)
12321 && ((table = next_real_insn (JUMP_LABEL (insn)))
12322 == next_real_insn (insn))
12323 && table != NULL
12324 && GET_CODE (table) == JUMP_INSN
12325 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12326 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12332 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12333 #define JUMP_TABLES_IN_TEXT_SECTION 0
12336 static HOST_WIDE_INT
12337 get_jump_table_size (rtx insn)
12339 /* ADDR_VECs only take room if read-only data goes into the text
12340    section. */
12341 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12343 rtx body = PATTERN (insn);
12344 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12345 HOST_WIDE_INT size;
12346 HOST_WIDE_INT modesize;
12348 modesize = GET_MODE_SIZE (GET_MODE (body));
12349 size = modesize * XVECLEN (body, elt);
12353 /* Round up size of TBB table to a halfword boundary. */
12354 size = (size + 1) & ~(HOST_WIDE_INT)1;
12357 /* No padding necessary for TBH. */
12360 /* Add two bytes for alignment on Thumb. */
12365 gcc_unreachable ();
12373 /* Return the maximum amount of padding that will be inserted before
12374    label LABEL. */
12376 static HOST_WIDE_INT
12377 get_label_padding (rtx label)
12379 HOST_WIDE_INT align, min_insn_size;
12381 align = 1 << label_to_alignment (label);
12382 min_insn_size = TARGET_THUMB ? 2 : 4;
12383 return align > min_insn_size ? align - min_insn_size : 0;
12386 /* Move a minipool fix MP from its current location to before MAX_MP.
12387 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12388 constraints may need updating. */
12390 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12391 HOST_WIDE_INT max_address)
12393 /* The code below assumes these are different. */
12394 gcc_assert (mp != max_mp);
12396 if (max_mp == NULL)
12398 if (max_address < mp->max_address)
12399 mp->max_address = max_address;
12403 if (max_address > max_mp->max_address - mp->fix_size)
12404 mp->max_address = max_mp->max_address - mp->fix_size;
12406 mp->max_address = max_address;
12408 /* Unlink MP from its current position. Since max_mp is non-null,
12409 mp->prev must be non-null. */
12410 mp->prev->next = mp->next;
12411 if (mp->next != NULL)
12412 mp->next->prev = mp->prev;
12414 minipool_vector_tail = mp->prev;
12416 /* Re-insert it before MAX_MP. */
12418 mp->prev = max_mp->prev;
12421 if (mp->prev != NULL)
12422 mp->prev->next = mp;
12424 minipool_vector_head = mp;
12427 /* Save the new entry. */
12430 /* Scan over the preceding entries and adjust their addresses as
12431    required. */
12432 while (mp->prev != NULL
12433 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12435 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12442 /* Add a constant to the minipool for a forward reference. Returns the
12443 node added or NULL if the constant will not fit in this pool. */
12445 add_minipool_forward_ref (Mfix *fix)
12447 /* If set, max_mp is the first pool_entry that has a lower
12448 constraint than the one we are trying to add. */
12449 Mnode * max_mp = NULL;
12450 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12453 /* If the minipool starts before the end of FIX->INSN then this FIX
12454 can not be placed into the current pool. Furthermore, adding the
12455 new constant pool entry may cause the pool to start FIX_SIZE bytes
12456 earlier. */
12457 if (minipool_vector_head &&
12458 (fix->address + get_attr_length (fix->insn)
12459 >= minipool_vector_head->max_address - fix->fix_size))
12462 /* Scan the pool to see if a constant with the same value has
12463 already been added. While we are doing this, also note the
12464 location where we must insert the constant if it doesn't already
12465 exist. */
12466 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12468 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12469 && fix->mode == mp->mode
12470 && (GET_CODE (fix->value) != CODE_LABEL
12471 || (CODE_LABEL_NUMBER (fix->value)
12472 == CODE_LABEL_NUMBER (mp->value)))
12473 && rtx_equal_p (fix->value, mp->value))
12475 /* More than one fix references this entry. */
12477 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12480 /* Note the insertion point if necessary. */
12482 && mp->max_address > max_address)
12485 /* If we are inserting an 8-bytes aligned quantity and
12486 we have not already found an insertion point, then
12487 make sure that all such 8-byte aligned quantities are
12488 placed at the start of the pool. */
12489 if (ARM_DOUBLEWORD_ALIGN
12491 && fix->fix_size >= 8
12492 && mp->fix_size < 8)
12495 max_address = mp->max_address;
12499 /* The value is not currently in the minipool, so we need to create
12500 a new entry for it. If MAX_MP is NULL, the entry will be put on
12501 the end of the list since the placement is less constrained than
12502 any existing entry. Otherwise, we insert the new fix before
12503 MAX_MP and, if necessary, adjust the constraints on the other
12504 entries. */
12506 mp->fix_size = fix->fix_size;
12507 mp->mode = fix->mode;
12508 mp->value = fix->value;
12510 /* Not yet required for a backwards ref. */
12511 mp->min_address = -65536;
12513 if (max_mp == NULL)
12515 mp->max_address = max_address;
12517 mp->prev = minipool_vector_tail;
12519 if (mp->prev == NULL)
12521 minipool_vector_head = mp;
12522 minipool_vector_label = gen_label_rtx ();
12525 mp->prev->next = mp;
12527 minipool_vector_tail = mp;
12531 if (max_address > max_mp->max_address - mp->fix_size)
12532 mp->max_address = max_mp->max_address - mp->fix_size;
12534 mp->max_address = max_address;
12537 mp->prev = max_mp->prev;
12539 if (mp->prev != NULL)
12540 mp->prev->next = mp;
12542 minipool_vector_head = mp;
12545 /* Save the new entry. */
12548 /* Scan over the preceding entries and adjust their addresses as
12549    required. */
12550 while (mp->prev != NULL
12551 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12553 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12561 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12562 HOST_WIDE_INT min_address)
12564 HOST_WIDE_INT offset;
12566 /* The code below assumes these are different. */
12567 gcc_assert (mp != min_mp);
12569 if (min_mp == NULL)
12571 if (min_address > mp->min_address)
12572 mp->min_address = min_address;
12576 /* We will adjust this below if it is too loose. */
12577 mp->min_address = min_address;
12579 /* Unlink MP from its current position. Since min_mp is non-null,
12580 mp->next must be non-null. */
12581 mp->next->prev = mp->prev;
12582 if (mp->prev != NULL)
12583 mp->prev->next = mp->next;
12585 minipool_vector_head = mp->next;
12587 /* Reinsert it after MIN_MP. */
12589 mp->next = min_mp->next;
12591 if (mp->next != NULL)
12592 mp->next->prev = mp;
12594 minipool_vector_tail = mp;
12600 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12602 mp->offset = offset;
12603 if (mp->refcount > 0)
12604 offset += mp->fix_size;
12606 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12607 mp->next->min_address = mp->min_address + mp->fix_size;
12613 /* Add a constant to the minipool for a backward reference. Returns the
12614 node added or NULL if the constant will not fit in this pool.
12616 Note that the code for insertion for a backwards reference can be
12617 somewhat confusing because the calculated offsets for each fix do
12618 not take into account the size of the pool (which is still under
12619 construction). */
12621 add_minipool_backward_ref (Mfix *fix)
12623 /* If set, min_mp is the last pool_entry that has a lower constraint
12624 than the one we are trying to add. */
12625 Mnode *min_mp = NULL;
12626 /* This can be negative, since it is only a constraint. */
12627 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12630 /* If we can't reach the current pool from this insn, or if we can't
12631 insert this entry at the end of the pool without pushing other
12632 fixes out of range, then we don't try. This ensures that we
12633 can't fail later on. */
12634 if (min_address >= minipool_barrier->address
12635 || (minipool_vector_tail->min_address + fix->fix_size
12636 >= minipool_barrier->address))
12639 /* Scan the pool to see if a constant with the same value has
12640 already been added. While we are doing this, also note the
12641 location where we must insert the constant if it doesn't already
12642 exist. */
12643 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12645 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12646 && fix->mode == mp->mode
12647 && (GET_CODE (fix->value) != CODE_LABEL
12648 || (CODE_LABEL_NUMBER (fix->value)
12649 == CODE_LABEL_NUMBER (mp->value)))
12650 && rtx_equal_p (fix->value, mp->value)
12651 /* Check that there is enough slack to move this entry to the
12652 end of the table (this is conservative). */
12653 && (mp->max_address
12654 > (minipool_barrier->address
12655 + minipool_vector_tail->offset
12656 + minipool_vector_tail->fix_size)))
12659 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12662 if (min_mp != NULL)
12663 mp->min_address += fix->fix_size;
12666 /* Note the insertion point if necessary. */
12667 if (mp->min_address < min_address)
12669 /* For now, we do not allow the insertion of 8-byte alignment
12670 requiring nodes anywhere but at the start of the pool. */
12671 if (ARM_DOUBLEWORD_ALIGN
12672 && fix->fix_size >= 8 && mp->fix_size < 8)
12677 else if (mp->max_address
12678 < minipool_barrier->address + mp->offset + fix->fix_size)
12680 /* Inserting before this entry would push the fix beyond
12681 its maximum address (which can happen if we have
12682 re-located a forwards fix); force the new fix to come
12683 after it. */
12684 if (ARM_DOUBLEWORD_ALIGN
12685 && fix->fix_size >= 8 && mp->fix_size < 8)
12690 min_address = mp->min_address + fix->fix_size;
12693 /* Do not insert a non-8-byte aligned quantity before 8-byte
12694 aligned quantities. */
12695 else if (ARM_DOUBLEWORD_ALIGN
12696 && fix->fix_size < 8
12697 && mp->fix_size >= 8)
12700 min_address = mp->min_address + fix->fix_size;
12705 /* We need to create a new entry. */
12707 mp->fix_size = fix->fix_size;
12708 mp->mode = fix->mode;
12709 mp->value = fix->value;
12711 mp->max_address = minipool_barrier->address + 65536;
12713 mp->min_address = min_address;
12715 if (min_mp == NULL)
12718 mp->next = minipool_vector_head;
12720 if (mp->next == NULL)
12722 minipool_vector_tail = mp;
12723 minipool_vector_label = gen_label_rtx ();
12726 mp->next->prev = mp;
12728 minipool_vector_head = mp;
12732 mp->next = min_mp->next;
12736 if (mp->next != NULL)
12737 mp->next->prev = mp;
12739 minipool_vector_tail = mp;
12742 /* Save the new entry. */
12750 /* Scan over the following entries and adjust their offsets. */
12751 while (mp->next != NULL)
12753 if (mp->next->min_address < mp->min_address + mp->fix_size)
12754 mp->next->min_address = mp->min_address + mp->fix_size;
12757 mp->next->offset = mp->offset + mp->fix_size;
12759 mp->next->offset = mp->offset;
12768 assign_minipool_offsets (Mfix *barrier)
12770 HOST_WIDE_INT offset = 0;
12773 minipool_barrier = barrier;
12775 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12777 mp->offset = offset;
12779 if (mp->refcount > 0)
12780 offset += mp->fix_size;
12784 /* Output the literal table. */
12786 dump_minipool (rtx scan)
12792 if (ARM_DOUBLEWORD_ALIGN)
12793 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12794 if (mp->refcount > 0 && mp->fix_size >= 8)
12801 fprintf (dump_file,
12802 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12803 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12805 scan = emit_label_after (gen_label_rtx (), scan);
12806 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12807 scan = emit_label_after (minipool_vector_label, scan);
12809 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12811 if (mp->refcount > 0)
12815 fprintf (dump_file,
12816 ";; Offset %u, min %ld, max %ld ",
12817 (unsigned) mp->offset, (unsigned long) mp->min_address,
12818 (unsigned long) mp->max_address);
12819 arm_print_value (dump_file, mp->value);
12820 fputc ('\n', dump_file);
12823 switch (mp->fix_size)
12825 #ifdef HAVE_consttable_1
12827 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12831 #ifdef HAVE_consttable_2
12833 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12837 #ifdef HAVE_consttable_4
12839 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12843 #ifdef HAVE_consttable_8
12845 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12849 #ifdef HAVE_consttable_16
12851 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12856 gcc_unreachable ();
12864 minipool_vector_head = minipool_vector_tail = NULL;
12865 scan = emit_insn_after (gen_consttable_end (), scan);
12866 scan = emit_barrier_after (scan);
12869 /* Return the cost of forcibly inserting a barrier after INSN. */
12871 arm_barrier_cost (rtx insn)
12873 /* Basing the location of the pool on the loop depth is preferable,
12874 but at the moment, the basic block information seems to be
12875 corrupt by this stage of the compilation. */
12876 int base_cost = 50;
12877 rtx next = next_nonnote_insn (insn);
12879 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12882 switch (GET_CODE (insn))
12885 /* It will always be better to place the table before the label, rather
12886    than after it. */
12894 return base_cost - 10;
12897 return base_cost + 10;
12901 /* Find the best place in the insn stream in the range
12902 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12903 Create the barrier by inserting a jump and add a new fix entry for
12906 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12908 HOST_WIDE_INT count = 0;
12910 rtx from = fix->insn;
12911 /* The instruction after which we will insert the jump. */
12912 rtx selected = NULL;
12914 /* The address at which the jump instruction will be placed. */
12915 HOST_WIDE_INT selected_address;
12917 HOST_WIDE_INT max_count = max_address - fix->address;
12918 rtx label = gen_label_rtx ();
12920 selected_cost = arm_barrier_cost (from);
12921 selected_address = fix->address;
12923 while (from && count < max_count)
12928 /* This code shouldn't have been called if there was a natural barrier
12929    within range. */
12930 gcc_assert (GET_CODE (from) != BARRIER);
12932 /* Count the length of this insn. This must stay in sync with the
12933 code that pushes minipool fixes. */
12934 if (LABEL_P (from))
12935 count += get_label_padding (from);
12937 count += get_attr_length (from);
12939 /* If there is a jump table, add its length. */
12940 tmp = is_jump_table (from);
12943 count += get_jump_table_size (tmp);
12945 /* Jump tables aren't in a basic block, so base the cost on
12946 the dispatch insn. If we select this location, we will
12947 still put the pool after the table. */
12948 new_cost = arm_barrier_cost (from);
12950 if (count < max_count
12951 && (!selected || new_cost <= selected_cost))
12954 selected_cost = new_cost;
12955 selected_address = fix->address + count;
12958 /* Continue after the dispatch table. */
12959 from = NEXT_INSN (tmp);
12963 new_cost = arm_barrier_cost (from);
12965 if (count < max_count
12966 && (!selected || new_cost <= selected_cost))
12969 selected_cost = new_cost;
12970 selected_address = fix->address + count;
12973 from = NEXT_INSN (from);
12976 /* Make sure that we found a place to insert the jump. */
12977 gcc_assert (selected);
12979 /* Make sure we do not split a call and its corresponding
12980 CALL_ARG_LOCATION note. */
12981 if (CALL_P (selected))
12983 rtx next = NEXT_INSN (selected);
12984 if (next && NOTE_P (next)
12985 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12989 /* Create a new JUMP_INSN that branches around a barrier. */
12990 from = emit_jump_insn_after (gen_jump (label), selected);
12991 JUMP_LABEL (from) = label;
12992 barrier = emit_barrier_after (from);
12993 emit_label_after (label, barrier);
12995 /* Create a minipool barrier entry for the new barrier. */
12996 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12997 new_fix->insn = barrier;
12998 new_fix->address = selected_address;
12999 new_fix->next = fix->next;
13000 fix->next = new_fix;
13005 /* Record that there is a natural barrier in the insn stream at
13006    ADDRESS. */
13008 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13010 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13013 fix->address = address;
13016 if (minipool_fix_head != NULL)
13017 minipool_fix_tail->next = fix;
13019 minipool_fix_head = fix;
13021 minipool_fix_tail = fix;
13024 /* Record INSN, which will need fixing up to load a value from the
13025 minipool. ADDRESS is the offset of the insn since the start of the
13026 function; LOC is a pointer to the part of the insn which requires
13027 fixing; VALUE is the constant that must be loaded, which is of type
13028 MODE. */
13030 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13031 enum machine_mode mode, rtx value)
13033 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13036 fix->address = address;
13039 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13040 fix->value = value;
13041 fix->forwards = get_attr_pool_range (insn);
13042 fix->backwards = get_attr_neg_pool_range (insn);
13043 fix->minipool = NULL;
13045 /* If an insn doesn't have a range defined for it, then it isn't
13046 expecting to be reworked by this code. Better to stop now than
13047 to generate duff assembly code. */
13048 gcc_assert (fix->forwards || fix->backwards);
13050 /* If an entry requires 8-byte alignment then assume all constant pools
13051 require 4 bytes of padding. Trying to do this later on a per-pool
13052 basis is awkward because existing pool entries have to be modified. */
13053 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13058 fprintf (dump_file,
13059 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13060 GET_MODE_NAME (mode),
13061 INSN_UID (insn), (unsigned long) address,
13062 -1 * (long)fix->backwards, (long)fix->forwards);
13063 arm_print_value (dump_file, fix->value);
13064 fprintf (dump_file, "\n");
13067 /* Add it to the chain of fixes. */
13070 if (minipool_fix_head != NULL)
13071 minipool_fix_tail->next = fix;
13073 minipool_fix_head = fix;
13075 minipool_fix_tail = fix;
13078 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13079 Returns the number of insns needed, or 99 if we don't know how to
13080 do it. */
13082 arm_const_double_inline_cost (rtx val)
13084 rtx lowpart, highpart;
13085 enum machine_mode mode;
13087 mode = GET_MODE (val);
13089 if (mode == VOIDmode)
13092 gcc_assert (GET_MODE_SIZE (mode) == 8);
13094 lowpart = gen_lowpart (SImode, val);
13095 highpart = gen_highpart_mode (SImode, mode, val);
13097 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13098 gcc_assert (GET_CODE (highpart) == CONST_INT);
13100 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13101 NULL_RTX, NULL_RTX, 0, 0)
13102 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13103 NULL_RTX, NULL_RTX, 0, 0));
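/* Worked example (added illustration): for the value
   0x0000000100000001 both 32-bit halves are valid ARM immediates, so
   the cost is 1 + 1 = 2 insns; halves each needing, say, a mov plus an
   orr would cost 4 in total.  */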
13106 /* Return true if it is worthwhile to split a 64-bit constant into two
13107 32-bit operations. This is the case if optimizing for size, or
13108 if we have load delay slots, or if one 32-bit part can be done with
13109 a single data operation. */
13111 arm_const_double_by_parts (rtx val)
13113 enum machine_mode mode = GET_MODE (val);
13116 if (optimize_size || arm_ld_sched)
13119 if (mode == VOIDmode)
13122 part = gen_highpart_mode (SImode, mode, val);
13124 gcc_assert (GET_CODE (part) == CONST_INT);
13126 if (const_ok_for_arm (INTVAL (part))
13127 || const_ok_for_arm (~INTVAL (part)))
13130 part = gen_lowpart (SImode, val);
13132 gcc_assert (GET_CODE (part) == CONST_INT);
13134 if (const_ok_for_arm (INTVAL (part))
13135 || const_ok_for_arm (~INTVAL (part)))
13141 /* Return true if it is possible to inline both the high and low parts
13142 of a 64-bit constant into 32-bit data processing instructions. */
13144 arm_const_double_by_immediates (rtx val)
13146 enum machine_mode mode = GET_MODE (val);
13149 if (mode == VOIDmode)
13152 part = gen_highpart_mode (SImode, mode, val);
13154 gcc_assert (GET_CODE (part) == CONST_INT);
13156 if (!const_ok_for_arm (INTVAL (part)))
13159 part = gen_lowpart (SImode, val);
13161 gcc_assert (GET_CODE (part) == CONST_INT);
13163 if (!const_ok_for_arm (INTVAL (part)))
13169 /* Scan INSN and note any of its operands that need fixing.
13170 If DO_PUSHES is false we do not actually push any of the fixups
13171 needed. The function returns TRUE if any fixups were needed/pushed.
13172 This is used by arm_memory_load_p() which needs to know about loads
13173 of constants that will be converted into minipool loads. */
13175 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13177 bool result = false;
13180 extract_insn (insn);
13182 if (!constrain_operands (1))
13183 fatal_insn_not_found (insn);
13185 if (recog_data.n_alternatives == 0)
13188 /* Fill in recog_op_alt with information about the constraints of
13189    this insn. */
13190 preprocess_constraints ();
13192 for (opno = 0; opno < recog_data.n_operands; opno++)
13194 /* Things we need to fix can only occur in inputs. */
13195 if (recog_data.operand_type[opno] != OP_IN)
13198 /* If this alternative is a memory reference, then any mention
13199 of constants in this alternative is really to fool reload
13200 into allowing us to accept one there. We need to fix them up
13201 now so that we output the right code. */
13202 if (recog_op_alt[opno][which_alternative].memory_ok)
13204 rtx op = recog_data.operand[opno];
13206 if (CONSTANT_P (op))
13209 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13210 recog_data.operand_mode[opno], op);
13213 else if (GET_CODE (op) == MEM
13214 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13215 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13219 rtx cop = avoid_constant_pool_reference (op);
13221 /* Casting the address of something to a mode narrower
13222 than a word can cause avoid_constant_pool_reference()
13223 to return the pool reference itself. That's no good to
13224 us here. Let's just hope that we can use the
13225 constant pool value directly. */
13227 cop = get_pool_constant (XEXP (op, 0));
13229 push_minipool_fix (insn, address,
13230 recog_data.operand_loc[opno],
13231 recog_data.operand_mode[opno], cop);
13242 /* Convert instructions to their cc-clobbering variant if possible, since
13243 that allows us to use smaller encodings. */
13246 thumb2_reorg (void)
13251 INIT_REG_SET (&live);
13253 /* We are freeing block_for_insn in the toplev to keep compatibility
13254 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13255 compute_bb_for_insn ();
13262 COPY_REG_SET (&live, DF_LR_OUT (bb));
13263 df_simulate_initialize_backwards (bb, &live);
13264 FOR_BB_INSNS_REVERSE (bb, insn)
13266 if (NONJUMP_INSN_P (insn)
13267 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13269 rtx pat = PATTERN (insn);
13270 if (GET_CODE (pat) == SET
13271 && low_register_operand (XEXP (pat, 0), SImode)
13272 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13273 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13274 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13276 rtx dst = XEXP (pat, 0);
13277 rtx src = XEXP (pat, 1);
13278 rtx op0 = XEXP (src, 0);
13279 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13280 ? XEXP (src, 1) : NULL);
13282 if (rtx_equal_p (dst, op0)
13283 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13285 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13286 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13287 rtvec vec = gen_rtvec (2, pat, clobber);
13289 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13290 INSN_CODE (insn) = -1;
13292 /* We can also handle a commutative operation where the
13293 second operand matches the destination. */
13294 else if (op1 && rtx_equal_p (dst, op1))
13296 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13297 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13300 src = copy_rtx (src);
13301 XEXP (src, 0) = op1;
13302 XEXP (src, 1) = op0;
13303 pat = gen_rtx_SET (VOIDmode, dst, src);
13304 vec = gen_rtvec (2, pat, clobber);
13305 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13306 INSN_CODE (insn) = -1;
13311 if (NONDEBUG_INSN_P (insn))
13312 df_simulate_one_insn_backwards (bb, insn, &live);
13316 CLEAR_REG_SET (&live);
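/* Illustration (not original code): in Thumb-2, the flag-setting
   "ands r0, r0, r1" has a 16-bit encoding whereas the plain
   "and r0, r0, r1" needs 32 bits.  When the condition codes are dead,
   the pass above rewrites the SET as

       (parallel [(set (reg:SI 0) (and:SI (reg:SI 0) (reg:SI 1)))
                  (clobber (reg:CC CC_REGNUM))])

   so that the shorter flag-setting form can be selected.  */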
13319 /* GCC puts the pool in the wrong place for ARM, since we can only
13320 load addresses a limited distance around the pc. We do some
13321 special munging to move the constant pool values to the correct
13322 point in the code. */
13327 HOST_WIDE_INT address = 0;
13333 minipool_fix_head = minipool_fix_tail = NULL;
13335 /* The first insn must always be a note, or the code below won't
13336 scan it properly. */
13337 insn = get_insns ();
13338 gcc_assert (GET_CODE (insn) == NOTE);
13341 /* Scan all the insns and record the operands that will need fixing. */
13342 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13344 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13345 && (arm_cirrus_insn_p (insn)
13346 || GET_CODE (insn) == JUMP_INSN
13347 || arm_memory_load_p (insn)))
13348 cirrus_reorg (insn);
13350 if (GET_CODE (insn) == BARRIER)
13351 push_minipool_barrier (insn, address);
13352 else if (INSN_P (insn))
13356 note_invalid_constants (insn, address, true);
13357 address += get_attr_length (insn);
13359 /* If the insn is a vector jump, add the size of the table
13360 and skip the table. */
13361 if ((table = is_jump_table (insn)) != NULL)
13363 address += get_jump_table_size (table);
13367 else if (LABEL_P (insn))
13368 /* Add the worst-case padding due to alignment. We don't add
13369 the _current_ padding because the minipool insertions
13370 themselves might change it. */
13371 address += get_label_padding (insn);
13374 fix = minipool_fix_head;
13376 /* Now scan the fixups and perform the required changes. */
13381 Mfix * last_added_fix;
13382 Mfix * last_barrier = NULL;
13385 /* Skip any further barriers before the next fix. */
13386 while (fix && GET_CODE (fix->insn) == BARRIER)
13389 /* No more fixes. */
13393 last_added_fix = NULL;
13395 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13397 if (GET_CODE (ftmp->insn) == BARRIER)
13399 if (ftmp->address >= minipool_vector_head->max_address)
13402 last_barrier = ftmp;
13404 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13407 last_added_fix = ftmp; /* Keep track of the last fix added. */
13410 /* If we found a barrier, drop back to that; any fixes that we
13411 could have reached but come after the barrier will now go in
13412 the next mini-pool. */
13413 if (last_barrier != NULL)
13415 /* Reduce the refcount for those fixes that won't go into this pool after all. */
13417 for (fdel = last_barrier->next;
13418 fdel && fdel != ftmp;
13421 fdel->minipool->refcount--;
13422 fdel->minipool = NULL;
13425 ftmp = last_barrier;
13429 /* ftmp is the first fix that we can't fit into this pool and
13430 there are no natural barriers that we could use. Insert a
13431 new barrier in the code somewhere between the previous
13432 fix and this one, and arrange to jump around it. */
13433 HOST_WIDE_INT max_address;
13435 /* The last item on the list of fixes must be a barrier, so
13436 we can never run off the end of the list of fixes without
13437 last_barrier being set. */
13440 max_address = minipool_vector_head->max_address;
13441 /* Check that there isn't another fix that is in range that
13442 we couldn't fit into this pool because the pool was
13443 already too large: we need to put the pool before such an
13444 instruction. The pool itself may come just after the
13445 fix because create_fix_barrier also allows space for a
13446 jump instruction. */
13447 if (ftmp->address < max_address)
13448 max_address = ftmp->address + 1;
13450 last_barrier = create_fix_barrier (last_added_fix, max_address);
13453 assign_minipool_offsets (last_barrier);
13457 if (GET_CODE (ftmp->insn) != BARRIER
13458 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13465 /* Scan over the fixes we have identified for this pool, fixing them
13466 up and adding the constants to the pool itself. */
13467 for (this_fix = fix; this_fix && ftmp != this_fix;
13468 this_fix = this_fix->next)
13469 if (GET_CODE (this_fix->insn) != BARRIER)
13472 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13473 minipool_vector_label),
13474 this_fix->minipool->offset);
13475 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13478 dump_minipool (last_barrier->insn);
13482 /* From now on we must synthesize any constants that we can't handle
13483 directly. This can happen if the RTL gets split during final
13484 instruction generation. */
13485 after_arm_reorg = 1;
13487 /* Free the minipool memory. */
13488 obstack_free (&minipool_obstack, minipool_startobj);
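/* A sketch of the overall effect, for illustration only: a constant
   load whose literal would otherwise sit in an out-of-range section
   pool, e.g.

       ldr r0, .LPOOL      @ .LPOOL too far for a pc-relative load

   is redirected at a minipool dumped after a nearby barrier, keeping
   the offset inside the load's addressing range (roughly +/-4KB for
   an ARM-mode ldr).  */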
13491 /* Routines to output assembly language. */
13493 /* If the rtx is the correct value then return the string of the number.
13494 In this way we can ensure that valid double constants are generated even
13495 when cross compiling. */
13497 fp_immediate_constant (rtx x)
13502 if (!fp_consts_inited)
13505 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13506 for (i = 0; i < 8; i++)
13507 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13508 return strings_fp[i];
13510 gcc_unreachable ();
13513 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13514 static const char *
13515 fp_const_from_val (REAL_VALUE_TYPE *r)
13519 if (!fp_consts_inited)
13522 for (i = 0; i < 8; i++)
13523 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13524 return strings_fp[i];
13526 gcc_unreachable ();
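/* Assumed table contents, for illustration: init_fp_table (elided
   here) is believed to fill values_fp/strings_fp with the eight FPA
   immediate constants 0, 1, 2, 3, 4, 5, 0.5 and 10, so a
   CONST_DOUBLE of 0.5, for example, yields the string "0.5".  */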
13529 /* Output the operands of a LDM/STM instruction to STREAM.
13530 MASK is the ARM register set mask of which only bits 0-15 are important.
13531 REG is the base register, either the frame pointer or the stack pointer,
13532 INSTR is the possibly suffixed load or store instruction.
13533 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13536 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13537 unsigned long mask, int rfe)
13540 bool not_first = FALSE;
13542 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13543 fputc ('\t', stream);
13544 asm_fprintf (stream, instr, reg);
13545 fputc ('{', stream);
13547 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13548 if (mask & (1 << i))
13551 fprintf (stream, ", ");
13553 asm_fprintf (stream, "%r", i);
13558 fprintf (stream, "}^\n");
13560 fprintf (stream, "}\n");
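/* Example output, for illustration only: a call along the lines of

       print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM, 0x8003, 0);

   would print "ldmfd sp!, {r0, r1, pc}"; with RFE nonzero the closing
   brace gains a '^' so that SPSR is copied back into CPSR.  */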
13564 /* Output a FLDMD instruction to STREAM.
13565 BASE is the register containing the address.
13566 REG and COUNT specify the register range.
13567 Extra registers may be added to avoid hardware bugs.
13569 We output FLDMD even for ARMv5 VFP implementations. Although
13570 FLDMD is technically not supported until ARMv6, it is believed
13571 that all VFP implementations support its use in this context. */
13574 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13578 /* Work around the ARM10 VFPr1 bug. */
13579 if (count == 2 && !arm_arch6)
13586 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13587 load into multiple parts if we have to handle more than 16 registers. */
13590 vfp_output_fldmd (stream, base, reg, 16);
13591 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13595 fputc ('\t', stream);
13596 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13598 for (i = reg; i < reg + count; i++)
13601 fputs (", ", stream);
13602 asm_fprintf (stream, "d%d", i);
13604 fputs ("}\n", stream);
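/* For example (illustrative): vfp_output_fldmd (f, SP_REGNUM, 8, 3)
   emits "fldmfdd sp!, {d8, d9, d10}".  A count of exactly 2 on a
   pre-ARMv6 core is first widened by the VFPr1 workaround above
   (details elided) before the list is printed.  */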
13609 /* Output the assembly for a store multiple. */
13612 vfp_output_fstmd (rtx * operands)
13619 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13620 p = strlen (pattern);
13622 gcc_assert (GET_CODE (operands[1]) == REG);
13624 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13625 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13627 p += sprintf (&pattern[p], ", d%d", base + i);
13629 strcpy (&pattern[p], "}");
13631 output_asm_insn (pattern, operands);
13636 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13637 number of bytes pushed. */
13640 vfp_emit_fstmd (int base_reg, int count)
13647 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13648 register pairs are stored by a store multiple insn. We avoid this
13649 by pushing an extra pair. */
13650 if (count == 2 && !arm_arch6)
13652 if (base_reg == LAST_VFP_REGNUM - 3)
13657 /* FSTMD may not store more than 16 doubleword registers at once. Split
13658 larger stores into multiple parts (up to a maximum of two, in practice). */
13663 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
13665 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13666 saved += vfp_emit_fstmd (base_reg, 16);
13670 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13671 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13673 reg = gen_rtx_REG (DFmode, base_reg);
13676 XVECEXP (par, 0, 0)
13677 = gen_rtx_SET (VOIDmode,
13680 gen_rtx_PRE_MODIFY (Pmode,
13683 (stack_pointer_rtx,
13686 gen_rtx_UNSPEC (BLKmode,
13687 gen_rtvec (1, reg),
13688 UNSPEC_PUSH_MULT));
13690 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13691 plus_constant (stack_pointer_rtx, -(count * 8)));
13692 RTX_FRAME_RELATED_P (tmp) = 1;
13693 XVECEXP (dwarf, 0, 0) = tmp;
13695 tmp = gen_rtx_SET (VOIDmode,
13696 gen_frame_mem (DFmode, stack_pointer_rtx),
13698 RTX_FRAME_RELATED_P (tmp) = 1;
13699 XVECEXP (dwarf, 0, 1) = tmp;
13701 for (i = 1; i < count; i++)
13703 reg = gen_rtx_REG (DFmode, base_reg);
13705 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13707 tmp = gen_rtx_SET (VOIDmode,
13708 gen_frame_mem (DFmode,
13709 plus_constant (stack_pointer_rtx,
13712 RTX_FRAME_RELATED_P (tmp) = 1;
13713 XVECEXP (dwarf, 0, i + 1) = tmp;
13716 par = emit_insn (par);
13717 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13718 RTX_FRAME_RELATED_P (par) = 1;
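/* Usage sketch (illustrative): vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 2)
   on an ARMv6-or-later core pushes {d8, d9} with one fstmfdd,
   dropping sp by 16 bytes -- the value returned -- while the
   REG_FRAME_RELATED_EXPR note built above describes the same stores
   to the unwinder.  */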
13723 /* Emit a call instruction with pattern PAT. ADDR is the address of
13724 the call target. */
13727 arm_emit_call_insn (rtx pat, rtx addr)
13731 insn = emit_call_insn (pat);
13733 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13734 If the call might use such an entry, add a use of the PIC register
13735 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13736 if (TARGET_VXWORKS_RTP
13738 && GET_CODE (addr) == SYMBOL_REF
13739 && (SYMBOL_REF_DECL (addr)
13740 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13741 : !SYMBOL_REF_LOCAL_P (addr)))
13743 require_pic_register ();
13744 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13748 /* Output a 'call' insn. */
13750 output_call (rtx *operands)
13752 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13754 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13755 if (REGNO (operands[0]) == LR_REGNUM)
13757 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13758 output_asm_insn ("mov%?\t%0, %|lr", operands);
13761 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13763 if (TARGET_INTERWORK || arm_arch4t)
13764 output_asm_insn ("bx%?\t%0", operands);
13766 output_asm_insn ("mov%?\t%|pc, %0", operands);
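/* Example sequences, for illustration: on a pre-ARMv5 core a call
   through r2 becomes

       mov  lr, pc
       bx   r2          @ with interworking or ARMv4t

   or "mov pc, r2" in the final position otherwise; the mov into lr
   captures the return address first.  */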
13771 /* Output a 'call' insn that is a reference in memory. This is
13772 disabled for ARMv5 and we prefer a blx instead because otherwise
13773 there's a significant performance overhead. */
13775 output_call_mem (rtx *operands)
13777 gcc_assert (!arm_arch5);
13778 if (TARGET_INTERWORK)
13780 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13781 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13782 output_asm_insn ("bx%?\t%|ip", operands);
13784 else if (regno_use_in (LR_REGNUM, operands[0]))
13786 /* LR is used in the memory address. We load the address in the
13787 first instruction. It's safe to use IP as the target of the
13788 load since the call will kill it anyway. */
13789 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13790 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13792 output_asm_insn ("bx%?\t%|ip", operands);
13794 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13798 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13799 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13806 /* Output a move from arm registers to an fpa register.
13807 OPERANDS[0] is an fpa register.
13808 OPERANDS[1] is the first of the three arm registers. */
13810 output_mov_long_double_fpa_from_arm (rtx *operands)
13812 int arm_reg0 = REGNO (operands[1]);
13815 gcc_assert (arm_reg0 != IP_REGNUM);
13817 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13818 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13819 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13821 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13822 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13827 /* Output a move from an fpa register to arm registers.
13828 OPERANDS[0] is the first of the three arm registers.
13829 OPERANDS[1] is an fpa register. */
13831 output_mov_long_double_arm_from_fpa (rtx *operands)
13833 int arm_reg0 = REGNO (operands[0]);
13836 gcc_assert (arm_reg0 != IP_REGNUM);
13838 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13839 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13840 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13842 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13843 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13847 /* Output a move from arm registers to arm registers of a long double.
13848 OPERANDS[0] is the destination.
13849 OPERANDS[1] is the source. */
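/* Illustrative note (not in the original): moving {r1, r2, r3} into
   {r2, r3, r4} overlaps upwards, so the body below copies downwards
   (r4 <- r3, r3 <- r2, r2 <- r1) to avoid clobbering a source before
   it is read; non-overlapping or downward moves copy upwards.  */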
13851 output_mov_long_double_arm_from_arm (rtx *operands)
13853 /* We have to be careful here because the two might overlap. */
13854 int dest_start = REGNO (operands[0]);
13855 int src_start = REGNO (operands[1]);
13859 if (dest_start < src_start)
13861 for (i = 0; i < 3; i++)
13863 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13864 ops[1] = gen_rtx_REG (SImode, src_start + i);
13865 output_asm_insn ("mov%?\t%0, %1", ops);
13870 for (i = 2; i >= 0; i--)
13872 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13873 ops[1] = gen_rtx_REG (SImode, src_start + i);
13874 output_asm_insn ("mov%?\t%0, %1", ops);
13882 arm_emit_movpair (rtx dest, rtx src)
13884 /* If the src is an immediate, simplify it. */
13885 if (CONST_INT_P (src))
13887 HOST_WIDE_INT val = INTVAL (src);
13888 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13889 if ((val >> 16) & 0x0000ffff)
13890 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13892 GEN_INT ((val >> 16) & 0x0000ffff));
13895 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13896 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
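/* Sketch of the expansion, for illustration: with MOVW/MOVT available,
   arm_emit_movpair (reg, GEN_INT (0x12345678)) ends up as

       movw rD, #0x5678
       movt rD, #0x1234

   and a value whose high half is zero needs only the first insn.  */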
13899 /* Output a move from arm registers to an fpa register.
13900 OPERANDS[0] is an fpa register.
13901 OPERANDS[1] is the first register of an arm register pair. */
13903 output_mov_double_fpa_from_arm (rtx *operands)
13905 int arm_reg0 = REGNO (operands[1]);
13908 gcc_assert (arm_reg0 != IP_REGNUM);
13910 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13911 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13912 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13913 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13917 /* Output a move from an fpa register to arm registers.
13918 OPERANDS[0] is the first register of an arm register pair.
13919 OPERANDS[1] is an fpa register. */
13921 output_mov_double_arm_from_fpa (rtx *operands)
13923 int arm_reg0 = REGNO (operands[0]);
13926 gcc_assert (arm_reg0 != IP_REGNUM);
13928 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13929 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13930 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13931 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13935 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
13938 output_move_double (rtx *operands, bool emit, int *count)
13940 enum rtx_code code0 = GET_CODE (operands[0]);
13941 enum rtx_code code1 = GET_CODE (operands[1]);
13946 /* The only case when this might happen is when
13947 you are looking at the length of a DImode instruction
13948 that has an invalid constant in it. */
13949 if (code0 == REG && code1 != MEM)
13951 gcc_assert (!emit);
13958 unsigned int reg0 = REGNO (operands[0]);
13960 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13962 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13964 switch (GET_CODE (XEXP (operands[1], 0)))
13971 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
13972 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13974 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13979 gcc_assert (TARGET_LDRD);
13981 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13988 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13990 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13998 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14000 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14005 gcc_assert (TARGET_LDRD);
14007 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14012 /* Autoincrement addressing modes should never have overlapping
14013 base and destination registers, and overlapping index registers
14014 are already prohibited, so this doesn't need to worry about
14016 otherops[0] = operands[0];
14017 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14018 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14020 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14022 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14024 /* Registers overlap so split out the increment. */
14027 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14028 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14035 /* Use a single insn if we can.
14036 FIXME: IWMMXT allows offsets larger than ldrd can
14037 handle; fix these up with a pair of ldr. */
14039 || GET_CODE (otherops[2]) != CONST_INT
14040 || (INTVAL (otherops[2]) > -256
14041 && INTVAL (otherops[2]) < 256))
14044 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14050 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14051 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14061 /* Use a single insn if we can.
14062 FIXME: IWMMXT allows offsets larger than ldrd can handle;
14063 fix these up with a pair of ldr. */
14065 || GET_CODE (otherops[2]) != CONST_INT
14066 || (INTVAL (otherops[2]) > -256
14067 && INTVAL (otherops[2]) < 256))
14070 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14087 /* We might be able to use ldrd %0, %1 here. However, the range is
14088 different to ldr/adr, and it is broken on some ARMv7-M
14089 implementations. */
14090 /* Use the second register of the pair to avoid problematic
14092 otherops[1] = operands[1];
14094 output_asm_insn ("adr%?\t%0, %1", otherops);
14095 operands[1] = otherops[0];
14099 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14101 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14108 /* ??? This needs checking for thumb2. */
14110 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14111 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14113 otherops[0] = operands[0];
14114 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14115 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14117 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14119 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14121 switch ((int) INTVAL (otherops[2]))
14125 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14131 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14137 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14141 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14142 operands[1] = otherops[0];
14144 && (GET_CODE (otherops[2]) == REG
14146 || (GET_CODE (otherops[2]) == CONST_INT
14147 && INTVAL (otherops[2]) > -256
14148 && INTVAL (otherops[2]) < 256)))
14150 if (reg_overlap_mentioned_p (operands[0],
14154 /* Swap base and index registers over to
14155 avoid a conflict. */
14157 otherops[1] = otherops[2];
14160 /* If both registers conflict, it will usually
14161 have been fixed by a splitter. */
14162 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14163 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14167 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14168 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14175 otherops[0] = operands[0];
14177 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14182 if (GET_CODE (otherops[2]) == CONST_INT)
14186 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14187 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14189 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14195 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14201 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14205 return "ldr%(d%)\t%0, [%1]";
14207 return "ldm%(ia%)\t%1, %M0";
14211 otherops[1] = adjust_address (operands[1], SImode, 4);
14212 /* Take care of overlapping base/data reg. */
14213 if (reg_mentioned_p (operands[0], operands[1]))
14217 output_asm_insn ("ldr%?\t%0, %1", otherops);
14218 output_asm_insn ("ldr%?\t%0, %1", operands);
14228 output_asm_insn ("ldr%?\t%0, %1", operands);
14229 output_asm_insn ("ldr%?\t%0, %1", otherops);
14239 /* Constraints should ensure this. */
14240 gcc_assert (code0 == MEM && code1 == REG);
14241 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14243 switch (GET_CODE (XEXP (operands[0], 0)))
14249 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14251 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14256 gcc_assert (TARGET_LDRD);
14258 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14265 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14267 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14275 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14277 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14282 gcc_assert (TARGET_LDRD);
14284 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14289 otherops[0] = operands[1];
14290 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14291 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14293 /* IWMMXT allows offsets larger than ldrd can handle;
14294 fix these up with a pair of ldr. */
14296 && GET_CODE (otherops[2]) == CONST_INT
14297 && (INTVAL (otherops[2]) <= -256
14298 || INTVAL (otherops[2]) >= 256))
14300 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14304 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14305 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14314 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14315 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14321 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14324 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14329 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14334 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14335 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14337 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14341 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14348 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14355 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14360 && (GET_CODE (otherops[2]) == REG
14362 || (GET_CODE (otherops[2]) == CONST_INT
14363 && INTVAL (otherops[2]) > -256
14364 && INTVAL (otherops[2]) < 256)))
14366 otherops[0] = operands[1];
14367 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14369 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14375 otherops[0] = adjust_address (operands[0], SImode, 4);
14376 otherops[1] = operands[1];
14379 output_asm_insn ("str%?\t%1, %0", operands);
14380 output_asm_insn ("str%?\t%H1, %0", otherops);
14390 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14391 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14394 output_move_quad (rtx *operands)
14396 if (REG_P (operands[0]))
14398 /* Load, or reg->reg move. */
14400 if (MEM_P (operands[1]))
14402 switch (GET_CODE (XEXP (operands[1], 0)))
14405 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14410 output_asm_insn ("adr%?\t%0, %1", operands);
14411 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14415 gcc_unreachable ();
14423 gcc_assert (REG_P (operands[1]));
14425 dest = REGNO (operands[0]);
14426 src = REGNO (operands[1]);
14428 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
14431 for (i = 0; i < 4; i++)
14433 ops[0] = gen_rtx_REG (SImode, dest + i);
14434 ops[1] = gen_rtx_REG (SImode, src + i);
14435 output_asm_insn ("mov%?\t%0, %1", ops);
14438 for (i = 3; i >= 0; i--)
14440 ops[0] = gen_rtx_REG (SImode, dest + i);
14441 ops[1] = gen_rtx_REG (SImode, src + i);
14442 output_asm_insn ("mov%?\t%0, %1", ops);
14448 gcc_assert (MEM_P (operands[0]));
14449 gcc_assert (REG_P (operands[1]));
14450 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14452 switch (GET_CODE (XEXP (operands[0], 0)))
14455 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14459 gcc_unreachable ();
14466 /* Output a VFP load or store instruction. */
14469 output_move_vfp (rtx *operands)
14471 rtx reg, mem, addr, ops[2];
14472 int load = REG_P (operands[0]);
14473 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14474 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14477 enum machine_mode mode;
14479 reg = operands[!load];
14480 mem = operands[load];
14482 mode = GET_MODE (reg);
14484 gcc_assert (REG_P (reg));
14485 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14486 gcc_assert (mode == SFmode
14490 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14491 gcc_assert (MEM_P (mem));
14493 addr = XEXP (mem, 0);
14495 switch (GET_CODE (addr))
14498 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14499 ops[0] = XEXP (addr, 0);
14504 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14505 ops[0] = XEXP (addr, 0);
14510 templ = "f%s%c%%?\t%%%s0, %%1%s";
14516 sprintf (buff, templ,
14517 load ? "ld" : "st",
14520 integer_p ? "\t%@ int" : "");
14521 output_asm_insn (buff, ops);
14526 /* Output a Neon quad-word load or store, or a load or store for
14527 larger structure modes.
14529 WARNING: The ordering of elements is weird in big-endian mode,
14530 because we use VSTM, as required by the EABI. GCC RTL defines
14531 element ordering based on in-memory order. This can differ
14532 from the architectural ordering of elements within a NEON register.
14533 The intrinsics defined in arm_neon.h use the NEON register element
14534 ordering, not the GCC RTL element ordering.
14536 For example, the in-memory ordering of a big-endian quadword
14537 vector with 16-bit elements when stored from register pair {d0,d1}
14538 will be (lowest address first, d0[N] is NEON register element N):
14540 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14542 When necessary, quadword registers (dN, dN+1) are moved to ARM
14543 registers from rN in the order:
14545 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14547 This ensures that STM/LDM can be used on vectors in ARM registers, and the
14548 same memory layout will result as if VSTM/VLDM were used. */
14551 output_move_neon (rtx *operands)
14553 rtx reg, mem, addr, ops[2];
14554 int regno, load = REG_P (operands[0]);
14557 enum machine_mode mode;
14559 reg = operands[!load];
14560 mem = operands[load];
14562 mode = GET_MODE (reg);
14564 gcc_assert (REG_P (reg));
14565 regno = REGNO (reg);
14566 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14567 || NEON_REGNO_OK_FOR_QUAD (regno));
14568 gcc_assert (VALID_NEON_DREG_MODE (mode)
14569 || VALID_NEON_QREG_MODE (mode)
14570 || VALID_NEON_STRUCT_MODE (mode));
14571 gcc_assert (MEM_P (mem));
14573 addr = XEXP (mem, 0);
14575 /* Strip off const from addresses like (const (plus (...))). */
14576 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14577 addr = XEXP (addr, 0);
14579 switch (GET_CODE (addr))
14582 templ = "v%smia%%?\t%%0!, %%h1";
14583 ops[0] = XEXP (addr, 0);
14588 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14589 templ = "v%smdb%%?\t%%0!, %%h1";
14590 ops[0] = XEXP (addr, 0);
14595 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14596 gcc_unreachable ();
14601 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14604 for (i = 0; i < nregs; i++)
14606 /* We're only using DImode here because it's a convenient size. */
14607 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14608 ops[1] = adjust_address (mem, DImode, 8 * i);
14609 if (reg_overlap_mentioned_p (ops[0], mem))
14611 gcc_assert (overlap == -1);
14616 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14617 output_asm_insn (buff, ops);
14622 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14623 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14624 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14625 output_asm_insn (buff, ops);
14632 templ = "v%smia%%?\t%%m0, %%h1";
14637 sprintf (buff, templ, load ? "ld" : "st");
14638 output_asm_insn (buff, ops);
14643 /* Compute and return the length of neon_mov<mode>, where <mode> is
14644 one of VSTRUCT modes: EI, OI, CI or XI. */
14646 arm_attr_length_move_neon (rtx insn)
14648 rtx reg, mem, addr;
14650 enum machine_mode mode;
14652 extract_insn_cached (insn);
14654 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14656 mode = GET_MODE (recog_data.operand[0]);
14667 gcc_unreachable ();
14671 load = REG_P (recog_data.operand[0]);
14672 reg = recog_data.operand[!load];
14673 mem = recog_data.operand[load];
14675 gcc_assert (MEM_P (mem));
14677 mode = GET_MODE (reg);
14678 addr = XEXP (mem, 0);
14680 /* Strip off const from addresses like (const (plus (...))). */
14681 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14682 addr = XEXP (addr, 0);
14684 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14686 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14693 /* Return nonzero if the offset in the address is an immediate, and zero otherwise. */
14697 arm_address_offset_is_imm (rtx insn)
14701 extract_insn_cached (insn);
14703 if (REG_P (recog_data.operand[0]))
14706 mem = recog_data.operand[0];
14708 gcc_assert (MEM_P (mem));
14710 addr = XEXP (mem, 0);
14712 if (GET_CODE (addr) == REG
14713 || (GET_CODE (addr) == PLUS
14714 && GET_CODE (XEXP (addr, 0)) == REG
14715 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14721 /* Output an ADD r, s, #n where n may be too big for one instruction.
14722 If the add is a no-op (adding zero to the same register), output nothing. */
14724 output_add_immediate (rtx *operands)
14726 HOST_WIDE_INT n = INTVAL (operands[2]);
14728 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14731 output_multi_immediate (operands,
14732 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14735 output_multi_immediate (operands,
14736 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14743 /* Output a multiple immediate operation.
14744 OPERANDS is the vector of operands referred to in the output patterns.
14745 INSTR1 is the output pattern to use for the first constant.
14746 INSTR2 is the output pattern to use for subsequent constants.
14747 IMMED_OP is the index of the constant slot in OPERANDS.
14748 N is the constant value. */
14749 static const char *
14750 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14751 int immed_op, HOST_WIDE_INT n)
14753 #if HOST_BITS_PER_WIDE_INT > 32
14759 /* Quick and easy output. */
14760 operands[immed_op] = const0_rtx;
14761 output_asm_insn (instr1, operands);
14766 const char * instr = instr1;
14768 /* Note that n is never zero here (which would give no output). */
14769 for (i = 0; i < 32; i += 2)
14773 operands[immed_op] = GEN_INT (n & (255 << i));
14774 output_asm_insn (instr, operands);
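/* Worked example (illustrative): an add of n = 0x45000012 cannot be
   encoded in one instruction, so the loop above emits one insn per
   8-bit rotated field: first #0x12 using INSTR1, then #0x45000000
   using INSTR2 against the partial result.  */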
14784 /* Return the name of a shifter operation. */
14785 static const char *
14786 arm_shift_nmem (enum rtx_code code)
14791 return ARM_LSL_NAME;
14807 /* Return the appropriate ARM instruction for the operation code.
14808 The returned result should not be overwritten. OP is the rtx of the
14809 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was shifted. */
14812 arithmetic_instr (rtx op, int shift_first_arg)
14814 switch (GET_CODE (op))
14820 return shift_first_arg ? "rsb" : "sub";
14835 return arm_shift_nmem (GET_CODE (op));
14838 gcc_unreachable ();
14842 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14843 for the operation code. The returned result should not be overwritten.
14844 OP is the rtx code of the shift.
14845 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant shift amount otherwise. */
14847 static const char *
14848 shift_op (rtx op, HOST_WIDE_INT *amountp)
14851 enum rtx_code code = GET_CODE (op);
14853 switch (GET_CODE (XEXP (op, 1)))
14861 *amountp = INTVAL (XEXP (op, 1));
14865 gcc_unreachable ();
14871 gcc_assert (*amountp != -1);
14872 *amountp = 32 - *amountp;
14875 /* Fall through. */
14881 mnem = arm_shift_nmem (code);
14885 /* We never have to worry about the amount being other than a
14886 power of 2, since this case can never be reloaded from a reg. */
14887 gcc_assert (*amountp != -1);
14888 *amountp = int_log2 (*amountp);
14889 return ARM_LSL_NAME;
14892 gcc_unreachable ();
14895 if (*amountp != -1)
14897 /* This is not 100% correct, but follows from the desire to merge
14898 multiplication by a power of 2 with the recognizer for a
14899 shift. >=32 is not a valid shift for "lsl", so we must try and
14900 output a shift that produces the correct arithmetical result.
14901 Using lsr #32 is identical except for the fact that the carry bit
14902 is not set correctly if we set the flags; but we never use the
14903 carry bit from such an operation, so we can ignore that. */
14904 if (code == ROTATERT)
14905 /* Rotate is just modulo 32. */
14907 else if (*amountp != (*amountp & 31))
14909 if (code == ASHIFT)
14914 /* Shifts of 0 are no-ops. */
14922 /* Obtain the shift from the POWER of two. */
14924 static HOST_WIDE_INT
14925 int_log2 (HOST_WIDE_INT power)
14927 HOST_WIDE_INT shift = 0;
14929 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14931 gcc_assert (shift <= 31);
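/* For example: int_log2 (8) returns 3, which shift_op above uses to
   rewrite a multiply by a power of two as an equivalent "lsl".  */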
14938 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14939 because /bin/as is horribly restrictive. The judgement about
14940 whether or not each character is 'printable' (and can be output as
14941 is) or not (and must be printed with an octal escape) must be made
14942 with reference to the *host* character set -- the situation is
14943 similar to that discussed in the comments above pp_c_char in
14944 c-pretty-print.c. */
14946 #define MAX_ASCII_LEN 51
14949 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14952 int len_so_far = 0;
14954 fputs ("\t.ascii\t\"", stream);
14956 for (i = 0; i < len; i++)
14960 if (len_so_far >= MAX_ASCII_LEN)
14962 fputs ("\"\n\t.ascii\t\"", stream);
14968 if (c == '\\' || c == '\"')
14970 putc ('\\', stream);
14978 fprintf (stream, "\\%03o", c);
14983 fputs ("\"\n", stream);
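/* Example output (illustrative): for the bytes "hi\n" this emits

       .ascii "hi\012"

   printing the newline as an octal escape, and it opens a fresh
   .ascii directive whenever MAX_ASCII_LEN characters have been
   written on the current line.  */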
14986 /* Compute the register save mask for registers 0 through 12
14987 inclusive. This code is used by arm_compute_save_reg_mask. */
14989 static unsigned long
14990 arm_compute_save_reg0_reg12_mask (void)
14992 unsigned long func_type = arm_current_func_type ();
14993 unsigned long save_reg_mask = 0;
14996 if (IS_INTERRUPT (func_type))
14998 unsigned int max_reg;
14999 /* Interrupt functions must not corrupt any registers,
15000 even call clobbered ones. If this is a leaf function
15001 we can just examine the registers used by the RTL, but
15002 otherwise we have to assume that whatever function is
15003 called might clobber anything, and so we have to save
15004 all the call-clobbered registers as well. */
15005 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15006 /* FIQ handlers have registers r8 - r12 banked, so
15007 we only need to check r0 - r7. Normal ISRs only
15008 bank r14 and r15, so we must check up to r12.
15009 r13 is the stack pointer which is always preserved,
15010 so we do not need to consider it here. */
15015 for (reg = 0; reg <= max_reg; reg++)
15016 if (df_regs_ever_live_p (reg)
15017 || (! current_function_is_leaf && call_used_regs[reg]))
15018 save_reg_mask |= (1 << reg);
15020 /* Also save the pic base register if necessary. */
15022 && !TARGET_SINGLE_PIC_BASE
15023 && arm_pic_register != INVALID_REGNUM
15024 && crtl->uses_pic_offset_table)
15025 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15027 else if (IS_VOLATILE (func_type))
15029 /* For noreturn functions we historically omitted register saves
15030 altogether. However, this really messes up debugging. As a
15031 compromise, save just the frame pointers. Combined with the link
15032 register saved elsewhere, this should be sufficient to get a backtrace. */
15034 if (frame_pointer_needed)
15035 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15036 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15037 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15038 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15039 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15043 /* In the normal case we only need to save those registers
15044 which are call saved and which are used by this function. */
15045 for (reg = 0; reg <= 11; reg++)
15046 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15047 save_reg_mask |= (1 << reg);
15049 /* Handle the frame pointer as a special case. */
15050 if (frame_pointer_needed)
15051 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15053 /* If we aren't loading the PIC register,
15054 don't stack it even though it may be live. */
15056 && !TARGET_SINGLE_PIC_BASE
15057 && arm_pic_register != INVALID_REGNUM
15058 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15059 || crtl->uses_pic_offset_table))
15060 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15062 /* The prologue will copy SP into R0, so save it. */
15063 if (IS_STACKALIGN (func_type))
15064 save_reg_mask |= 1;
15067 /* Save registers so the exception handler can modify them. */
15068 if (crtl->calls_eh_return)
15074 reg = EH_RETURN_DATA_REGNO (i);
15075 if (reg == INVALID_REGNUM)
15077 save_reg_mask |= 1 << reg;
15081 return save_reg_mask;
15085 /* Compute the number of bytes used to store the static chain register on the
15086 stack, above the stack frame. We need to know this accurately to get the
15087 alignment of the rest of the stack frame correct. */
15089 static int
arm_compute_static_chain_stack_bytes (void)
15091 unsigned long func_type = arm_current_func_type ();
15092 int static_chain_stack_bytes = 0;
15094 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15095 && IS_NESTED (func_type)
15096 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15097 static_chain_stack_bytes = 4;
15099 return static_chain_stack_bytes;
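/* Illustration: in an APCS-frame, ARM-mode nested function where r3
   is live on entry and there are no pretend args, the prologue is
   assumed to spill the static chain register (ip) to the stack, so 4
   bytes are reserved above the frame; every other case reports 0 and
   leaves the stack-alignment arithmetic unchanged.  */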
15103 /* Compute a bit mask of which registers need to be
15104 saved on the stack for the current function.
15105 This is used by arm_get_frame_offsets, which may add extra registers. */
15107 static unsigned long
15108 arm_compute_save_reg_mask (void)
15110 unsigned int save_reg_mask = 0;
15111 unsigned long func_type = arm_current_func_type ();
15114 if (IS_NAKED (func_type))
15115 /* This should never really happen. */
15118 /* If we are creating a stack frame, then we must save the frame pointer,
15119 IP (which will hold the old stack pointer), LR and the PC. */
15120 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15122 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15125 | (1 << PC_REGNUM);
15127 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15129 /* Decide if we need to save the link register.
15130 Interrupt routines have their own banked link register,
15131 so they never need to save it.
15132 Otherwise if we do not use the link register we do not need to save
15133 it. If we are pushing other registers onto the stack however, we
15134 can save an instruction in the epilogue by pushing the link register
15135 now and then popping it back into the PC. This incurs extra memory
15136 accesses though, so we only do it when optimizing for size, and only
15137 if we know that we will not need a fancy return sequence. */
15138 if (df_regs_ever_live_p (LR_REGNUM)
15141 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15142 && !crtl->calls_eh_return))
15143 save_reg_mask |= 1 << LR_REGNUM;
15145 if (cfun->machine->lr_save_eliminated)
15146 save_reg_mask &= ~ (1 << LR_REGNUM);
15148 if (TARGET_REALLY_IWMMXT
15149 && ((bit_count (save_reg_mask)
15150 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15151 arm_compute_static_chain_stack_bytes())
15154 /* The total number of registers that are going to be pushed
15155 onto the stack is odd. We need to ensure that the stack
15156 is 64-bit aligned before we start to save iWMMXt registers,
15157 and also before we start to create locals. (A local variable
15158 might be a double or long long which we will load/store using
15159 an iWMMXt instruction). Therefore we need to push another
15160 ARM register, so that the stack will be 64-bit aligned. We
15161 try to avoid using the arg registers (r0 -r3) as they might be
15162 used to pass values in a tail call. */
15163 for (reg = 4; reg <= 12; reg++)
15164 if ((save_reg_mask & (1 << reg)) == 0)
15168 save_reg_mask |= (1 << reg);
15171 cfun->machine->sibcall_blocked = 1;
15172 save_reg_mask |= (1 << 3);
15176 /* We may need to push an additional register for use initializing the
15177 PIC base register. */
15178 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15179 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15181 reg = thumb_find_work_register (1 << 4);
15182 if (!call_used_regs[reg])
15183 save_reg_mask |= (1 << reg);
15186 return save_reg_mask;
15190 /* Compute a bit mask of which registers need to be
15191 saved on the stack for the current function. */
15192 static unsigned long
15193 thumb1_compute_save_reg_mask (void)
15195 unsigned long mask;
15199 for (reg = 0; reg < 12; reg++)
15200 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15204 && !TARGET_SINGLE_PIC_BASE
15205 && arm_pic_register != INVALID_REGNUM
15206 && crtl->uses_pic_offset_table)
15207 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15209 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15210 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15211 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15213 /* LR will also be pushed if any lo regs are pushed. */
15214 if (mask & 0xff || thumb_force_lr_save ())
15215 mask |= (1 << LR_REGNUM);
15217 /* Make sure we have a low work register if we need one.
15218 We will need one if we are going to push a high register,
15219 but we are not currently intending to push a low register. */
15220 if ((mask & 0xff) == 0
15221 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15223 /* Use thumb_find_work_register to choose which register
15224 we will use. If the register is live then we will
15225 have to push it. Use LAST_LO_REGNUM as our fallback
15226 choice for the register to select. */
15227 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15228 /* Make sure the register returned by thumb_find_work_register is
15229 not part of the return value. */
15230 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15231 reg = LAST_LO_REGNUM;
15233 if (! call_used_regs[reg])
15237 /* The 504 below is 8 bytes less than 512 because there are two possible
15238 alignment words. We can't tell here if they will be present or not so we
15239 have to play it safe and assume that they are. */
15240 if ((CALLER_INTERWORKING_SLOT_SIZE +
15241 ROUND_UP_WORD (get_frame_size ()) +
15242 crtl->outgoing_args_size) >= 504)
15244 /* This is the same as the code in thumb1_expand_prologue() which
15245 determines which register to use for stack decrement. */
15246 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15247 if (mask & (1 << reg))
15250 if (reg > LAST_LO_REGNUM)
15252 /* Make sure we have a register available for stack decrement. */
15253 mask |= 1 << LAST_LO_REGNUM;
15261 /* Return the number of bytes required to save VFP registers. */
15263 arm_get_vfp_saved_size (void)
15265 unsigned int regno;
15270 /* Space for saved VFP registers. */
15271 if (TARGET_HARD_FLOAT && TARGET_VFP)
15274 for (regno = FIRST_VFP_REGNUM;
15275 regno < LAST_VFP_REGNUM;
15278 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15279 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15283 /* Work around the ARM10 VFPr1 bug. */
15284 if (count == 2 && !arm_arch6)
15286 saved += count * 8;
15295 if (count == 2 && !arm_arch6)
15297 saved += count * 8;
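/* Worked example (illustrative): with only d8 and d9 live as
   call-saved VFP registers on an ARMv6-or-later core, count is 2 and
   16 bytes are accumulated; on earlier cores the VFPr1 workaround
   above is believed to widen the block by one more pair, giving 24.  */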
15304 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15305 everything bar the final return instruction. */
15307 output_return_instruction (rtx operand, int really_return, int reverse)
15309 char conditional[10];
15312 unsigned long live_regs_mask;
15313 unsigned long func_type;
15314 arm_stack_offsets *offsets;
15316 func_type = arm_current_func_type ();
15318 if (IS_NAKED (func_type))
15321 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15323 /* If this function was declared non-returning, and we have
15324 found a tail call, then we have to trust that the called
15325 function won't return. */
15330 /* Otherwise, trap an attempted return by aborting. */
15332 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15334 assemble_external_libcall (ops[1]);
15335 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15341 gcc_assert (!cfun->calls_alloca || really_return);
15343 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15345 cfun->machine->return_used_this_function = 1;
15347 offsets = arm_get_frame_offsets ();
15348 live_regs_mask = offsets->saved_regs_mask;
15350 if (live_regs_mask)
15352 const char * return_reg;
15354 /* If we do not have any special requirements for function exit
15355 (e.g. interworking) then we can load the return address
15356 directly into the PC. Otherwise we must load it into LR. */
15358 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15359 return_reg = reg_names[PC_REGNUM];
15361 return_reg = reg_names[LR_REGNUM];
15363 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15365 /* There are three possible reasons for the IP register
15366 being saved. 1) a stack frame was created, in which case
15367 IP contains the old stack pointer, or 2) an ISR routine
15368 corrupted it, or 3) it was saved to align the stack on
15369 iWMMXt. In case 1, restore IP into SP, otherwise just pop IP. */
15371 if (frame_pointer_needed)
15373 live_regs_mask &= ~ (1 << IP_REGNUM);
15374 live_regs_mask |= (1 << SP_REGNUM);
15377 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15380 /* On some ARM architectures it is faster to use LDR rather than
15381 LDM to load a single register. On other architectures, the
15382 cost is the same. In 26 bit mode, or for exception handlers,
15383 we have to use LDM to load the PC so that the CPSR is also restored. */
15385 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15386 if (live_regs_mask == (1U << reg))
15389 if (reg <= LAST_ARM_REGNUM
15390 && (reg != LR_REGNUM
15392 || ! IS_INTERRUPT (func_type)))
15394 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15395 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15402 /* Generate the load multiple instruction to restore the
15403 registers. Note we can get here, even if
15404 frame_pointer_needed is true, but only if sp already
15405 points to the base of the saved core registers. */
15406 if (live_regs_mask & (1 << SP_REGNUM))
15408 unsigned HOST_WIDE_INT stack_adjust;
15410 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15411 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15413 if (stack_adjust && arm_arch5 && TARGET_ARM)
15414 if (TARGET_UNIFIED_ASM)
15415 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15417 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15420 /* If we can't use ldmib (SA110 bug),
15421 then try to pop r3 instead. */
15423 live_regs_mask |= 1 << 3;
15425 if (TARGET_UNIFIED_ASM)
15426 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15428 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15432 if (TARGET_UNIFIED_ASM)
15433 sprintf (instr, "pop%s\t{", conditional);
15435 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15437 p = instr + strlen (instr);
15439 for (reg = 0; reg <= SP_REGNUM; reg++)
15440 if (live_regs_mask & (1 << reg))
15442 int l = strlen (reg_names[reg]);
15448 memcpy (p, ", ", 2);
15452 memcpy (p, "%|", 2);
15453 memcpy (p + 2, reg_names[reg], l);
15457 if (live_regs_mask & (1 << LR_REGNUM))
15459 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15460 /* If returning from an interrupt, restore the CPSR. */
15461 if (IS_INTERRUPT (func_type))
15468 output_asm_insn (instr, & operand);
15470 /* See if we need to generate an extra instruction to
15471 perform the actual function return. */
15473 && func_type != ARM_FT_INTERWORKED
15474 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15476 /* The return has already been handled
15477 by loading the LR into the PC. */
15484 switch ((int) ARM_FUNC_TYPE (func_type))
15488 /* ??? This is wrong for unified assembly syntax. */
15489 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15492 case ARM_FT_INTERWORKED:
15493 sprintf (instr, "bx%s\t%%|lr", conditional);
15496 case ARM_FT_EXCEPTION:
15497 /* ??? This is wrong for unified assembly syntax. */
15498 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15502 /* Use bx if it's available. */
15503 if (arm_arch5 || arm_arch4t)
15504 sprintf (instr, "bx%s\t%%|lr", conditional);
15506 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15510 output_asm_insn (instr, & operand);
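/* Example results, for illustration: a normal function with saved
   core registers typically folds its return into the pop, e.g.
   "ldmfd sp!, {r4, r5, pc}", while a register-free leaf reaches the
   switch above and returns with "bx lr" on ARMv4t or later, or
   "mov pc, lr" on older cores.  */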
15516 /* Write the function name into the code section, directly preceding
15517 the function prologue.
15519 Code will be output similar to this:
15521 .ascii "arm_poke_function_name", 0
15524 .word 0xff000000 + (t1 - t0)
15525 arm_poke_function_name
15527 stmfd sp!, {fp, ip, lr, pc}
15530 When performing a stack backtrace, code can inspect the value
15531 of 'pc' stored at 'fp' + 0. If the trace function then looks
15532 at location pc - 12 and the top 8 bits are set, then we know
15533 that there is a function name embedded immediately preceding this
15534 location, whose padded length is (pc[-3] & 0x00ffffff).
15536 We assume that pc is declared as a pointer to an unsigned long.
15538 It is of no benefit to output the function name if we are assembling
15539 a leaf function. These function types will not contain a stack
15540 backtrace structure; therefore it is not possible to determine the function name. */
15543 arm_poke_function_name (FILE *stream, const char *name)
15545 unsigned long alignlength;
15546 unsigned long length;
15549 length = strlen (name) + 1;
15550 alignlength = ROUND_UP_WORD (length);
15552 ASM_OUTPUT_ASCII (stream, name, length);
15553 ASM_OUTPUT_ALIGN (stream, 2);
15554 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15555 assemble_aligned_integer (UNITS_PER_WORD, x);
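/* Sketch of the emitted preamble (illustrative), matching the layout
   documented above for a function named "foo" ("foo\0" pads to 4
   bytes):

       .ascii "foo\000"
       .align 2
       .word  0xff000004
   foo:
       ...                  @ prologue follows
   */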
15558 /* Place some comments into the assembler stream
15559 describing the current function. */
15561 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15563 unsigned long func_type;
15565 /* ??? Do we want to print some of the below anyway? */
15569 /* Sanity check. */
15570 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15572 func_type = arm_current_func_type ();
15574 switch ((int) ARM_FUNC_TYPE (func_type))
15577 case ARM_FT_NORMAL:
15579 case ARM_FT_INTERWORKED:
15580 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15583 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15586 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15588 case ARM_FT_EXCEPTION:
15589 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15593 if (IS_NAKED (func_type))
15594 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15596 if (IS_VOLATILE (func_type))
15597 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15599 if (IS_NESTED (func_type))
15600 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15601 if (IS_STACKALIGN (func_type))
15602 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15604 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15606 crtl->args.pretend_args_size, frame_size);
15608 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15609 frame_pointer_needed,
15610 cfun->machine->uses_anonymous_args);
15612 if (cfun->machine->lr_save_eliminated)
15613 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15615 if (crtl->calls_eh_return)
15616 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15621 arm_output_epilogue (rtx sibling)
15624 unsigned long saved_regs_mask;
15625 unsigned long func_type;
15626 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15627 frame that is $fp + 4 for a non-variadic function. */
15628 int floats_offset = 0;
15630 FILE * f = asm_out_file;
15631 unsigned int lrm_count = 0;
15632 int really_return = (sibling == NULL);
15634 arm_stack_offsets *offsets;
15636 /* If we have already generated the return instruction
15637 then it is futile to generate anything else. */
15638 if (use_return_insn (FALSE, sibling)
15639 && (cfun->machine->return_used_this_function != 0))
15642 func_type = arm_current_func_type ();
15644 if (IS_NAKED (func_type))
15645 /* Naked functions don't have epilogues. */
15648 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15652 /* A volatile function should never return. Call abort. */
15653 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15654 assemble_external_libcall (op);
15655 output_asm_insn ("bl\t%a0", &op);
15660 /* If we are throwing an exception, then we really must be doing a
15661 return, so we can't tail-call. */
15662 gcc_assert (!crtl->calls_eh_return || really_return);
15664 offsets = arm_get_frame_offsets ();
15665 saved_regs_mask = offsets->saved_regs_mask;
15668 lrm_count = bit_count (saved_regs_mask);
15670 floats_offset = offsets->saved_args;
15671 /* Compute how far away the floats will be. */
15672 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15673 if (saved_regs_mask & (1 << reg))
15674 floats_offset += 4;
15676 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15678 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15679 int vfp_offset = offsets->frame;
15681 if (TARGET_FPA_EMU2)
15683 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15684 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15686 floats_offset += 12;
15687 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15688 reg, FP_REGNUM, floats_offset - vfp_offset);
15693 start_reg = LAST_FPA_REGNUM;
15695 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15697 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15699 floats_offset += 12;
15701 /* We can't unstack more than four registers at once. */
15702 if (start_reg - reg == 3)
15704 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15705 reg, FP_REGNUM, floats_offset - vfp_offset);
15706 start_reg = reg - 1;
15711 if (reg != start_reg)
15712 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15713 reg + 1, start_reg - reg,
15714 FP_REGNUM, floats_offset - vfp_offset);
15715 start_reg = reg - 1;
15719 /* Just in case the last register checked also needs unstacking. */
15720 if (reg != start_reg)
15721 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15722 reg + 1, start_reg - reg,
15723 FP_REGNUM, floats_offset - vfp_offset);
15726 if (TARGET_HARD_FLOAT && TARGET_VFP)
15730 /* The fldmd insns do not have base+offset addressing
15731 modes, so we use IP to hold the address. */
15732 saved_size = arm_get_vfp_saved_size ();
15734 if (saved_size > 0)
15736 floats_offset += saved_size;
15737 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15738 FP_REGNUM, floats_offset - vfp_offset);
15740 start_reg = FIRST_VFP_REGNUM;
15741 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15743 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15744 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15746 if (start_reg != reg)
15747 vfp_output_fldmd (f, IP_REGNUM,
15748 (start_reg - FIRST_VFP_REGNUM) / 2,
15749 (reg - start_reg) / 2);
15750 start_reg = reg + 2;
15753 if (start_reg != reg)
15754 vfp_output_fldmd (f, IP_REGNUM,
15755 (start_reg - FIRST_VFP_REGNUM) / 2,
15756 (reg - start_reg) / 2);
15761 /* The frame pointer is guaranteed to be non-double-word aligned.
15762 This is because it is set to (old_stack_pointer - 4) and the
15763 old_stack_pointer was double word aligned. Thus the offset to
15764 the iWMMXt registers to be loaded must also be non-double-word
15765 sized, so that the resultant address *is* double-word aligned.
15766 We can ignore floats_offset since that was already included in
15767 the live_regs_mask. */
15768 lrm_count += (lrm_count % 2 ? 2 : 1);
15770 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15771 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15773 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15774 reg, FP_REGNUM, lrm_count * 4);
15779 /* saved_regs_mask should contain the IP, which at the time of stack
15780 frame generation actually contains the old stack pointer. So a
15781 quick way to unwind the stack is just pop the IP register directly
15782 into the stack pointer. */
15783 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15784 saved_regs_mask &= ~ (1 << IP_REGNUM);
15785 saved_regs_mask |= (1 << SP_REGNUM);
15787 /* There are two registers left in saved_regs_mask - LR and PC. We
15788 only need to restore the LR register (the return address), but to
15789 save time we can load it directly into the PC, unless we need a
15790 special function exit sequence, or we are not really returning. */
15792 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15793 && !crtl->calls_eh_return)
15794 /* Delete the LR from the register mask, so that the LR on
15795 the stack is loaded into the PC in the register mask. */
15796 saved_regs_mask &= ~ (1 << LR_REGNUM);
15798 saved_regs_mask &= ~ (1 << PC_REGNUM);
15800 /* We must use SP as the base register, because SP is one of the
15801 registers being restored. If an interrupt or page fault
15802 happens in the ldm instruction, the SP might or might not
15803 have been restored. That would be bad, as then SP will no
15804 longer indicate the safe area of stack, and we can get stack
15805 corruption. Using SP as the base register means that it will
15806 be reset correctly to the original value, should an interrupt
15807 occur. If the stack pointer already points at the right
15808 place, then omit the subtraction. */
15809 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15810 || cfun->calls_alloca)
15811 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15812 4 * bit_count (saved_regs_mask));
15813 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15815 if (IS_INTERRUPT (func_type))
15816 /* Interrupt handlers will have pushed the
15817 IP onto the stack, so restore it now. */
15818 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
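/* A sketch of the APCS unwind emitted above, with illustrative
   registers and offsets only:

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   SP is reloaded from the saved IP slot, so no writeback is used on
   the ldmfd.  */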
/* This branch is executed for ARM mode (non-apcs frames) and
   Thumb-2 mode.  Frame layout is essentially the same for those
   cases, except that in ARM mode the frame pointer points to the
   first saved register, while in Thumb-2 mode it points to the
   last saved register.

   It is possible to make the frame pointer point to the last saved
   register in both cases, and remove some conditionals below.
   That means that the fp setup in the prologue would be just
   "mov fp, sp" and the sp restore in the epilogue would be just
   "mov sp, fp", whereas now we have to use add/sub in those cases.
   However, the value of that would be marginal, as both mov and
   add/sub are 32-bit in ARM mode, and it would require extra
   conditionals in arm_expand_prologue to distinguish the
   ARM-apcs-frame case (where the frame pointer is required to point
   at the first register) from the ARM-non-apcs-frame case.
   Therefore, such a change is postponed until a real need arises.  */
15839 unsigned HOST_WIDE_INT amount;
15841 /* Restore stack pointer if necessary. */
15842 if (TARGET_ARM && frame_pointer_needed)
15844 operands[0] = stack_pointer_rtx;
15845 operands[1] = hard_frame_pointer_rtx;
15847 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15848 output_add_immediate (operands);
15852 if (frame_pointer_needed)
/* For Thumb-2 restore sp from the frame pointer.
   Operand restrictions mean we have to increment FP, then copy
   it to SP.  */
15857 amount = offsets->locals_base - offsets->saved_regs;
15858 operands[0] = hard_frame_pointer_rtx;
15862 unsigned long count;
15863 operands[0] = stack_pointer_rtx;
15864 amount = offsets->outgoing_args - offsets->saved_regs;
15865 /* pop call clobbered registers if it avoids a
15866 separate stack adjustment. */
15867 count = offsets->saved_regs - offsets->saved_args;
15870 && !crtl->calls_eh_return
15871 && bit_count(saved_regs_mask) * 4 == count
15872 && !IS_INTERRUPT (func_type)
15873 && !IS_STACKALIGN (func_type)
15874 && !crtl->tail_call_emit)
15876 unsigned long mask;
15877 /* Preserve return values, of any size. */
15878 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15880 mask &= ~saved_regs_mask;
15882 while (bit_count (mask) * 4 > amount)
15884 while ((mask & (1 << reg)) == 0)
15886 mask &= ~(1 << reg);
15888 if (bit_count (mask) * 4 == amount) {
15890 saved_regs_mask |= mask;
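/* Worked example of the mask above: for an 8-byte (e.g. DImode)
   return value, arm_size_return_regs () == 8, so
   (1 << ((8 + 3) / 4)) - 1 == 0x3 marks r0 and r1 as holding the
   return value; only call-clobbered registers outside that set are
   candidates for the scratch pop.  */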
15897 operands[1] = operands[0];
15898 operands[2] = GEN_INT (amount);
15899 output_add_immediate (operands);
15901 if (frame_pointer_needed)
15902 asm_fprintf (f, "\tmov\t%r, %r\n",
15903 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15906 if (TARGET_FPA_EMU2)
15908 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15909 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15910 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15915 start_reg = FIRST_FPA_REGNUM;
15917 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15919 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15921 if (reg - start_reg == 3)
15923 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15924 start_reg, SP_REGNUM);
15925 start_reg = reg + 1;
15930 if (reg != start_reg)
15931 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15932 start_reg, reg - start_reg,
15935 start_reg = reg + 1;
15939 /* Just in case the last register checked also needs unstacking. */
15940 if (reg != start_reg)
15941 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15942 start_reg, reg - start_reg, SP_REGNUM);
15945 if (TARGET_HARD_FLOAT && TARGET_VFP)
15947 int end_reg = LAST_VFP_REGNUM + 1;
15949 /* Scan the registers in reverse order. We need to match
15950 any groupings made in the prologue and generate matching
15952 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15954 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15955 && (!df_regs_ever_live_p (reg + 1)
15956 || call_used_regs[reg + 1]))
15958 if (end_reg > reg + 2)
15959 vfp_output_fldmd (f, SP_REGNUM,
15960 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15961 (end_reg - (reg + 2)) / 2);
15965 if (end_reg > reg + 2)
15966 vfp_output_fldmd (f, SP_REGNUM, 0,
15967 (end_reg - (reg + 2)) / 2);
15971 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15972 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15973 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15975 /* If we can, restore the LR into the PC. */
15976 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15977 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15978 && !IS_STACKALIGN (func_type)
15980 && crtl->args.pretend_args_size == 0
15981 && saved_regs_mask & (1 << LR_REGNUM)
15982 && !crtl->calls_eh_return)
15984 saved_regs_mask &= ~ (1 << LR_REGNUM);
15985 saved_regs_mask |= (1 << PC_REGNUM);
15986 rfe = IS_INTERRUPT (func_type);
/* Load the registers off the stack.  If we only have one register
   to load use the LDR instruction - it is faster.  For Thumb-2
   always use pop and the assembler will pick the best instruction.  */
15994 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15995 && !IS_INTERRUPT(func_type))
15997 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15999 else if (saved_regs_mask)
16001 if (saved_regs_mask & (1 << SP_REGNUM))
16002 /* Note - write back to the stack register is not enabled
16003 (i.e. "ldmfd sp!..."). We know that the stack pointer is
16004 in the list of registers and if we add writeback the
16005 instruction becomes UNPREDICTABLE. */
16006 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
16008 else if (TARGET_ARM)
16009 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
16012 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
16015 if (crtl->args.pretend_args_size)
16017 /* Unwind the pre-pushed regs. */
16018 operands[0] = operands[1] = stack_pointer_rtx;
16019 operands[2] = GEN_INT (crtl->args.pretend_args_size);
16020 output_add_immediate (operands);
16024 /* We may have already restored PC directly from the stack. */
16025 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
16028 /* Stack adjustment for exception handler. */
16029 if (crtl->calls_eh_return)
16030 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16031 ARM_EH_STACKADJ_REGNUM);
16033 /* Generate the return instruction. */
16034 switch ((int) ARM_FUNC_TYPE (func_type))
16038 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
16041 case ARM_FT_EXCEPTION:
16042 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16045 case ARM_FT_INTERWORKED:
16046 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16050 if (IS_STACKALIGN (func_type))
16052 /* See comment in arm_expand_prologue. */
16053 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
16055 if (arm_arch5 || arm_arch4t)
16056 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16058 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
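/* In summary, the return sequences selected by this switch are:

	subs	pc, lr, #4	@ FIQ and IRQ handlers
	movs	pc, lr		@ exception handlers
	bx	lr		@ interworked returns and v4T/v5 cores
	mov	pc, lr		@ older cores

   with the stack-realignment case restoring SP from r0 first.  */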
16066 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16067 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16069 arm_stack_offsets *offsets;
16075 /* Emit any call-via-reg trampolines that are needed for v4t support
16076 of call_reg and call_value_reg type insns. */
16077 for (regno = 0; regno < LR_REGNUM; regno++)
16079 rtx label = cfun->machine->call_via[regno];
16083 switch_to_section (function_section (current_function_decl));
16084 targetm.asm_out.internal_label (asm_out_file, "L",
16085 CODE_LABEL_NUMBER (label));
16086 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16090 /* ??? Probably not safe to set this here, since it assumes that a
16091 function will be emitted as assembly immediately after we generate
16092 RTL for it. This does not happen for inline functions. */
16093 cfun->machine->return_used_this_function = 0;
16095 else /* TARGET_32BIT */
16097 /* We need to take into account any stack-frame rounding. */
16098 offsets = arm_get_frame_offsets ();
16100 gcc_assert (!use_return_insn (FALSE, NULL)
16101 || (cfun->machine->return_used_this_function != 0)
16102 || offsets->saved_regs == offsets->outgoing_args
16103 || frame_pointer_needed);
16105 /* Reset the ARM-specific per-function variables. */
16106 after_arm_reorg = 0;
16110 /* Generate and emit an insn that we will recognize as a push_multi.
16111 Unfortunately, since this insn does not reflect very well the actual
16112 semantics of the operation, we need to annotate the insn for the benefit
16113 of DWARF2 frame unwind information. */
16115 emit_multi_reg_push (unsigned long mask)
16118 int num_dwarf_regs;
16122 int dwarf_par_index;
16125 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16126 if (mask & (1 << i))
16129 gcc_assert (num_regs && num_regs <= 16);
16131 /* We don't record the PC in the dwarf frame information. */
16132 num_dwarf_regs = num_regs;
16133 if (mask & (1 << PC_REGNUM))
16136 /* For the body of the insn we are going to generate an UNSPEC in
16137 parallel with several USEs. This allows the insn to be recognized
16138 by the push_multi pattern in the arm.md file.
The body of the insn looks something like this:

  (parallel [
      (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                   (const_int:SI <num>)))
           (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
      (use (reg:SI XX))
      (use (reg:SI YY))
      ...
   ])

For the frame note however, we try to be more explicit and actually
show each register being stored into the stack frame, plus a (single)
decrement of the stack pointer.  We do it this way in order to be
friendly to the stack unwinding code, which only wants to see a single
stack decrement per instruction.  The RTL we generate for the note looks
something like this:

  (sequence [
      (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
      (set (mem:SI (reg:SI sp)) (reg:SI r4))
      (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
      (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
      ...
   ])
16166 FIXME:: In an ideal world the PRE_MODIFY would not exist and
16167 instead we'd have a parallel expression detailing all
16168 the stores to the various memory addresses so that debug
16169 information is more up-to-date. Remember however while writing
16170 this to take care of the constraints with the push instruction.
16172 Note also that this has to be taken care of for the VFP registers.
16174 For more see PR43399. */
16176 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16177 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16178 dwarf_par_index = 1;
16180 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16182 if (mask & (1 << i))
16184 reg = gen_rtx_REG (SImode, i);
16186 XVECEXP (par, 0, 0)
16187 = gen_rtx_SET (VOIDmode,
16190 gen_rtx_PRE_MODIFY (Pmode,
16193 (stack_pointer_rtx,
16196 gen_rtx_UNSPEC (BLKmode,
16197 gen_rtvec (1, reg),
16198 UNSPEC_PUSH_MULT));
16200 if (i != PC_REGNUM)
16202 tmp = gen_rtx_SET (VOIDmode,
16203 gen_frame_mem (SImode, stack_pointer_rtx),
16205 RTX_FRAME_RELATED_P (tmp) = 1;
16206 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16214 for (j = 1, i++; j < num_regs; i++)
16216 if (mask & (1 << i))
16218 reg = gen_rtx_REG (SImode, i);
16220 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16222 if (i != PC_REGNUM)
16225 = gen_rtx_SET (VOIDmode,
16228 plus_constant (stack_pointer_rtx,
16231 RTX_FRAME_RELATED_P (tmp) = 1;
16232 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16239 par = emit_insn (par);
16241 tmp = gen_rtx_SET (VOIDmode,
16243 plus_constant (stack_pointer_rtx, -4 * num_regs));
16244 RTX_FRAME_RELATED_P (tmp) = 1;
16245 XVECEXP (dwarf, 0, 0) = tmp;
16247 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
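/* For instance (a hypothetical call, not tied to any caller here),
   emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM)) emits
   an "stmfd sp!, {r4, r5, lr}" style push whose note records a single
   12-byte stack decrement plus the three individual stores.  */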
16252 /* Calculate the size of the return value that is passed in registers. */
16254 arm_size_return_regs (void)
16256 enum machine_mode mode;
16258 if (crtl->return_rtx != 0)
16259 mode = GET_MODE (crtl->return_rtx);
16261 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16263 return GET_MODE_SIZE (mode);
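/* For example, a function returning "long long" (DImode) yields 8
   here (the value comes back in r0 and r1), while "int" yields 4.  */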
16267 emit_sfm (int base_reg, int count)
16274 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16275 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16277 reg = gen_rtx_REG (XFmode, base_reg++);
16279 XVECEXP (par, 0, 0)
16280 = gen_rtx_SET (VOIDmode,
16283 gen_rtx_PRE_MODIFY (Pmode,
16286 (stack_pointer_rtx,
16289 gen_rtx_UNSPEC (BLKmode,
16290 gen_rtvec (1, reg),
16291 UNSPEC_PUSH_MULT));
16292 tmp = gen_rtx_SET (VOIDmode,
16293 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16294 RTX_FRAME_RELATED_P (tmp) = 1;
16295 XVECEXP (dwarf, 0, 1) = tmp;
16297 for (i = 1; i < count; i++)
16299 reg = gen_rtx_REG (XFmode, base_reg++);
16300 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16302 tmp = gen_rtx_SET (VOIDmode,
16303 gen_frame_mem (XFmode,
16304 plus_constant (stack_pointer_rtx,
16307 RTX_FRAME_RELATED_P (tmp) = 1;
16308 XVECEXP (dwarf, 0, i + 1) = tmp;
16311 tmp = gen_rtx_SET (VOIDmode,
16313 plus_constant (stack_pointer_rtx, -12 * count));
16315 RTX_FRAME_RELATED_P (tmp) = 1;
16316 XVECEXP (dwarf, 0, 0) = tmp;
16318 par = emit_insn (par);
16319 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
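/* As a sketch, emit_sfm (FIRST_FPA_REGNUM, 3) pushes the first three
   FPA registers (12 bytes each) and attaches a note recording one
   36-byte stack decrement followed by the three XFmode stores,
   mirroring the push_multi note above.  */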
16325 /* Return true if the current function needs to save/restore LR. */
16328 thumb_force_lr_save (void)
16330 return !cfun->machine->lr_save_eliminated
16331 && (!leaf_function_p ()
16332 || thumb_far_jump_used_p ()
16333 || df_regs_ever_live_p (LR_REGNUM));
16337 /* Return true if r3 is used by any of the tail call insns in the
16338 current function. */
16341 any_sibcall_uses_r3 (void)
16346 if (!crtl->tail_call_emit)
16348 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16349 if (e->flags & EDGE_SIBCALL)
16351 rtx call = BB_END (e->src);
16352 if (!CALL_P (call))
16353 call = prev_nonnote_nondebug_insn (call);
16354 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16355 if (find_regno_fusage (call, USE, 3))
16362 /* Compute the distance from register FROM to register TO.
16363 These can be the arg pointer (26), the soft frame pointer (25),
16364 the stack pointer (13) or the hard frame pointer (11).
16365 In thumb mode r7 is used as the soft frame pointer, if needed.
Typical stack layout looks like this:

     old stack pointer -> |    |
                           ----
                          |    | \
                          |    |   saved arguments for
                          |    |   vararg functions
                          |    | /
                            --
 hard FP & arg pointer -> |    | \
                          |    |   stack
                          |    |   frame
                          |    | /
                            --
                          |    | \
                          |    |   call saved
                          |    |   registers
    soft frame pointer -> |    | /
                            --
                          |    | \
                          |    |   local
                          |    |   variables
   locals base pointer -> |    | /
                            --
                          |    | \
                          |    |   outgoing
                          |    |   arguments
 current stack pointer -> |    | /
                            --
16396 For a given function some or all of these stack components
16397 may not be needed, giving rise to the possibility of
16398 eliminating some of the registers.
16400 The values returned by this function must reflect the behavior
16401 of arm_expand_prologue() and arm_compute_save_reg_mask().
16403 The sign of the number returned reflects the direction of stack
16404 growth, so the values are positive for all eliminations except
16405 from the soft frame pointer to the hard frame pointer.
SFP may point just inside the local variables block to ensure correct
alignment.  */
16411 /* Calculate stack offsets. These are used to calculate register elimination
16412 offsets and in prologue/epilogue code. Also calculates which registers
16413 should be saved. */
16415 static arm_stack_offsets *
16416 arm_get_frame_offsets (void)
16418 struct arm_stack_offsets *offsets;
16419 unsigned long func_type;
16423 HOST_WIDE_INT frame_size;
16426 offsets = &cfun->machine->stack_offsets;
16428 /* We need to know if we are a leaf function. Unfortunately, it
16429 is possible to be called after start_sequence has been called,
16430 which causes get_insns to return the insns for the sequence,
16431 not the function, which will cause leaf_function_p to return
the incorrect result.

To work around this we cache the result: we only need
to know about leaf functions once reload has completed, and the
frame size cannot be changed after that time, so we can safely
use the cached value.  */
16438 if (reload_completed)
/* Initially this is the size of the local variables.  It will be
   translated into an offset once we have determined the size of
   preceding data.  */
16443 frame_size = ROUND_UP_WORD (get_frame_size ());
16445 leaf = leaf_function_p ();
16447 /* Space for variadic functions. */
16448 offsets->saved_args = crtl->args.pretend_args_size;
16450 /* In Thumb mode this is incorrect, but never used. */
16451 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16452 arm_compute_static_chain_stack_bytes();
16456 unsigned int regno;
16458 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16459 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16460 saved = core_saved;
16462 /* We know that SP will be doubleword aligned on entry, and we must
16463 preserve that condition at any subroutine call. We also require the
16464 soft frame pointer to be doubleword aligned. */
16466 if (TARGET_REALLY_IWMMXT)
16468 /* Check for the call-saved iWMMXt registers. */
16469 for (regno = FIRST_IWMMXT_REGNUM;
16470 regno <= LAST_IWMMXT_REGNUM;
16472 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16476 func_type = arm_current_func_type ();
16477 if (! IS_VOLATILE (func_type))
16479 /* Space for saved FPA registers. */
16480 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16481 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16484 /* Space for saved VFP registers. */
16485 if (TARGET_HARD_FLOAT && TARGET_VFP)
16486 saved += arm_get_vfp_saved_size ();
16489 else /* TARGET_THUMB1 */
16491 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16492 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16493 saved = core_saved;
16494 if (TARGET_BACKTRACE)
16498 /* Saved registers include the stack frame. */
16499 offsets->saved_regs = offsets->saved_args + saved +
16500 arm_compute_static_chain_stack_bytes();
16501 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
   on the stack.  */
16504 if (leaf && frame_size == 0
16505 /* However if it calls alloca(), we have a dynamically allocated
16506 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16507 && ! cfun->calls_alloca)
16509 offsets->outgoing_args = offsets->soft_frame;
16510 offsets->locals_base = offsets->soft_frame;
16514 /* Ensure SFP has the correct alignment. */
16515 if (ARM_DOUBLEWORD_ALIGN
16516 && (offsets->soft_frame & 7))
16518 offsets->soft_frame += 4;
16519 /* Try to align stack by pushing an extra reg. Don't bother doing this
16520 when there is a stack frame as the alignment will be rolled into
16521 the normal stack adjustment. */
16522 if (frame_size + crtl->outgoing_args_size == 0)
16526 /* If it is safe to use r3, then do so. This sometimes
16527 generates better code on Thumb-2 by avoiding the need to
16528 use 32-bit push/pop instructions. */
16529 if (! any_sibcall_uses_r3 ()
16530 && arm_size_return_regs () <= 12
16531 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16536 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16538 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16547 offsets->saved_regs += 4;
16548 offsets->saved_regs_mask |= (1 << reg);
16553 offsets->locals_base = offsets->soft_frame + frame_size;
16554 offsets->outgoing_args = (offsets->locals_base
16555 + crtl->outgoing_args_size);
16557 if (ARM_DOUBLEWORD_ALIGN)
16559 /* Ensure SP remains doubleword aligned. */
16560 if (offsets->outgoing_args & 7)
16561 offsets->outgoing_args += 4;
16562 gcc_assert (!(offsets->outgoing_args & 7));
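/* A worked example (hypothetical ARM-state function, no frame
   pointer): 8 bytes of pretend args, {r4, r5, lr} saved, 20 bytes of
   locals and no outgoing arguments gives

     saved_args    = 8
     saved_regs    = 8 + 12 = 20
     soft_frame    = 20, padded to 24 for doubleword alignment
     locals_base   = 24 + 20 = 44
     outgoing_args = 44, padded to 48 so SP stays doubleword aligned

   before the extra-register push heuristic above is considered.  */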
16569 /* Calculate the relative offsets for the different stack pointers. Positive
16570 offsets are in the direction of stack growth. */
16573 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16575 arm_stack_offsets *offsets;
16577 offsets = arm_get_frame_offsets ();
16579 /* OK, now we have enough information to compute the distances.
16580 There must be an entry in these switch tables for each pair
16581 of registers in ELIMINABLE_REGS, even if some of the entries
16582 seem to be redundant or useless. */
16585 case ARG_POINTER_REGNUM:
16588 case THUMB_HARD_FRAME_POINTER_REGNUM:
16591 case FRAME_POINTER_REGNUM:
16592 /* This is the reverse of the soft frame pointer
16593 to hard frame pointer elimination below. */
16594 return offsets->soft_frame - offsets->saved_args;
16596 case ARM_HARD_FRAME_POINTER_REGNUM:
16597 /* This is only non-zero in the case where the static chain register
16598 is stored above the frame. */
16599 return offsets->frame - offsets->saved_args - 4;
16601 case STACK_POINTER_REGNUM:
16602 /* If nothing has been pushed on the stack at all
16603 then this will return -4. This *is* correct! */
16604 return offsets->outgoing_args - (offsets->saved_args + 4);
16607 gcc_unreachable ();
16609 gcc_unreachable ();
16611 case FRAME_POINTER_REGNUM:
16614 case THUMB_HARD_FRAME_POINTER_REGNUM:
16617 case ARM_HARD_FRAME_POINTER_REGNUM:
16618 /* The hard frame pointer points to the top entry in the
16619 stack frame. The soft frame pointer to the bottom entry
16620 in the stack frame. If there is no stack frame at all,
16621 then they are identical. */
16623 return offsets->frame - offsets->soft_frame;
16625 case STACK_POINTER_REGNUM:
16626 return offsets->outgoing_args - offsets->soft_frame;
16629 gcc_unreachable ();
16631 gcc_unreachable ();
16634 /* You cannot eliminate from the stack pointer.
16635 In theory you could eliminate from the hard frame
16636 pointer to the stack pointer, but this will never
16637 happen, since if a stack frame is not needed the
16638 hard frame pointer will never be used. */
16639 gcc_unreachable ();
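/* Continuing the worked example above: eliminating the arg pointer
   into the stack pointer would return 48 - (8 + 4) = 36, and
   eliminating the soft frame pointer into the stack pointer would
   return 48 - 24 = 24.  */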
16643 /* Given FROM and TO register numbers, say whether this elimination is
16644 allowed. Frame pointer elimination is automatically handled.
16646 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16647 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16648 pointer, we must eliminate FRAME_POINTER_REGNUM into
16649 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16650 ARG_POINTER_REGNUM. */
16653 arm_can_eliminate (const int from, const int to)
return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
        (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
        (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
        (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
        true);
16662 /* Emit RTL to save coprocessor registers on function entry. Returns the
16663 number of bytes pushed. */
16666 arm_save_coproc_regs(void)
16668 int saved_size = 0;
16670 unsigned start_reg;
16673 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16674 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16676 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16677 insn = gen_rtx_MEM (V2SImode, insn);
16678 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16679 RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
   function.  */
16685 if (TARGET_FPA_EMU2)
16687 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16688 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16690 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16691 insn = gen_rtx_MEM (XFmode, insn);
16692 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16693 RTX_FRAME_RELATED_P (insn) = 1;
16699 start_reg = LAST_FPA_REGNUM;
16701 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16703 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16705 if (start_reg - reg == 3)
16707 insn = emit_sfm (reg, 4);
16708 RTX_FRAME_RELATED_P (insn) = 1;
16710 start_reg = reg - 1;
16715 if (start_reg != reg)
16717 insn = emit_sfm (reg + 1, start_reg - reg);
16718 RTX_FRAME_RELATED_P (insn) = 1;
16719 saved_size += (start_reg - reg) * 12;
16721 start_reg = reg - 1;
16725 if (start_reg != reg)
16727 insn = emit_sfm (reg + 1, start_reg - reg);
16728 saved_size += (start_reg - reg) * 12;
16729 RTX_FRAME_RELATED_P (insn) = 1;
16732 if (TARGET_HARD_FLOAT && TARGET_VFP)
16734 start_reg = FIRST_VFP_REGNUM;
16736 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16738 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16739 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16741 if (start_reg != reg)
16742 saved_size += vfp_emit_fstmd (start_reg,
16743 (reg - start_reg) / 2);
16744 start_reg = reg + 2;
16747 if (start_reg != reg)
16748 saved_size += vfp_emit_fstmd (start_reg,
16749 (reg - start_reg) / 2);
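/* A sketch of the grouping above: if d8-d11 are the only live
   call-saved VFP registers, the scan finds one contiguous run and
   makes a single vfp_emit_fstmd call covering four D registers
   (32 bytes) rather than four separate stores.  */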
16755 /* Set the Thumb frame pointer from the stack pointer. */
16758 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16760 HOST_WIDE_INT amount;
16763 amount = offsets->outgoing_args - offsets->locals_base;
16765 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16766 stack_pointer_rtx, GEN_INT (amount)));
16769 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16770 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16771 expects the first two operands to be the same. */
16774 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16776 hard_frame_pointer_rtx));
16780 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16781 hard_frame_pointer_rtx,
16782 stack_pointer_rtx));
16784 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16785 plus_constant (stack_pointer_rtx, amount));
16786 RTX_FRAME_RELATED_P (dwarf) = 1;
16787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16790 RTX_FRAME_RELATED_P (insn) = 1;
16793 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16796 arm_expand_prologue (void)
16801 unsigned long live_regs_mask;
16802 unsigned long func_type;
16804 int saved_pretend_args = 0;
16805 int saved_regs = 0;
16806 unsigned HOST_WIDE_INT args_to_push;
16807 arm_stack_offsets *offsets;
16809 func_type = arm_current_func_type ();
16811 /* Naked functions don't have prologues. */
16812 if (IS_NAKED (func_type))
16815 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16816 args_to_push = crtl->args.pretend_args_size;
16818 /* Compute which register we will have to save onto the stack. */
16819 offsets = arm_get_frame_offsets ();
16820 live_regs_mask = offsets->saved_regs_mask;
16822 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16824 if (IS_STACKALIGN (func_type))
/* Handle a word-aligned stack pointer.  We generate the following:

	mov r0, sp
	bic r1, r0, #7
	mov sp, r1
	<save and restore r0 in normal prologue/epilogue>
	mov sp, r0
	bx lr

   The unwinder doesn't need to know about the stack realignment.
   Just tell it we saved SP in r0.  */
16839 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16841 r0 = gen_rtx_REG (SImode, 0);
16842 r1 = gen_rtx_REG (SImode, 1);
16844 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16845 RTX_FRAME_RELATED_P (insn) = 1;
16846 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16848 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16850 /* ??? The CFA changes here, which may cause GDB to conclude that it
16851 has entered a different function. That said, the unwind info is
16852 correct, individually, before and after this instruction because
16853 we've described the save of SP, which will override the default
16854 handling of SP as restoring from the CFA. */
16855 emit_insn (gen_movsi (stack_pointer_rtx, r1));
/* For APCS frames, if the IP register is clobbered when creating the
   frame, save that register in a special way.  */
16861 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16863 if (IS_INTERRUPT (func_type))
16865 /* Interrupt functions must not corrupt any registers.
16866 Creating a frame pointer however, corrupts the IP
16867 register, so we must push it first. */
16868 emit_multi_reg_push (1 << IP_REGNUM);
16870 /* Do not set RTX_FRAME_RELATED_P on this insn.
16871 The dwarf stack unwinding code only wants to see one
16872 stack decrement per function, and this is not it. If
16873 this instruction is labeled as being part of the frame
16874 creation sequence then dwarf2out_frame_debug_expr will
16875 die when it encounters the assignment of IP to FP
16876 later on, since the use of SP here establishes SP as
16877 the CFA register and not IP.
16879 Anyway this instruction is not really part of the stack
16880 frame creation although it is part of the prologue. */
16882 else if (IS_NESTED (func_type))
16884 /* The Static chain register is the same as the IP register
16885 used as a scratch register during stack frame creation.
To get around this we need to find somewhere to store IP
whilst the frame is being created.  We try the following
places in turn:
16890 1. The last argument register.
16891 2. A slot on the stack above the frame. (This only
16892 works if the function is not a varargs function).
3. Register r3, after pushing the argument registers
onto the stack.
16896 Note - we only need to tell the dwarf2 backend about the SP
16897 adjustment in the second variant; the static chain register
16898 doesn't need to be unwound, as it doesn't contain a value
16899 inherited from the caller. */
16901 if (df_regs_ever_live_p (3) == false)
16902 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16903 else if (args_to_push == 0)
16907 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16910 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16911 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16914 /* Just tell the dwarf backend that we adjusted SP. */
16915 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16916 plus_constant (stack_pointer_rtx,
16918 RTX_FRAME_RELATED_P (insn) = 1;
16919 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16923 /* Store the args on the stack. */
16924 if (cfun->machine->uses_anonymous_args)
16925 insn = emit_multi_reg_push
16926 ((0xf0 >> (args_to_push / 4)) & 0xf);
else
  insn = emit_insn
    (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                 GEN_INT (- args_to_push)));
16932 RTX_FRAME_RELATED_P (insn) = 1;
16934 saved_pretend_args = 1;
16935 fp_offset = args_to_push;
16938 /* Now reuse r3 to preserve IP. */
16939 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16943 insn = emit_set_insn (ip_rtx,
16944 plus_constant (stack_pointer_rtx, fp_offset));
16945 RTX_FRAME_RELATED_P (insn) = 1;
16950 /* Push the argument registers, or reserve space for them. */
16951 if (cfun->machine->uses_anonymous_args)
16952 insn = emit_multi_reg_push
16953 ((0xf0 >> (args_to_push / 4)) & 0xf);
else
  insn = emit_insn
    (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                 GEN_INT (- args_to_push)));
16958 RTX_FRAME_RELATED_P (insn) = 1;
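/* Worked example of the anonymous-args mask above: with 8 bytes of
   pretend arguments, (0xf0 >> (8 / 4)) & 0xf == 0xc, i.e. r2 and r3
   are pushed.  */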
/* If this is an interrupt service routine, and the link register
   is going to be pushed, and we're not generating an extra push of
   IP (needed when a frame is needed and the frame layout is APCS),
   subtracting four from LR now will mean that the function return
   can be done with a single instruction.  */
16966 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16967 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16968 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16971 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16973 emit_set_insn (lr, plus_constant (lr, -4));
16976 if (live_regs_mask)
16978 saved_regs += bit_count (live_regs_mask) * 4;
16979 if (optimize_size && !frame_pointer_needed
16980 && saved_regs == offsets->saved_regs - offsets->saved_args)
/* If no coprocessor registers are being pushed and we don't have
   to worry about a frame pointer then push extra registers to
   create the stack frame.  This is done in a way that does not
   alter the frame layout, so it is independent of the epilogue.  */
16989 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16991 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16992 if (frame && n * 4 >= frame)
16995 live_regs_mask |= (1 << n) - 1;
16996 saved_regs += frame;
16999 insn = emit_multi_reg_push (live_regs_mask);
17000 RTX_FRAME_RELATED_P (insn) = 1;
17003 if (! IS_VOLATILE (func_type))
17004 saved_regs += arm_save_coproc_regs ();
17006 if (frame_pointer_needed && TARGET_ARM)
17008 /* Create the new frame pointer. */
17009 if (TARGET_APCS_FRAME)
17011 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17012 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17013 RTX_FRAME_RELATED_P (insn) = 1;
17015 if (IS_NESTED (func_type))
17017 /* Recover the static chain register. */
17018 if (!df_regs_ever_live_p (3)
17019 || saved_pretend_args)
17020 insn = gen_rtx_REG (SImode, 3);
17021 else /* if (crtl->args.pretend_args_size == 0) */
17023 insn = plus_constant (hard_frame_pointer_rtx, 4);
17024 insn = gen_frame_mem (SImode, insn);
17026 emit_set_insn (ip_rtx, insn);
17027 /* Add a USE to stop propagate_one_insn() from barfing. */
17028 emit_insn (gen_prologue_use (ip_rtx));
17033 insn = GEN_INT (saved_regs - 4);
17034 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17035 stack_pointer_rtx, insn));
17036 RTX_FRAME_RELATED_P (insn) = 1;
17040 if (flag_stack_usage_info)
17041 current_function_static_stack_size
17042 = offsets->outgoing_args - offsets->saved_args;
17044 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17046 /* This add can produce multiple insns for a large constant, so we
17047 need to get tricky. */
17048 rtx last = get_last_insn ();
17050 amount = GEN_INT (offsets->saved_args + saved_regs
17051 - offsets->outgoing_args);
17053 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17057 last = last ? NEXT_INSN (last) : get_insns ();
17058 RTX_FRAME_RELATED_P (last) = 1;
17060 while (last != insn);
17062 /* If the frame pointer is needed, emit a special barrier that
17063 will prevent the scheduler from moving stores to the frame
17064 before the stack adjustment. */
17065 if (frame_pointer_needed)
17066 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17067 hard_frame_pointer_rtx));
17071 if (frame_pointer_needed && TARGET_THUMB2)
17072 thumb_set_frame_pointer (offsets);
17074 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17076 unsigned long mask;
17078 mask = live_regs_mask;
17079 mask &= THUMB2_WORK_REGS;
17080 if (!IS_NESTED (func_type))
17081 mask |= (1 << IP_REGNUM);
17082 arm_load_pic_register (mask);
17085 /* If we are profiling, make sure no instructions are scheduled before
17086 the call to mcount. Similarly if the user has requested no
scheduling in the prologue.  Similarly if we want non-call exceptions
17088 using the EABI unwinder, to prevent faulting instructions from being
17089 swapped with a stack adjustment. */
17090 if (crtl->profile || !TARGET_SCHED_PROLOG
17091 || (arm_except_unwind_info (&global_options) == UI_TARGET
17092 && cfun->can_throw_non_call_exceptions))
17093 emit_insn (gen_blockage ());
17095 /* If the link register is being kept alive, with the return address in it,
17096 then make sure that it does not get reused by the ce2 pass. */
17097 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17098 cfun->machine->lr_save_eliminated = 1;
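/* Putting the pieces together, a typical APCS-frame prologue emitted
   by this function looks something like this (illustrative only):

	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #16	@ locals and outgoing arguments

   with unwind information attached to each frame-related insn.  */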
17101 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17103 arm_print_condition (FILE *stream)
17105 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17107 /* Branch conversion is not implemented for Thumb-2. */
17110 output_operand_lossage ("predicated Thumb instruction");
17113 if (current_insn_predicate != NULL)
17115 output_operand_lossage
17116 ("predicated instruction in conditional sequence");
17120 fputs (arm_condition_codes[arm_current_cc], stream);
17122 else if (current_insn_predicate)
17124 enum arm_cond_code code;
17128 output_operand_lossage ("predicated Thumb instruction");
17132 code = get_arm_condition_code (current_insn_predicate);
17133 fputs (arm_condition_codes[code], stream);
17138 /* If CODE is 'd', then the X is a condition operand and the instruction
17139 should only be executed if the condition is true.
17140 if CODE is 'D', then the X is a condition operand and the instruction
17141 should only be executed if the condition is false: however, if the mode
17142 of the comparison is CCFPEmode, then always execute the instruction -- we
17143 do this because in these circumstances !GE does not necessarily imply LT;
17144 in these cases the instruction pattern will take care to make sure that
17145 an instruction containing %d will follow, thereby undoing the effects of
17146 doing this instruction unconditionally.
If CODE is 'N' then X is a floating point operand that must be negated
before output.
17149 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17150 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
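/* For example (a sketch): with operand 0 = (const_int 10), "%B0"
   prints -11, the bitwise inverse; with operand 0 a DImode value held
   in r0/r1 on a little-endian target, "%Q0" prints r0 and "%R0"
   prints r1.  */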
17152 arm_print_operand (FILE *stream, rtx x, int code)
17157 fputs (ASM_COMMENT_START, stream);
17161 fputs (user_label_prefix, stream);
17165 fputs (REGISTER_PREFIX, stream);
17169 arm_print_condition (stream);
17173 /* Nothing in unified syntax, otherwise the current condition code. */
17174 if (!TARGET_UNIFIED_ASM)
17175 arm_print_condition (stream);
17179 /* The current condition code in unified syntax, otherwise nothing. */
17180 if (TARGET_UNIFIED_ASM)
17181 arm_print_condition (stream);
17185 /* The current condition code for a condition code setting instruction.
17186 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17187 if (TARGET_UNIFIED_ASM)
17189 fputc('s', stream);
17190 arm_print_condition (stream);
17194 arm_print_condition (stream);
17195 fputc('s', stream);
17200 /* If the instruction is conditionally executed then print
17201 the current condition code, otherwise print 's'. */
17202 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17203 if (current_insn_predicate)
17204 arm_print_condition (stream);
17206 fputc('s', stream);
17209 /* %# is a "break" sequence. It doesn't output anything, but is used to
17210 separate e.g. operand numbers from following text, if that text consists
of further digits which we don't want to be part of the operand
number.  */
17219 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17220 r = real_value_negate (&r);
17221 fprintf (stream, "%s", fp_const_from_val (&r));
17225 /* An integer or symbol address without a preceding # sign. */
17227 switch (GET_CODE (x))
17230 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17234 output_addr_const (stream, x);
17238 if (GET_CODE (XEXP (x, 0)) == PLUS
17239 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17241 output_addr_const (stream, x);
17244 /* Fall through. */
17247 output_operand_lossage ("Unsupported operand for code '%c'", code);
17252 if (GET_CODE (x) == CONST_INT)
17255 val = ARM_SIGN_EXTEND (~INTVAL (x));
17256 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17260 putc ('~', stream);
17261 output_addr_const (stream, x);
17266 /* The low 16 bits of an immediate constant. */
17267 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17271 fprintf (stream, "%s", arithmetic_instr (x, 1));
17274 /* Truncate Cirrus shift counts. */
17276 if (GET_CODE (x) == CONST_INT)
17278 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17281 arm_print_operand (stream, x, 0);
17285 fprintf (stream, "%s", arithmetic_instr (x, 0));
17293 if (!shift_operator (x, SImode))
17295 output_operand_lossage ("invalid shift operand");
17299 shift = shift_op (x, &val);
17303 fprintf (stream, ", %s ", shift);
17305 arm_print_operand (stream, XEXP (x, 1), 0);
17307 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17312 /* An explanation of the 'Q', 'R' and 'H' register operands:
17314 In a pair of registers containing a DI or DF value the 'Q'
17315 operand returns the register number of the register containing
17316 the least significant part of the value. The 'R' operand returns
17317 the register number of the register containing the most
17318 significant part of the value.
17320 The 'H' operand returns the higher of the two register numbers.
17321 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17322 same as the 'Q' operand, since the most significant part of the
17323 value is held in the lower number register. The reverse is true
17324 on systems where WORDS_BIG_ENDIAN is false.
17326 The purpose of these operands is to distinguish between cases
17327 where the endian-ness of the values is important (for example
17328 when they are added together), and cases where the endian-ness
17329 is irrelevant, but the order of register operations is important.
17330 For example when loading a value from memory into a register
17331 pair, the endian-ness does not matter. Provided that the value
17332 from the lower memory address is put into the lower numbered
17333 register, and the value from the higher address is put into the
17334 higher numbered register, the load will work regardless of whether
17335 the value being loaded is big-wordian or little-wordian. The
17336 order of the two register loads can matter however, if the address
17337 of the memory location is actually held in one of the registers
17338 being overwritten by the load.
The 'Q' and 'R' constraints are also available for 64-bit
constants.  */
17343 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17345 rtx part = gen_lowpart (SImode, x);
17346 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17350 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17352 output_operand_lossage ("invalid operand for code '%c'", code);
17356 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17360 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17362 enum machine_mode mode = GET_MODE (x);
17365 if (mode == VOIDmode)
17367 part = gen_highpart_mode (SImode, mode, x);
17368 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17372 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17374 output_operand_lossage ("invalid operand for code '%c'", code);
17378 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17382 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17384 output_operand_lossage ("invalid operand for code '%c'", code);
17388 asm_fprintf (stream, "%r", REGNO (x) + 1);
17392 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17394 output_operand_lossage ("invalid operand for code '%c'", code);
17398 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17402 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17404 output_operand_lossage ("invalid operand for code '%c'", code);
17408 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17412 asm_fprintf (stream, "%r",
17413 GET_CODE (XEXP (x, 0)) == REG
17414 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17418 asm_fprintf (stream, "{%r-%r}",
17420 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
/* Like 'M', but writing doubleword vector registers, for use by Neon
   insns.  */
17427 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17428 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17430 asm_fprintf (stream, "{d%d}", regno);
17432 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17437 /* CONST_TRUE_RTX means always -- that's the default. */
17438 if (x == const_true_rtx)
17441 if (!COMPARISON_P (x))
17443 output_operand_lossage ("invalid operand for code '%c'", code);
17447 fputs (arm_condition_codes[get_arm_condition_code (x)],
17452 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17453 want to do that. */
17454 if (x == const_true_rtx)
17456 output_operand_lossage ("instruction never executed");
17459 if (!COMPARISON_P (x))
17461 output_operand_lossage ("invalid operand for code '%c'", code);
17465 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17466 (get_arm_condition_code (x))],
/* Cirrus registers can be accessed in a variety of ways:
     single floating point (f)
     double floating point (d)
     32bit integer (fx)
     64bit integer (dx).  */
17475 case 'W': /* Cirrus register in F mode. */
17476 case 'X': /* Cirrus register in D mode. */
17477 case 'Y': /* Cirrus register in FX mode. */
17478 case 'Z': /* Cirrus register in DX mode. */
17479 gcc_assert (GET_CODE (x) == REG
17480 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17482 fprintf (stream, "mv%s%s",
17484 : code == 'X' ? "d"
17485 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17489 /* Print cirrus register in the mode specified by the register's mode. */
17492 int mode = GET_MODE (x);
17494 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17496 output_operand_lossage ("invalid operand for code '%c'", code);
17500 fprintf (stream, "mv%s%s",
17501 mode == DFmode ? "d"
17502 : mode == SImode ? "fx"
17503 : mode == DImode ? "dx"
17504 : "f", reg_names[REGNO (x)] + 2);
17510 if (GET_CODE (x) != REG
17511 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17512 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17513 /* Bad value for wCG register number. */
17515 output_operand_lossage ("invalid operand for code '%c'", code);
17520 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17523 /* Print an iWMMXt control register name. */
17525 if (GET_CODE (x) != CONST_INT
17527 || INTVAL (x) >= 16)
17528 /* Bad value for wC register number. */
17530 output_operand_lossage ("invalid operand for code '%c'", code);
17536 static const char * wc_reg_names [16] =
17538 "wCID", "wCon", "wCSSF", "wCASF",
17539 "wC4", "wC5", "wC6", "wC7",
17540 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17541 "wC12", "wC13", "wC14", "wC15"
17544 fprintf (stream, wc_reg_names [INTVAL (x)]);
/* Print the high single-precision register of a VFP double-precision
   register.  */
17552 int mode = GET_MODE (x);
17555 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17557 output_operand_lossage ("invalid operand for code '%c'", code);
17562 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17564 output_operand_lossage ("invalid operand for code '%c'", code);
17568 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17572 /* Print a VFP/Neon double precision or quad precision register name. */
17576 int mode = GET_MODE (x);
17577 int is_quad = (code == 'q');
17580 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17582 output_operand_lossage ("invalid operand for code '%c'", code);
17586 if (GET_CODE (x) != REG
17587 || !IS_VFP_REGNUM (REGNO (x)))
17589 output_operand_lossage ("invalid operand for code '%c'", code);
17594 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17595 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17597 output_operand_lossage ("invalid operand for code '%c'", code);
17601 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17602 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17606 /* These two codes print the low/high doubleword register of a Neon quad
17607 register, respectively. For pair-structure types, can also print
17608 low/high quadword registers. */
17612 int mode = GET_MODE (x);
17615 if ((GET_MODE_SIZE (mode) != 16
17616 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17618 output_operand_lossage ("invalid operand for code '%c'", code);
17623 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17625 output_operand_lossage ("invalid operand for code '%c'", code);
17629 if (GET_MODE_SIZE (mode) == 16)
17630 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17631 + (code == 'f' ? 1 : 0));
17633 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17634 + (code == 'f' ? 1 : 0));
/* Print a VFPv3 floating-point constant, represented as an integer
   index.  */
17642 int index = vfp3_const_double_index (x);
17643 gcc_assert (index != -1);
17644 fprintf (stream, "%d", index);
17648 /* Print bits representing opcode features for Neon.
17650 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17651 and polynomials as unsigned.
17653 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17655 Bit 2 is 1 for rounding functions, 0 otherwise. */
17657 /* Identify the type as 's', 'u', 'p' or 'f'. */
17660 HOST_WIDE_INT bits = INTVAL (x);
17661 fputc ("uspf"[bits & 3], stream);
17665 /* Likewise, but signed and unsigned integers are both 'i'. */
17668 HOST_WIDE_INT bits = INTVAL (x);
17669 fputc ("iipf"[bits & 3], stream);
17673 /* As for 'T', but emit 'u' instead of 'p'. */
17676 HOST_WIDE_INT bits = INTVAL (x);
17677 fputc ("usuf"[bits & 3], stream);
17681 /* Bit 2: rounding (vs none). */
17684 HOST_WIDE_INT bits = INTVAL (x);
17685 fputs ((bits & 4) != 0 ? "r" : "", stream);
17689 /* Memory operand for vld1/vst1 instruction. */
17693 bool postinc = FALSE;
17694 unsigned align, memsize, align_bits;
17696 gcc_assert (GET_CODE (x) == MEM);
17697 addr = XEXP (x, 0);
17698 if (GET_CODE (addr) == POST_INC)
17701 addr = XEXP (addr, 0);
17703 asm_fprintf (stream, "[%r", REGNO (addr));
17705 /* We know the alignment of this access, so we can emit a hint in the
instruction (for some alignments) as an aid to the memory subsystem
of the CPU.  */
17708 align = MEM_ALIGN (x) >> 3;
17709 memsize = MEM_SIZE (x);
17711 /* Only certain alignment specifiers are supported by the hardware. */
17712 if (memsize == 16 && (align % 32) == 0)
17714 else if (memsize == 16 && (align % 16) == 0)
17716 else if (memsize >= 8 && (align % 8) == 0)
17721 if (align_bits != 0)
17722 asm_fprintf (stream, ":%d", align_bits);
17724 asm_fprintf (stream, "]");
17727 fputs("!", stream);
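/* E.g. a 16-byte access known to be 256-bit aligned prints as
   "[r0:256]", or "[r0:256]!" when post-incremented; when no suitable
   alignment guarantee exists the plain "[r0]" form is used.  */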
17735 gcc_assert (GET_CODE (x) == MEM);
17736 addr = XEXP (x, 0);
17737 gcc_assert (GET_CODE (addr) == REG);
17738 asm_fprintf (stream, "[%r]", REGNO (addr));
17742 /* Translate an S register number into a D register number and element index. */
17745 int mode = GET_MODE (x);
17748 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17750 output_operand_lossage ("invalid operand for code '%c'", code);
17755 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17757 output_operand_lossage ("invalid operand for code '%c'", code);
17761 regno = regno - FIRST_VFP_REGNUM;
17762 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17767 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17768 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17771 /* Register specifier for vld1.16/vst1.16. Translate the S register
17772 number into a D register number and element index. */
17775 int mode = GET_MODE (x);
17778 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17780 output_operand_lossage ("invalid operand for code '%c'", code);
17785 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17787 output_operand_lossage ("invalid operand for code '%c'", code);
17791 regno = regno - FIRST_VFP_REGNUM;
17792 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17799 output_operand_lossage ("missing operand");
17803 switch (GET_CODE (x))
17806 asm_fprintf (stream, "%r", REGNO (x));
17810 output_memory_reference_mode = GET_MODE (x);
17811 output_address (XEXP (x, 0));
17818 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17819 sizeof (fpstr), 0, 1);
17820 fprintf (stream, "#%s", fpstr);
17823 fprintf (stream, "#%s", fp_immediate_constant (x));
17827 gcc_assert (GET_CODE (x) != NEG);
17828 fputc ('#', stream);
17829 if (GET_CODE (x) == HIGH)
17831 fputs (":lower16:", stream);
17835 output_addr_const (stream, x);
17841 /* Target hook for printing a memory address. */
17843 arm_print_operand_address (FILE *stream, rtx x)
17847 int is_minus = GET_CODE (x) == MINUS;
17849 if (GET_CODE (x) == REG)
17850 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17851 else if (GET_CODE (x) == PLUS || is_minus)
17853 rtx base = XEXP (x, 0);
17854 rtx index = XEXP (x, 1);
17855 HOST_WIDE_INT offset = 0;
17856 if (GET_CODE (base) != REG
17857 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17859 /* Ensure that BASE is a register. */
17860 /* (one of them must be). */
/* Also ensure the SP is not used as an index register.  */
17866 switch (GET_CODE (index))
17869 offset = INTVAL (index);
17872 asm_fprintf (stream, "[%r, #%wd]",
17873 REGNO (base), offset);
17877 asm_fprintf (stream, "[%r, %s%r]",
17878 REGNO (base), is_minus ? "-" : "",
17888 asm_fprintf (stream, "[%r, %s%r",
17889 REGNO (base), is_minus ? "-" : "",
17890 REGNO (XEXP (index, 0)));
17891 arm_print_operand (stream, index, 'S');
17892 fputs ("]", stream);
17897 gcc_unreachable ();
17900 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17901 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17903 extern enum machine_mode output_memory_reference_mode;
17905 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17907 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17908 asm_fprintf (stream, "[%r, #%s%d]!",
17909 REGNO (XEXP (x, 0)),
17910 GET_CODE (x) == PRE_DEC ? "-" : "",
17911 GET_MODE_SIZE (output_memory_reference_mode));
17913 asm_fprintf (stream, "[%r], #%s%d",
17914 REGNO (XEXP (x, 0)),
17915 GET_CODE (x) == POST_DEC ? "-" : "",
17916 GET_MODE_SIZE (output_memory_reference_mode));
17918 else if (GET_CODE (x) == PRE_MODIFY)
17920 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17921 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17922 asm_fprintf (stream, "#%wd]!",
17923 INTVAL (XEXP (XEXP (x, 1), 1)));
17925 asm_fprintf (stream, "%r]!",
17926 REGNO (XEXP (XEXP (x, 1), 1)));
17928 else if (GET_CODE (x) == POST_MODIFY)
17930 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17931 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17932 asm_fprintf (stream, "#%wd",
17933 INTVAL (XEXP (XEXP (x, 1), 1)));
17935 asm_fprintf (stream, "%r",
17936 REGNO (XEXP (XEXP (x, 1), 1)));
17938 else output_addr_const (stream, x);
17942 if (GET_CODE (x) == REG)
17943 asm_fprintf (stream, "[%r]", REGNO (x));
17944 else if (GET_CODE (x) == POST_INC)
17945 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17946 else if (GET_CODE (x) == PLUS)
17948 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17950 asm_fprintf (stream, "[%r, #%wd]",
17951 REGNO (XEXP (x, 0)),
17952 INTVAL (XEXP (x, 1)));
17954 asm_fprintf (stream, "[%r, %r]",
17955 REGNO (XEXP (x, 0)),
17956 REGNO (XEXP (x, 1)));
17959 output_addr_const (stream, x);
17963 /* Target hook for indicating whether a punctuation character for
17964 TARGET_PRINT_OPERAND is valid. */
17965 static bool
17966 arm_print_operand_punct_valid_p (unsigned char code)
17968 return (code == '@' || code == '|' || code == '.'
17969 || code == '(' || code == ')' || code == '#'
17970 || (TARGET_32BIT && (code == '?'))
17971 || (TARGET_THUMB2 && (code == '!'))
17972 || (TARGET_THUMB && (code == '_')));
17975 /* Target hook for assembling integer objects. The ARM version needs to
17976 handle word-sized values specially. */
17977 static bool
17978 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17980 enum machine_mode mode;
17982 if (size == UNITS_PER_WORD && aligned_p)
17984 fputs ("\t.word\t", asm_out_file);
17985 output_addr_const (asm_out_file, x);
17987 /* Mark symbols as position independent. We only do this in the
17988 .text segment, not in the .data segment. */
17989 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17990 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17992 /* See legitimize_pic_address for an explanation of the
17993 TARGET_VXWORKS_RTP check. */
17994 if (TARGET_VXWORKS_RTP
17995 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17996 fputs ("(GOT)", asm_out_file);
17998 fputs ("(GOTOFF)", asm_out_file);
18000 fputc ('\n', asm_out_file);
18004 mode = GET_MODE (x);
18006 if (arm_vector_mode_supported_p (mode))
18010 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18012 units = CONST_VECTOR_NUNITS (x);
18013 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18016 for (i = 0; i < units; i++)
18018 rtx elt = CONST_VECTOR_ELT (x, i);
18019 assemble_integer
18020 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18023 for (i = 0; i < units; i++)
18025 rtx elt = CONST_VECTOR_ELT (x, i);
18026 REAL_VALUE_TYPE rval;
18028 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18030 assemble_real
18031 (rval, GET_MODE_INNER (mode),
18032 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18038 return default_assemble_integer (x, size, aligned_p);
18041 static void
18042 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18046 if (!TARGET_AAPCS_BASED)
18049 default_named_section_asm_out_constructor
18050 : default_named_section_asm_out_destructor) (symbol, priority);
18054 /* Put these in the .init_array section, using a special relocation. */
18055 if (priority != DEFAULT_INIT_PRIORITY)
18058 sprintf (buf, "%s.%.5u",
18059 is_ctor ? ".init_array" : ".fini_array",
18060 priority);
18061 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18068 switch_to_section (s);
18069 assemble_align (POINTER_SIZE);
18070 fputs ("\t.word\t", asm_out_file);
18071 output_addr_const (asm_out_file, symbol);
18072 fputs ("(target1)\n", asm_out_file);
18075 /* Add a function to the list of static constructors. */
18077 static void
18078 arm_elf_asm_constructor (rtx symbol, int priority)
18080 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18083 /* Add a function to the list of static destructors. */
18085 static void
18086 arm_elf_asm_destructor (rtx symbol, int priority)
18088 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18091 /* A finite state machine takes care of noticing whether or not instructions
18092 can be conditionally executed, and thus decreases execution time and code
18093 size by deleting branch instructions. The fsm is controlled by
18094 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18096 /* The states of the fsm controlling condition codes are:
18097 0: normal, do nothing special
18098 1: make ASM_OUTPUT_OPCODE not output this instruction
18099 2: make ASM_OUTPUT_OPCODE not output this instruction
18100 3: make instructions conditional
18101 4: make instructions conditional
18103 State transitions (state->state by whom under condition):
18104 0 -> 1 final_prescan_insn if the `target' is a label
18105 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18106 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18107 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18108 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18109 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18110 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18111 (the target insn is arm_target_insn).
18113 If the jump clobbers the conditions then we use states 2 and 4.
18115 A similar thing can be done with conditional return insns.
18117 XXX In case the `target' is an unconditional branch, this conditionalising
18118 of the instructions always reduces code size, but not always execution
18119 time. But then, I want to reduce the code size to somewhere near what
18120 /bin/cc produces. */
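/* As an illustration (example assembly, not taken from the sources):
   a short forward skip such as

	cmp	r0, #0
	beq	.L1
	mov	r1, #1
	add	r2, r2, #1
   .L1:

   can be emitted with the branch suppressed and the skipped
   instructions conditionalised on the inverse condition:

	cmp	r0, #0
	movne	r1, #1
	addne	r2, r2, #1  */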
18122 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18123 instructions. When a COND_EXEC instruction is seen the subsequent
18124 instructions are scanned so that multiple conditional instructions can be
18125 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18126 specify the length and true/false mask for the IT block. These will be
18127 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
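/* A worked example of the encoding (illustrative values only): with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0x3 (bits 0 and 1 set, bit 2 clear),
   thumb2_asm_output_opcode below emits "itte eq", describing a block
   such as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #1
	movne	r0, #0  */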
18129 /* Returns the index of the ARM condition code string in
18130 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18131 COMPARISON should be an rtx like `(eq (...) (...))'. */
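/* For example, (eq (reg:CC CC_REGNUM) (const_int 0)) in plain CCmode
   maps to ARM_EQ, while an unordered comparison such as (unlt ...) in
   a floating-point CC mode maps to ARM_LT (see the cases below).  */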
18133 static enum arm_cond_code
18134 maybe_get_arm_condition_code (rtx comparison)
18136 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18137 enum arm_cond_code code;
18138 enum rtx_code comp_code = GET_CODE (comparison);
18140 if (GET_MODE_CLASS (mode) != MODE_CC)
18141 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18142 XEXP (comparison, 1));
18146 case CC_DNEmode: code = ARM_NE; goto dominance;
18147 case CC_DEQmode: code = ARM_EQ; goto dominance;
18148 case CC_DGEmode: code = ARM_GE; goto dominance;
18149 case CC_DGTmode: code = ARM_GT; goto dominance;
18150 case CC_DLEmode: code = ARM_LE; goto dominance;
18151 case CC_DLTmode: code = ARM_LT; goto dominance;
18152 case CC_DGEUmode: code = ARM_CS; goto dominance;
18153 case CC_DGTUmode: code = ARM_HI; goto dominance;
18154 case CC_DLEUmode: code = ARM_LS; goto dominance;
18155 case CC_DLTUmode: code = ARM_CC;
18158 if (comp_code == EQ)
18159 return ARM_INVERSE_CONDITION_CODE (code);
18160 if (comp_code == NE)
18167 case NE: return ARM_NE;
18168 case EQ: return ARM_EQ;
18169 case GE: return ARM_PL;
18170 case LT: return ARM_MI;
18171 default: return ARM_NV;
18177 case NE: return ARM_NE;
18178 case EQ: return ARM_EQ;
18179 default: return ARM_NV;
18185 case NE: return ARM_MI;
18186 case EQ: return ARM_PL;
18187 default: return ARM_NV;
18192 /* These encodings assume that AC=1 in the FPA system control
18193 byte. This allows us to handle all cases except UNEQ and
18194 LTGT. */
18197 case GE: return ARM_GE;
18198 case GT: return ARM_GT;
18199 case LE: return ARM_LS;
18200 case LT: return ARM_MI;
18201 case NE: return ARM_NE;
18202 case EQ: return ARM_EQ;
18203 case ORDERED: return ARM_VC;
18204 case UNORDERED: return ARM_VS;
18205 case UNLT: return ARM_LT;
18206 case UNLE: return ARM_LE;
18207 case UNGT: return ARM_HI;
18208 case UNGE: return ARM_PL;
18209 /* UNEQ and LTGT do not have a representation. */
18210 case UNEQ: /* Fall through. */
18211 case LTGT: /* Fall through. */
18212 default: return ARM_NV;
18218 case NE: return ARM_NE;
18219 case EQ: return ARM_EQ;
18220 case GE: return ARM_LE;
18221 case GT: return ARM_LT;
18222 case LE: return ARM_GE;
18223 case LT: return ARM_GT;
18224 case GEU: return ARM_LS;
18225 case GTU: return ARM_CC;
18226 case LEU: return ARM_CS;
18227 case LTU: return ARM_HI;
18228 default: return ARM_NV;
18234 case LTU: return ARM_CS;
18235 case GEU: return ARM_CC;
18236 default: return ARM_NV;
18242 case NE: return ARM_NE;
18243 case EQ: return ARM_EQ;
18244 case GEU: return ARM_CS;
18245 case GTU: return ARM_HI;
18246 case LEU: return ARM_LS;
18247 case LTU: return ARM_CC;
18248 default: return ARM_NV;
18254 case GE: return ARM_GE;
18255 case LT: return ARM_LT;
18256 case GEU: return ARM_CS;
18257 case LTU: return ARM_CC;
18258 default: return ARM_NV;
18264 case NE: return ARM_NE;
18265 case EQ: return ARM_EQ;
18266 case GE: return ARM_GE;
18267 case GT: return ARM_GT;
18268 case LE: return ARM_LE;
18269 case LT: return ARM_LT;
18270 case GEU: return ARM_CS;
18271 case GTU: return ARM_HI;
18272 case LEU: return ARM_LS;
18273 case LTU: return ARM_CC;
18274 default: return ARM_NV;
18277 default: gcc_unreachable ();
18281 /* Like maybe_get_arm_condition_code, but never returns ARM_NV. */
18282 static enum arm_cond_code
18283 get_arm_condition_code (rtx comparison)
18285 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18286 gcc_assert (code != ARM_NV);
18290 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18291 instructions. */
18292 void
18293 thumb2_final_prescan_insn (rtx insn)
18295 rtx first_insn = insn;
18296 rtx body = PATTERN (insn);
18298 enum arm_cond_code code;
18302 /* Remove the previous insn from the count of insns to be output. */
18303 if (arm_condexec_count)
18304 arm_condexec_count--;
18306 /* Nothing to do if we are already inside a conditional block. */
18307 if (arm_condexec_count)
18310 if (GET_CODE (body) != COND_EXEC)
18313 /* Conditional jumps are implemented directly. */
18314 if (GET_CODE (insn) == JUMP_INSN)
18317 predicate = COND_EXEC_TEST (body);
18318 arm_current_cc = get_arm_condition_code (predicate);
18320 n = get_attr_ce_count (insn);
18321 arm_condexec_count = 1;
18322 arm_condexec_mask = (1 << n) - 1;
18323 arm_condexec_masklen = n;
18324 /* See if subsequent instructions can be combined into the same block. */
18327 insn = next_nonnote_insn (insn);
18329 /* Jumping into the middle of an IT block is illegal, so a label or
18330 barrier terminates the block. */
18331 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
18334 body = PATTERN (insn);
18335 /* USE and CLOBBER aren't really insns, so just skip them. */
18336 if (GET_CODE (body) == USE
18337 || GET_CODE (body) == CLOBBER)
18340 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18341 if (GET_CODE (body) != COND_EXEC)
18343 /* Allow up to 4 conditionally executed instructions in a block. */
18344 n = get_attr_ce_count (insn);
18345 if (arm_condexec_masklen + n > 4)
18348 predicate = COND_EXEC_TEST (body);
18349 code = get_arm_condition_code (predicate);
18350 mask = (1 << n) - 1;
18351 if (arm_current_cc == code)
18352 arm_condexec_mask |= (mask << arm_condexec_masklen);
18353 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18356 arm_condexec_count++;
18357 arm_condexec_masklen += n;
18359 /* A jump must be the last instruction in a conditional block. */
18360 if (GET_CODE (insn) == JUMP_INSN)
18363 /* Restore recog_data (getting the attributes of other insns can
18364 destroy this array, but final.c assumes that it remains intact
18365 across this call). */
18366 extract_constrain_insn_cached (first_insn);
18369 void
18370 arm_final_prescan_insn (rtx insn)
18372 /* BODY will hold the body of INSN. */
18373 rtx body = PATTERN (insn);
18375 /* This will be 1 if trying to repeat the trick, and things need to be
18376 reversed if it appears to fail. */
18377 int reverse = 0;
18379 /* If we start with a return insn, we only succeed if we find another one. */
18380 int seeking_return = 0;
18381 enum rtx_code return_code = UNKNOWN;
18383 /* START_INSN will hold the insn from where we start looking. This is the
18384 first insn after the following code_label if REVERSE is true. */
18385 rtx start_insn = insn;
18387 /* If in state 4, check if the target branch is reached, in order to
18388 change back to state 0. */
18389 if (arm_ccfsm_state == 4)
18391 if (insn == arm_target_insn)
18393 arm_target_insn = NULL;
18394 arm_ccfsm_state = 0;
18399 /* If in state 3, it is possible to repeat the trick, if this insn is an
18400 unconditional branch to a label, and immediately following this branch
18401 is the previous target label which is only used once, and the label this
18402 branch jumps to is not too far off. */
18403 if (arm_ccfsm_state == 3)
18405 if (simplejump_p (insn))
18407 start_insn = next_nonnote_insn (start_insn);
18408 if (GET_CODE (start_insn) == BARRIER)
18410 /* XXX Isn't this always a barrier? */
18411 start_insn = next_nonnote_insn (start_insn);
18413 if (GET_CODE (start_insn) == CODE_LABEL
18414 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18415 && LABEL_NUSES (start_insn) == 1)
18420 else if (ANY_RETURN_P (body))
18422 start_insn = next_nonnote_insn (start_insn);
18423 if (GET_CODE (start_insn) == BARRIER)
18424 start_insn = next_nonnote_insn (start_insn);
18425 if (GET_CODE (start_insn) == CODE_LABEL
18426 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18427 && LABEL_NUSES (start_insn) == 1)
18430 seeking_return = 1;
18431 return_code = GET_CODE (body);
18440 gcc_assert (!arm_ccfsm_state || reverse);
18441 if (GET_CODE (insn) != JUMP_INSN)
18444 /* This jump might be paralleled with a clobber of the condition codes;
18445 the jump should always come first. */
18446 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18447 body = XVECEXP (body, 0, 0);
18450 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18451 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18454 int fail = FALSE, succeed = FALSE;
18455 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18456 int then_not_else = TRUE;
18457 rtx this_insn = start_insn, label = 0;
18459 /* Register the insn jumped to. */
18462 if (!seeking_return)
18463 label = XEXP (SET_SRC (body), 0);
18465 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18466 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18467 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18469 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18470 then_not_else = FALSE;
18472 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18474 seeking_return = 1;
18475 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18477 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18479 seeking_return = 1;
18480 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18481 then_not_else = FALSE;
18484 gcc_unreachable ();
18486 /* See how many insns this branch skips, and what kind of insns. If all
18487 insns are okay, and the label or unconditional branch to the same
18488 label is not too far away, succeed. */
18489 for (insns_skipped = 0;
18490 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18494 this_insn = next_nonnote_insn (this_insn);
18498 switch (GET_CODE (this_insn))
18501 /* Succeed if it is the target label, otherwise fail since
18502 control falls in from somewhere else. */
18503 if (this_insn == label)
18505 arm_ccfsm_state = 1;
18513 /* Succeed if the following insn is the target label.
18515 If return insns are used then the last insn in a function
18516 will be a barrier. */
18517 this_insn = next_nonnote_insn (this_insn);
18518 if (this_insn && this_insn == label)
18520 arm_ccfsm_state = 1;
18528 /* The AAPCS says that conditional calls should not be
18529 used since they make interworking inefficient (the
18530 linker can't transform BL<cond> into BLX). That's
18531 only a problem if the machine has BLX. */
18538 /* Succeed if the following insn is the target label, or
18539 if the following two insns are a barrier and the
18540 target label. */
18541 this_insn = next_nonnote_insn (this_insn);
18542 if (this_insn && GET_CODE (this_insn) == BARRIER)
18543 this_insn = next_nonnote_insn (this_insn);
18545 if (this_insn && this_insn == label
18546 && insns_skipped < max_insns_skipped)
18548 arm_ccfsm_state = 1;
18556 /* If this is an unconditional branch to the same label, succeed.
18557 If it is to another label, do nothing. If it is conditional,
18558 fail. */
18559 /* XXX Probably, the tests for SET and the PC are
18560 unnecessary. */
18562 scanbody = PATTERN (this_insn);
18563 if (GET_CODE (scanbody) == SET
18564 && GET_CODE (SET_DEST (scanbody)) == PC)
18566 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18567 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18569 arm_ccfsm_state = 2;
18572 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18575 /* Fail if a conditional return is undesirable (e.g. on a
18576 StrongARM), but still allow this if optimizing for size. */
18577 else if (GET_CODE (scanbody) == return_code
18578 && !use_return_insn (TRUE, NULL)
18581 else if (GET_CODE (scanbody) == return_code)
18583 arm_ccfsm_state = 2;
18586 else if (GET_CODE (scanbody) == PARALLEL)
18588 switch (get_attr_conds (this_insn))
18598 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18603 /* Instructions using or affecting the condition codes make it
18604 fail. */
18605 scanbody = PATTERN (this_insn);
18606 if (!(GET_CODE (scanbody) == SET
18607 || GET_CODE (scanbody) == PARALLEL)
18608 || get_attr_conds (this_insn) != CONDS_NOCOND)
18611 /* A conditional Cirrus instruction must be followed by
18612 a non-Cirrus instruction. However, since we
18613 conditionalize instructions in this function, and since
18614 by the time we get here we cannot add instructions
18615 (nops) because shorten_branches() has already been
18616 called, we disable conditionalizing Cirrus
18617 instructions to be safe. */
18618 if (GET_CODE (scanbody) != USE
18619 && GET_CODE (scanbody) != CLOBBER
18620 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18630 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18631 arm_target_label = CODE_LABEL_NUMBER (label);
18634 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18636 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18638 this_insn = next_nonnote_insn (this_insn);
18639 gcc_assert (!this_insn
18640 || (GET_CODE (this_insn) != BARRIER
18641 && GET_CODE (this_insn) != CODE_LABEL));
18645 /* Oh, dear! We ran off the end... give up. */
18646 extract_constrain_insn_cached (insn);
18647 arm_ccfsm_state = 0;
18648 arm_target_insn = NULL;
18651 arm_target_insn = this_insn;
18654 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18655 what it was. */
18656 if (!reverse)
18657 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18659 if (reverse || then_not_else)
18660 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18663 /* Restore recog_data (getting the attributes of other insns can
18664 destroy this array, but final.c assumes that it remains intact
18665 across this call). */
18666 extract_constrain_insn_cached (insn);
18670 /* Output IT instructions. */
18671 void
18672 thumb2_asm_output_opcode (FILE * stream)
18677 if (arm_condexec_mask)
18679 for (n = 0; n < arm_condexec_masklen; n++)
18680 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18681 buff[n] = 0;
18682 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18683 arm_condition_codes[arm_current_cc]);
18684 arm_condexec_mask = 0;
18688 /* Returns true if REGNO is a valid register
18689 for holding a quantity of type MODE. */
18690 int
18691 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18693 if (GET_MODE_CLASS (mode) == MODE_CC)
18694 return (regno == CC_REGNUM
18695 || (TARGET_HARD_FLOAT && TARGET_VFP
18696 && regno == VFPCC_REGNUM));
18699 /* For the Thumb we only allow values bigger than SImode in
18700 registers 0 - 6, so that there is always a second low
18701 register available to hold the upper part of the value.
18702 We probably ought to ensure that the register is the
18703 start of an even numbered register pair. */
18704 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18706 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18707 && IS_CIRRUS_REGNUM (regno))
18708 /* We have outlawed SI values in Cirrus registers because they
18709 reside in the lower 32 bits, but SF values reside in the
18710 upper 32 bits. This causes gcc all sorts of grief. We can't
18711 even split the registers into pairs because Cirrus SI values
18712 get sign-extended to 64 bits -- aldyh. */
18713 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18715 if (TARGET_HARD_FLOAT && TARGET_VFP
18716 && IS_VFP_REGNUM (regno))
18718 if (mode == SFmode || mode == SImode)
18719 return VFP_REGNO_OK_FOR_SINGLE (regno);
18721 if (mode == DFmode)
18722 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18724 /* VFP registers can hold HFmode values, but there is no point in
18725 putting them there unless we have hardware conversion insns. */
18726 if (mode == HFmode)
18727 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18729 if (TARGET_NEON)
18730 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18731 || (VALID_NEON_QREG_MODE (mode)
18732 && NEON_REGNO_OK_FOR_QUAD (regno))
18733 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18734 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18735 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18736 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18737 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18742 if (TARGET_REALLY_IWMMXT)
18744 if (IS_IWMMXT_GR_REGNUM (regno))
18745 return mode == SImode;
18747 if (IS_IWMMXT_REGNUM (regno))
18748 return VALID_IWMMXT_REG_MODE (mode);
18751 /* We allow almost any value to be stored in the general registers.
18752 Restrict doubleword quantities to even register pairs so that we can
18753 use ldrd. Do not allow very large Neon structure opaque modes in
18754 general registers; they would use too many. */
18755 if (regno <= LAST_ARM_REGNUM)
18756 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18757 && ARM_NUM_REGS (mode) <= 4;
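/* For example (illustration only): when TARGET_LDRD is set, DImode is
   accepted in an even/odd core register pair such as {r0, r1} but is
   rejected starting at an odd register such as r1, since ldrd/strd
   require an even first register.  */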
18759 if (regno == FRAME_POINTER_REGNUM
18760 || regno == ARG_POINTER_REGNUM)
18761 /* We only allow integers in the fake hard registers. */
18762 return GET_MODE_CLASS (mode) == MODE_INT;
18764 /* The only registers left are the FPA registers
18765 which we only allow to hold FP values. */
18766 return (TARGET_HARD_FLOAT && TARGET_FPA
18767 && GET_MODE_CLASS (mode) == MODE_FLOAT
18768 && regno >= FIRST_FPA_REGNUM
18769 && regno <= LAST_FPA_REGNUM);
18772 /* Implement MODES_TIEABLE_P. */
18774 int
18775 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18777 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18780 /* We specifically want to allow elements of "structure" modes to
18781 be tieable to the structure. This more general condition allows
18782 other rarer situations too. */
18783 if (TARGET_NEON
18784 && (VALID_NEON_DREG_MODE (mode1)
18785 || VALID_NEON_QREG_MODE (mode1)
18786 || VALID_NEON_STRUCT_MODE (mode1))
18787 && (VALID_NEON_DREG_MODE (mode2)
18788 || VALID_NEON_QREG_MODE (mode2)
18789 || VALID_NEON_STRUCT_MODE (mode2)))
18795 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18796 not used in ARM mode. */
18798 enum reg_class
18799 arm_regno_class (int regno)
18803 if (regno == STACK_POINTER_REGNUM)
18805 if (regno == CC_REGNUM)
18812 if (TARGET_THUMB2 && regno < 8)
18813 return LO_REGS;
18815 if ( regno <= LAST_ARM_REGNUM
18816 || regno == FRAME_POINTER_REGNUM
18817 || regno == ARG_POINTER_REGNUM)
18818 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18820 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18821 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18823 if (IS_CIRRUS_REGNUM (regno))
18824 return CIRRUS_REGS;
18826 if (IS_VFP_REGNUM (regno))
18828 if (regno <= D7_VFP_REGNUM)
18829 return VFP_D0_D7_REGS;
18830 else if (regno <= LAST_LO_VFP_REGNUM)
18831 return VFP_LO_REGS;
18833 return VFP_HI_REGS;
18836 if (IS_IWMMXT_REGNUM (regno))
18837 return IWMMXT_REGS;
18839 if (IS_IWMMXT_GR_REGNUM (regno))
18840 return IWMMXT_GR_REGS;
18845 /* Handle a special case when computing the offset
18846 of an argument from the frame pointer. */
18847 int
18848 arm_debugger_arg_offset (int value, rtx addr)
18852 /* We are only interested in cases where dbxout_parms() failed to compute the offset. */
18856 /* We can only cope with the case where the address is held in a register. */
18857 if (GET_CODE (addr) != REG)
18860 /* If we are using the frame pointer to point at the argument, then
18861 an offset of 0 is correct. */
18862 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18865 /* If we are using the stack pointer to point at the
18866 argument, then an offset of 0 is correct. */
18867 /* ??? Check this is consistent with thumb2 frame layout. */
18868 if ((TARGET_THUMB || !frame_pointer_needed)
18869 && REGNO (addr) == SP_REGNUM)
18872 /* Oh dear. The argument is pointed to by a register rather
18873 than being held in a register, or being stored at a known
18874 offset from the frame pointer. Since GDB only understands
18875 those two kinds of argument we must translate the address
18876 held in the register into an offset from the frame pointer.
18877 We do this by searching through the insns for the function
18878 looking to see where this register gets its value. If the
18879 register is initialized from the frame pointer plus an offset
18880 then we are in luck and we can continue, otherwise we give up.
18882 This code is exercised by producing debugging information
18883 for a function with arguments like this:
18885 double func (double a, double b, int c, double d) {return d;}
18887 Without this code the stab for parameter 'd' will be set to
18888 an offset of 0 from the frame pointer, rather than 8. */
18890 /* The if() statement says:
18892 If the insn is a normal instruction
18893 and if the insn is setting the value in a register
18894 and if the register being set is the register holding the address of the argument
18895 and if the address is computed by an addition
18896 that involves adding to a register
18897 which is the frame pointer
18898 a constant integer
18900 then... */
18902 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18904 if ( GET_CODE (insn) == INSN
18905 && GET_CODE (PATTERN (insn)) == SET
18906 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18907 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18908 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18909 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18910 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18913 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18922 warning (0, "unable to compute real location of stacked parameter");
18923 value = 8; /* XXX magic hack */
18943 T_MAX /* Size of enum. Keep last. */
18944 } neon_builtin_type_mode;
18946 #define TYPE_MODE_BIT(X) (1 << (X))
18948 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18949 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18950 | TYPE_MODE_BIT (T_DI))
18951 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18952 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18953 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18955 #define v8qi_UP T_V8QI
18956 #define v4hi_UP T_V4HI
18957 #define v2si_UP T_V2SI
18958 #define v2sf_UP T_V2SF
18960 #define v16qi_UP T_V16QI
18961 #define v8hi_UP T_V8HI
18962 #define v4si_UP T_V4SI
18963 #define v4sf_UP T_V4SF
18964 #define v2di_UP T_V2DI
18969 #define UP(X) X##_UP
19002 NEON_LOADSTRUCTLANE,
19004 NEON_STORESTRUCTLANE,
19013 const neon_itype itype;
19014 const neon_builtin_type_mode mode;
19015 const enum insn_code code;
19016 unsigned int fcode;
19017 } neon_builtin_datum;
19019 #define CF(N,X) CODE_FOR_neon_##N##X
19021 #define VAR1(T, N, A) \
19022 {#N, NEON_##T, UP (A), CF (N, A), 0}
19023 #define VAR2(T, N, A, B) \
19024 VAR1 (T, N, A), \
19025 {#N, NEON_##T, UP (B), CF (N, B), 0}
19026 #define VAR3(T, N, A, B, C) \
19027 VAR2 (T, N, A, B), \
19028 {#N, NEON_##T, UP (C), CF (N, C), 0}
19029 #define VAR4(T, N, A, B, C, D) \
19030 VAR3 (T, N, A, B, C), \
19031 {#N, NEON_##T, UP (D), CF (N, D), 0}
19032 #define VAR5(T, N, A, B, C, D, E) \
19033 VAR4 (T, N, A, B, C, D), \
19034 {#N, NEON_##T, UP (E), CF (N, E), 0}
19035 #define VAR6(T, N, A, B, C, D, E, F) \
19036 VAR5 (T, N, A, B, C, D, E), \
19037 {#N, NEON_##T, UP (F), CF (N, F), 0}
19038 #define VAR7(T, N, A, B, C, D, E, F, G) \
19039 VAR6 (T, N, A, B, C, D, E, F), \
19040 {#N, NEON_##T, UP (G), CF (N, G), 0}
19041 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19042 VAR7 (T, N, A, B, C, D, E, F, G), \
19043 {#N, NEON_##T, UP (H), CF (N, H), 0}
19044 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19045 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19046 {#N, NEON_##T, UP (I), CF (N, I), 0}
19047 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19048 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19049 {#N, NEON_##T, UP (J), CF (N, J), 0}
19051 /* The mode entries in the following table correspond to the "key" type of the
19052 instruction variant, i.e. equivalent to that which would be specified after
19053 the assembler mnemonic, which usually refers to the last vector operand.
19054 (Signed/unsigned/polynomial types are not differentiated, though; they
19055 are all mapped onto the same mode for a given element size.) The modes
19056 listed per instruction should be the same as those defined for that
19057 instruction's pattern in neon.md. */
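/* As an illustration of the encoding, the entry
     VAR2 (TERNOP, vqdmlal, v4hi, v2si)
   expands (via VAR1, UP and CF above) to the two initializers
     {"vqdmlal", NEON_TERNOP, T_V4HI, CODE_FOR_neon_vqdmlalv4hi, 0},
     {"vqdmlal", NEON_TERNOP, T_V2SI, CODE_FOR_neon_vqdmlalv2si, 0},
   i.e. one row per "key" mode of the instruction.  */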
19059 static neon_builtin_datum neon_builtin_data[] =
19061 VAR10 (BINOP, vadd,
19062 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19063 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19064 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19065 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19066 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19067 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19068 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19069 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19070 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19071 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19072 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19073 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19074 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19075 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19076 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19077 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19078 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19079 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19080 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19081 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19082 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19083 VAR2 (BINOP, vqdmull, v4hi, v2si),
19084 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19085 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19086 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19087 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19088 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19089 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19090 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19091 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19092 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19093 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19094 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19095 VAR10 (BINOP, vsub,
19096 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19097 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19098 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19099 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19100 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19101 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19102 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19103 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19104 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19105 VAR2 (BINOP, vcage, v2sf, v4sf),
19106 VAR2 (BINOP, vcagt, v2sf, v4sf),
19107 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19108 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19109 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19110 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19111 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19112 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19113 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19114 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19115 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19116 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19117 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19118 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19119 VAR2 (BINOP, vrecps, v2sf, v4sf),
19120 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19121 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19122 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19123 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19124 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19125 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19126 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19127 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19128 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19129 VAR2 (UNOP, vcnt, v8qi, v16qi),
19130 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19131 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19132 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19133 /* FIXME: vget_lane supports more variants than this! */
19134 VAR10 (GETLANE, vget_lane,
19135 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19136 VAR10 (SETLANE, vset_lane,
19137 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19138 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19139 VAR10 (DUP, vdup_n,
19140 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19141 VAR10 (DUPLANE, vdup_lane,
19142 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19143 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19144 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19145 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19146 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19147 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19148 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19149 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19150 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19151 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19152 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19153 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19154 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19155 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19156 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19157 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19158 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19159 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19160 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19161 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19162 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19163 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19164 VAR10 (BINOP, vext,
19165 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19166 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19167 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19168 VAR2 (UNOP, vrev16, v8qi, v16qi),
19169 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19170 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19171 VAR10 (SELECT, vbsl,
19172 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19173 VAR1 (VTBL, vtbl1, v8qi),
19174 VAR1 (VTBL, vtbl2, v8qi),
19175 VAR1 (VTBL, vtbl3, v8qi),
19176 VAR1 (VTBL, vtbl4, v8qi),
19177 VAR1 (VTBX, vtbx1, v8qi),
19178 VAR1 (VTBX, vtbx2, v8qi),
19179 VAR1 (VTBX, vtbx3, v8qi),
19180 VAR1 (VTBX, vtbx4, v8qi),
19181 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19182 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19183 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19184 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19185 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19186 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19187 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19188 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19189 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19190 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19191 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19192 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19193 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19194 VAR10 (LOAD1, vld1,
19195 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19196 VAR10 (LOAD1LANE, vld1_lane,
19197 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19198 VAR10 (LOAD1, vld1_dup,
19199 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19200 VAR10 (STORE1, vst1,
19201 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19202 VAR10 (STORE1LANE, vst1_lane,
19203 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19204 VAR9 (LOADSTRUCT,
19205 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19206 VAR7 (LOADSTRUCTLANE, vld2_lane,
19207 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19208 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19209 VAR9 (STORESTRUCT, vst2,
19210 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19211 VAR7 (STORESTRUCTLANE, vst2_lane,
19212 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19213 VAR9 (LOADSTRUCT,
19214 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19215 VAR7 (LOADSTRUCTLANE, vld3_lane,
19216 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19217 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19218 VAR9 (STORESTRUCT, vst3,
19219 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19220 VAR7 (STORESTRUCTLANE, vst3_lane,
19221 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19222 VAR9 (LOADSTRUCT, vld4,
19223 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19224 VAR7 (LOADSTRUCTLANE, vld4_lane,
19225 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19226 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19227 VAR9 (STORESTRUCT, vst4,
19228 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19229 VAR7 (STORESTRUCTLANE, vst4_lane,
19230 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19231 VAR10 (LOGICBINOP, vand,
19232 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19233 VAR10 (LOGICBINOP, vorr,
19234 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19235 VAR10 (BINOP, veor,
19236 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19237 VAR10 (LOGICBINOP, vbic,
19238 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19239 VAR10 (LOGICBINOP, vorn,
19240 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19255 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19256 symbolic names defined here (which would require too much duplication). */
19260 ARM_BUILTIN_GETWCX,
19261 ARM_BUILTIN_SETWCX,
19265 ARM_BUILTIN_WAVG2BR,
19266 ARM_BUILTIN_WAVG2HR,
19267 ARM_BUILTIN_WAVG2B,
19268 ARM_BUILTIN_WAVG2H,
19275 ARM_BUILTIN_WMACSZ,
19277 ARM_BUILTIN_WMACUZ,
19280 ARM_BUILTIN_WSADBZ,
19282 ARM_BUILTIN_WSADHZ,
19284 ARM_BUILTIN_WALIGN,
19287 ARM_BUILTIN_TMIAPH,
19288 ARM_BUILTIN_TMIABB,
19289 ARM_BUILTIN_TMIABT,
19290 ARM_BUILTIN_TMIATB,
19291 ARM_BUILTIN_TMIATT,
19293 ARM_BUILTIN_TMOVMSKB,
19294 ARM_BUILTIN_TMOVMSKH,
19295 ARM_BUILTIN_TMOVMSKW,
19297 ARM_BUILTIN_TBCSTB,
19298 ARM_BUILTIN_TBCSTH,
19299 ARM_BUILTIN_TBCSTW,
19301 ARM_BUILTIN_WMADDS,
19302 ARM_BUILTIN_WMADDU,
19304 ARM_BUILTIN_WPACKHSS,
19305 ARM_BUILTIN_WPACKWSS,
19306 ARM_BUILTIN_WPACKDSS,
19307 ARM_BUILTIN_WPACKHUS,
19308 ARM_BUILTIN_WPACKWUS,
19309 ARM_BUILTIN_WPACKDUS,
19314 ARM_BUILTIN_WADDSSB,
19315 ARM_BUILTIN_WADDSSH,
19316 ARM_BUILTIN_WADDSSW,
19317 ARM_BUILTIN_WADDUSB,
19318 ARM_BUILTIN_WADDUSH,
19319 ARM_BUILTIN_WADDUSW,
19323 ARM_BUILTIN_WSUBSSB,
19324 ARM_BUILTIN_WSUBSSH,
19325 ARM_BUILTIN_WSUBSSW,
19326 ARM_BUILTIN_WSUBUSB,
19327 ARM_BUILTIN_WSUBUSH,
19328 ARM_BUILTIN_WSUBUSW,
19335 ARM_BUILTIN_WCMPEQB,
19336 ARM_BUILTIN_WCMPEQH,
19337 ARM_BUILTIN_WCMPEQW,
19338 ARM_BUILTIN_WCMPGTUB,
19339 ARM_BUILTIN_WCMPGTUH,
19340 ARM_BUILTIN_WCMPGTUW,
19341 ARM_BUILTIN_WCMPGTSB,
19342 ARM_BUILTIN_WCMPGTSH,
19343 ARM_BUILTIN_WCMPGTSW,
19345 ARM_BUILTIN_TEXTRMSB,
19346 ARM_BUILTIN_TEXTRMSH,
19347 ARM_BUILTIN_TEXTRMSW,
19348 ARM_BUILTIN_TEXTRMUB,
19349 ARM_BUILTIN_TEXTRMUH,
19350 ARM_BUILTIN_TEXTRMUW,
19351 ARM_BUILTIN_TINSRB,
19352 ARM_BUILTIN_TINSRH,
19353 ARM_BUILTIN_TINSRW,
19355 ARM_BUILTIN_WMAXSW,
19356 ARM_BUILTIN_WMAXSH,
19357 ARM_BUILTIN_WMAXSB,
19358 ARM_BUILTIN_WMAXUW,
19359 ARM_BUILTIN_WMAXUH,
19360 ARM_BUILTIN_WMAXUB,
19361 ARM_BUILTIN_WMINSW,
19362 ARM_BUILTIN_WMINSH,
19363 ARM_BUILTIN_WMINSB,
19364 ARM_BUILTIN_WMINUW,
19365 ARM_BUILTIN_WMINUH,
19366 ARM_BUILTIN_WMINUB,
19368 ARM_BUILTIN_WMULUM,
19369 ARM_BUILTIN_WMULSM,
19370 ARM_BUILTIN_WMULUL,
19372 ARM_BUILTIN_PSADBH,
19373 ARM_BUILTIN_WSHUFH,
19387 ARM_BUILTIN_WSLLHI,
19388 ARM_BUILTIN_WSLLWI,
19389 ARM_BUILTIN_WSLLDI,
19390 ARM_BUILTIN_WSRAHI,
19391 ARM_BUILTIN_WSRAWI,
19392 ARM_BUILTIN_WSRADI,
19393 ARM_BUILTIN_WSRLHI,
19394 ARM_BUILTIN_WSRLWI,
19395 ARM_BUILTIN_WSRLDI,
19396 ARM_BUILTIN_WRORHI,
19397 ARM_BUILTIN_WRORWI,
19398 ARM_BUILTIN_WRORDI,
19400 ARM_BUILTIN_WUNPCKIHB,
19401 ARM_BUILTIN_WUNPCKIHH,
19402 ARM_BUILTIN_WUNPCKIHW,
19403 ARM_BUILTIN_WUNPCKILB,
19404 ARM_BUILTIN_WUNPCKILH,
19405 ARM_BUILTIN_WUNPCKILW,
19407 ARM_BUILTIN_WUNPCKEHSB,
19408 ARM_BUILTIN_WUNPCKEHSH,
19409 ARM_BUILTIN_WUNPCKEHSW,
19410 ARM_BUILTIN_WUNPCKEHUB,
19411 ARM_BUILTIN_WUNPCKEHUH,
19412 ARM_BUILTIN_WUNPCKEHUW,
19413 ARM_BUILTIN_WUNPCKELSB,
19414 ARM_BUILTIN_WUNPCKELSH,
19415 ARM_BUILTIN_WUNPCKELSW,
19416 ARM_BUILTIN_WUNPCKELUB,
19417 ARM_BUILTIN_WUNPCKELUH,
19418 ARM_BUILTIN_WUNPCKELUW,
19420 ARM_BUILTIN_THREAD_POINTER,
19422 ARM_BUILTIN_NEON_BASE,
19424 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
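/* For instance, since VAR10 (BINOP, vadd, v8qi, ...) supplies the first
   row of neon_builtin_data, the builtin created for it by
   arm_init_neon_builtins below is named "__builtin_neon_vaddv8qi" and
   receives function code ARM_BUILTIN_NEON_BASE + 0.  */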
19427 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19429 static void
19430 arm_init_neon_builtins (void)
19432 unsigned int i, fcode;
19435 tree neon_intQI_type_node;
19436 tree neon_intHI_type_node;
19437 tree neon_polyQI_type_node;
19438 tree neon_polyHI_type_node;
19439 tree neon_intSI_type_node;
19440 tree neon_intDI_type_node;
19441 tree neon_float_type_node;
19443 tree intQI_pointer_node;
19444 tree intHI_pointer_node;
19445 tree intSI_pointer_node;
19446 tree intDI_pointer_node;
19447 tree float_pointer_node;
19449 tree const_intQI_node;
19450 tree const_intHI_node;
19451 tree const_intSI_node;
19452 tree const_intDI_node;
19453 tree const_float_node;
19455 tree const_intQI_pointer_node;
19456 tree const_intHI_pointer_node;
19457 tree const_intSI_pointer_node;
19458 tree const_intDI_pointer_node;
19459 tree const_float_pointer_node;
19461 tree V8QI_type_node;
19462 tree V4HI_type_node;
19463 tree V2SI_type_node;
19464 tree V2SF_type_node;
19465 tree V16QI_type_node;
19466 tree V8HI_type_node;
19467 tree V4SI_type_node;
19468 tree V4SF_type_node;
19469 tree V2DI_type_node;
19471 tree intUQI_type_node;
19472 tree intUHI_type_node;
19473 tree intUSI_type_node;
19474 tree intUDI_type_node;
19476 tree intEI_type_node;
19477 tree intOI_type_node;
19478 tree intCI_type_node;
19479 tree intXI_type_node;
19481 tree V8QI_pointer_node;
19482 tree V4HI_pointer_node;
19483 tree V2SI_pointer_node;
19484 tree V2SF_pointer_node;
19485 tree V16QI_pointer_node;
19486 tree V8HI_pointer_node;
19487 tree V4SI_pointer_node;
19488 tree V4SF_pointer_node;
19489 tree V2DI_pointer_node;
19491 tree void_ftype_pv8qi_v8qi_v8qi;
19492 tree void_ftype_pv4hi_v4hi_v4hi;
19493 tree void_ftype_pv2si_v2si_v2si;
19494 tree void_ftype_pv2sf_v2sf_v2sf;
19495 tree void_ftype_pdi_di_di;
19496 tree void_ftype_pv16qi_v16qi_v16qi;
19497 tree void_ftype_pv8hi_v8hi_v8hi;
19498 tree void_ftype_pv4si_v4si_v4si;
19499 tree void_ftype_pv4sf_v4sf_v4sf;
19500 tree void_ftype_pv2di_v2di_v2di;
19502 tree reinterp_ftype_dreg[5][5];
19503 tree reinterp_ftype_qreg[5][5];
19504 tree dreg_types[5], qreg_types[5];
19506 /* Create distinguished type nodes for NEON vector element types,
19507 and pointers to values of such types, so we can detect them later. */
19508 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19509 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19510 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19511 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19512 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19513 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19514 neon_float_type_node = make_node (REAL_TYPE);
19515 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19516 layout_type (neon_float_type_node);
19518 /* Define typedefs which exactly correspond to the modes we are basing vector
19519 types on. If you change these names you'll need to change
19520 the table used by arm_mangle_type too. */
19521 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19522 "__builtin_neon_qi");
19523 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19524 "__builtin_neon_hi");
19525 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19526 "__builtin_neon_si");
19527 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19528 "__builtin_neon_sf");
19529 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19530 "__builtin_neon_di");
19531 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19532 "__builtin_neon_poly8");
19533 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19534 "__builtin_neon_poly16");
19536 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19537 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19538 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19539 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19540 float_pointer_node = build_pointer_type (neon_float_type_node);
19542 /* Next create constant-qualified versions of the above types. */
19543 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19544 TYPE_QUAL_CONST);
19545 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19546 TYPE_QUAL_CONST);
19547 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19548 TYPE_QUAL_CONST);
19549 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19550 TYPE_QUAL_CONST);
19551 const_float_node = build_qualified_type (neon_float_type_node,
19552 TYPE_QUAL_CONST);
19554 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19555 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19556 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19557 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19558 const_float_pointer_node = build_pointer_type (const_float_node);
19560 /* Now create vector types based on our NEON element types. */
19561 /* 64-bit vectors. */
19562 V8QI_type_node =
19563 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19564 V4HI_type_node =
19565 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19566 V2SI_type_node =
19567 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19568 V2SF_type_node =
19569 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19570 /* 128-bit vectors. */
19571 V16QI_type_node =
19572 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19573 V8HI_type_node =
19574 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19575 V4SI_type_node =
19576 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19577 V4SF_type_node =
19578 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19579 V2DI_type_node =
19580 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19582 /* Unsigned integer types for various mode sizes. */
19583 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19584 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19585 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19586 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19588 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19589 "__builtin_neon_uqi");
19590 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19591 "__builtin_neon_uhi");
19592 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19593 "__builtin_neon_usi");
19594 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19595 "__builtin_neon_udi");
19597 /* Opaque integer types for structures of vectors. */
19598 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19599 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19600 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19601 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19603 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19604 "__builtin_neon_ti");
19605 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19606 "__builtin_neon_ei");
19607 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19608 "__builtin_neon_oi");
19609 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19610 "__builtin_neon_ci");
19611 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19612 "__builtin_neon_xi");
19614 /* Pointers to vector types. */
19615 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19616 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19617 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19618 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19619 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19620 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19621 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19622 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19623 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19625 /* Operations which return results as pairs. */
19626 void_ftype_pv8qi_v8qi_v8qi =
19627 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19628 V8QI_type_node, NULL);
19629 void_ftype_pv4hi_v4hi_v4hi =
19630 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19631 V4HI_type_node, NULL);
19632 void_ftype_pv2si_v2si_v2si =
19633 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19634 V2SI_type_node, NULL);
19635 void_ftype_pv2sf_v2sf_v2sf =
19636 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19637 V2SF_type_node, NULL);
19638 void_ftype_pdi_di_di =
19639 build_function_type_list (void_type_node, intDI_pointer_node,
19640 neon_intDI_type_node, neon_intDI_type_node, NULL);
19641 void_ftype_pv16qi_v16qi_v16qi =
19642 build_function_type_list (void_type_node, V16QI_pointer_node,
19643 V16QI_type_node, V16QI_type_node, NULL);
19644 void_ftype_pv8hi_v8hi_v8hi =
19645 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19646 V8HI_type_node, NULL);
19647 void_ftype_pv4si_v4si_v4si =
19648 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19649 V4SI_type_node, NULL);
19650 void_ftype_pv4sf_v4sf_v4sf =
19651 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19652 V4SF_type_node, NULL);
19653 void_ftype_pv2di_v2di_v2di =
19654 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19655 V2DI_type_node, NULL);
19657 dreg_types[0] = V8QI_type_node;
19658 dreg_types[1] = V4HI_type_node;
19659 dreg_types[2] = V2SI_type_node;
19660 dreg_types[3] = V2SF_type_node;
19661 dreg_types[4] = neon_intDI_type_node;
19663 qreg_types[0] = V16QI_type_node;
19664 qreg_types[1] = V8HI_type_node;
19665 qreg_types[2] = V4SI_type_node;
19666 qreg_types[3] = V4SF_type_node;
19667 qreg_types[4] = V2DI_type_node;
19669 for (i = 0; i < 5; i++)
19672 for (j = 0; j < 5; j++)
19674 reinterp_ftype_dreg[i][j]
19675 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19676 reinterp_ftype_qreg[i][j]
19677 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19681 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19682 i < ARRAY_SIZE (neon_builtin_data);
19683 i++, fcode++)
19685 neon_builtin_datum *d = &neon_builtin_data[i];
19687 const char* const modenames[] = {
19688 "v8qi", "v4hi", "v2si", "v2sf", "di",
19689 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19694 int is_load = 0, is_store = 0;
19696 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19703 case NEON_LOAD1LANE:
19704 case NEON_LOADSTRUCT:
19705 case NEON_LOADSTRUCTLANE:
19707 /* Fall through. */
19709 case NEON_STORE1LANE:
19710 case NEON_STORESTRUCT:
19711 case NEON_STORESTRUCTLANE:
19714 /* Fall through. */
19717 case NEON_LOGICBINOP:
19718 case NEON_SHIFTINSERT:
19725 case NEON_SHIFTIMM:
19726 case NEON_SHIFTACC:
19732 case NEON_LANEMULL:
19733 case NEON_LANEMULH:
19735 case NEON_SCALARMUL:
19736 case NEON_SCALARMULL:
19737 case NEON_SCALARMULH:
19738 case NEON_SCALARMAC:
19744 tree return_type = void_type_node, args = void_list_node;
19746 /* Build a function type directly from the insn_data for
19747 this builtin. The build_function_type() function takes
19748 care of removing duplicates for us. */
19749 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19753 if (is_load && k == 1)
19755 /* Neon load patterns always have the memory
19756 operand in the operand 1 position. */
19757 gcc_assert (insn_data[d->code].operand[k].predicate
19758 == neon_struct_operand);
19764 eltype = const_intQI_pointer_node;
19769 eltype = const_intHI_pointer_node;
19774 eltype = const_intSI_pointer_node;
19779 eltype = const_float_pointer_node;
19784 eltype = const_intDI_pointer_node;
19787 default: gcc_unreachable ();
19790 else if (is_store && k == 0)
19792 /* Similarly, Neon store patterns use operand 0 as
19793 the memory location to store to. */
19794 gcc_assert (insn_data[d->code].operand[k].predicate
19795 == neon_struct_operand);
19801 eltype = intQI_pointer_node;
19806 eltype = intHI_pointer_node;
19811 eltype = intSI_pointer_node;
19816 eltype = float_pointer_node;
19821 eltype = intDI_pointer_node;
19824 default: gcc_unreachable ();
19829 switch (insn_data[d->code].operand[k].mode)
19831 case VOIDmode: eltype = void_type_node; break;
19833 case QImode: eltype = neon_intQI_type_node; break;
19834 case HImode: eltype = neon_intHI_type_node; break;
19835 case SImode: eltype = neon_intSI_type_node; break;
19836 case SFmode: eltype = neon_float_type_node; break;
19837 case DImode: eltype = neon_intDI_type_node; break;
19838 case TImode: eltype = intTI_type_node; break;
19839 case EImode: eltype = intEI_type_node; break;
19840 case OImode: eltype = intOI_type_node; break;
19841 case CImode: eltype = intCI_type_node; break;
19842 case XImode: eltype = intXI_type_node; break;
19843 /* 64-bit vectors. */
19844 case V8QImode: eltype = V8QI_type_node; break;
19845 case V4HImode: eltype = V4HI_type_node; break;
19846 case V2SImode: eltype = V2SI_type_node; break;
19847 case V2SFmode: eltype = V2SF_type_node; break;
19848 /* 128-bit vectors. */
19849 case V16QImode: eltype = V16QI_type_node; break;
19850 case V8HImode: eltype = V8HI_type_node; break;
19851 case V4SImode: eltype = V4SI_type_node; break;
19852 case V4SFmode: eltype = V4SF_type_node; break;
19853 case V2DImode: eltype = V2DI_type_node; break;
19854 default: gcc_unreachable ();
19858 if (k == 0 && !is_store)
19859 return_type = eltype;
19861 args = tree_cons (NULL_TREE, eltype, args);
19864 ftype = build_function_type (return_type, args);
19868 case NEON_RESULTPAIR:
19870 switch (insn_data[d->code].operand[1].mode)
19872 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19873 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19874 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19875 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19876 case DImode: ftype = void_ftype_pdi_di_di; break;
19877 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19878 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19879 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19880 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19881 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19882 default: gcc_unreachable ();
19887 case NEON_REINTERP:
19889 /* We iterate over 5 doubleword types, then 5 quadword types. */
19891 int rhs = d->mode % 5;
19892 switch (insn_data[d->code].operand[0].mode)
19894 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19895 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19896 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19897 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19898 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19899 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19900 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19901 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19902 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19903 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19904 default: gcc_unreachable ();
19910 gcc_unreachable ();
19913 gcc_assert (ftype != NULL);
19915 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19917 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19919 arm_builtin_decls[fcode] = decl;
19923 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19926 if ((MASK) & insn_flags) \
19929 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19930 BUILT_IN_MD, NULL, NULL_TREE); \
19931 arm_builtin_decls[CODE] = bdecl; \
19936 struct builtin_description
19938 const unsigned int mask;
19939 const enum insn_code icode;
19940 const char * const name;
19941 const enum arm_builtins code;
19942 const enum rtx_code comparison;
19943 const unsigned int flag;
19946 static const struct builtin_description bdesc_2arg[] =
19948 #define IWMMXT_BUILTIN(code, string, builtin) \
19949 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19950 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19952 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19953 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19954 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19955 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19956 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19957 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19958 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19959 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19960 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19961 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19962 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19963 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19964 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19965 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19966 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19967 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19968 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19969 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19970 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19971 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19972 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19973 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19974 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19975 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19976 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19977 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19978 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19979 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19980 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19981 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19982 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19983 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19984 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19985 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19986 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19987 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19988 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19989 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19990 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19991 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19992 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19993 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19994 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19995 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19996 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19997 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19998 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19999 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20000 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20001 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20002 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20003 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20004 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20005 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20006 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20007 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20008 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
20009 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
20011 #define IWMMXT_BUILTIN2(code, builtin) \
20012 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20014 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20015 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20016 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20017 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20018 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20019 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20020 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
20021 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20022 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
20023 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20024 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
20025 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
20026 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
20027 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20028 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
20029 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20030 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
20031 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
20032 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
20033 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20034 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
20035 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20036 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
20037 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
20038 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
20039 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
20040 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
20041 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
20042 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
20043 IWMMXT_BUILTIN2 (rordi3, WRORDI)
20044 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20045 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20048 static const struct builtin_description bdesc_1arg[] =
20050 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20051 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20052 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20053 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20054 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20055 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20056 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20057 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20058 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20059 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20060 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20061 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20062 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20063 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20064 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20065 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20066 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20067 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20070 /* Set up all the iWMMXt builtins. This is not called if
20071 TARGET_IWMMXT is zero. */
20074 arm_init_iwmmxt_builtins (void)
20076 const struct builtin_description * d;
20079 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20080 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20081 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20084 = build_function_type_list (integer_type_node,
20085 integer_type_node, NULL_TREE);
20086 tree v8qi_ftype_v8qi_v8qi_int
20087 = build_function_type_list (V8QI_type_node,
20088 V8QI_type_node, V8QI_type_node,
20089 integer_type_node, NULL_TREE);
20090 tree v4hi_ftype_v4hi_int
20091 = build_function_type_list (V4HI_type_node,
20092 V4HI_type_node, integer_type_node, NULL_TREE);
20093 tree v2si_ftype_v2si_int
20094 = build_function_type_list (V2SI_type_node,
20095 V2SI_type_node, integer_type_node, NULL_TREE);
20096 tree v2si_ftype_di_di
20097 = build_function_type_list (V2SI_type_node,
20098 long_long_integer_type_node,
20099 long_long_integer_type_node,
20101 tree di_ftype_di_int
20102 = build_function_type_list (long_long_integer_type_node,
20103 long_long_integer_type_node,
20104 integer_type_node, NULL_TREE);
20105 tree di_ftype_di_int_int
20106 = build_function_type_list (long_long_integer_type_node,
20107 long_long_integer_type_node,
20109 integer_type_node, NULL_TREE);
20110 tree int_ftype_v8qi
20111 = build_function_type_list (integer_type_node,
20112 V8QI_type_node, NULL_TREE);
20113 tree int_ftype_v4hi
20114 = build_function_type_list (integer_type_node,
20115 V4HI_type_node, NULL_TREE);
20116 tree int_ftype_v2si
20117 = build_function_type_list (integer_type_node,
20118 V2SI_type_node, NULL_TREE);
20119 tree int_ftype_v8qi_int
20120 = build_function_type_list (integer_type_node,
20121 V8QI_type_node, integer_type_node, NULL_TREE);
20122 tree int_ftype_v4hi_int
20123 = build_function_type_list (integer_type_node,
20124 V4HI_type_node, integer_type_node, NULL_TREE);
20125 tree int_ftype_v2si_int
20126 = build_function_type_list (integer_type_node,
20127 V2SI_type_node, integer_type_node, NULL_TREE);
20128 tree v8qi_ftype_v8qi_int_int
20129 = build_function_type_list (V8QI_type_node,
20130 V8QI_type_node, integer_type_node,
20131 integer_type_node, NULL_TREE);
20132 tree v4hi_ftype_v4hi_int_int
20133 = build_function_type_list (V4HI_type_node,
20134 V4HI_type_node, integer_type_node,
20135 integer_type_node, NULL_TREE);
20136 tree v2si_ftype_v2si_int_int
20137 = build_function_type_list (V2SI_type_node,
20138 V2SI_type_node, integer_type_node,
20139 integer_type_node, NULL_TREE);
20140 /* Miscellaneous. */
20141 tree v8qi_ftype_v4hi_v4hi
20142 = build_function_type_list (V8QI_type_node,
20143 V4HI_type_node, V4HI_type_node, NULL_TREE);
20144 tree v4hi_ftype_v2si_v2si
20145 = build_function_type_list (V4HI_type_node,
20146 V2SI_type_node, V2SI_type_node, NULL_TREE);
20147 tree v2si_ftype_v4hi_v4hi
20148 = build_function_type_list (V2SI_type_node,
20149 V4HI_type_node, V4HI_type_node, NULL_TREE);
20150 tree v2si_ftype_v8qi_v8qi
20151 = build_function_type_list (V2SI_type_node,
20152 V8QI_type_node, V8QI_type_node, NULL_TREE);
20153 tree v4hi_ftype_v4hi_di
20154 = build_function_type_list (V4HI_type_node,
20155 V4HI_type_node, long_long_integer_type_node,
20157 tree v2si_ftype_v2si_di
20158 = build_function_type_list (V2SI_type_node,
20159 V2SI_type_node, long_long_integer_type_node,
20161 tree void_ftype_int_int
20162 = build_function_type_list (void_type_node,
20163 integer_type_node, integer_type_node,
20166 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20168 = build_function_type_list (long_long_integer_type_node,
20169 V8QI_type_node, NULL_TREE);
20171 = build_function_type_list (long_long_integer_type_node,
20172 V4HI_type_node, NULL_TREE);
20174 = build_function_type_list (long_long_integer_type_node,
20175 V2SI_type_node, NULL_TREE);
20176 tree v2si_ftype_v4hi
20177 = build_function_type_list (V2SI_type_node,
20178 V4HI_type_node, NULL_TREE);
20179 tree v4hi_ftype_v8qi
20180 = build_function_type_list (V4HI_type_node,
20181 V8QI_type_node, NULL_TREE);
20183 tree di_ftype_di_v4hi_v4hi
20184 = build_function_type_list (long_long_unsigned_type_node,
20185 long_long_unsigned_type_node,
20186 V4HI_type_node, V4HI_type_node,
20189 tree di_ftype_v4hi_v4hi
20190 = build_function_type_list (long_long_unsigned_type_node,
20191 V4HI_type_node, V4HI_type_node,
20194 /* Normal vector binops. */
20195 tree v8qi_ftype_v8qi_v8qi
20196 = build_function_type_list (V8QI_type_node,
20197 V8QI_type_node, V8QI_type_node, NULL_TREE);
20198 tree v4hi_ftype_v4hi_v4hi
20199 = build_function_type_list (V4HI_type_node,
20200 V4HI_type_node, V4HI_type_node, NULL_TREE);
20201 tree v2si_ftype_v2si_v2si
20202 = build_function_type_list (V2SI_type_node,
20203 V2SI_type_node, V2SI_type_node, NULL_TREE);
20204 tree di_ftype_di_di
20205 = build_function_type_list (long_long_unsigned_type_node,
20206 long_long_unsigned_type_node,
20207 long_long_unsigned_type_node,
20210 /* Add all builtins that are more or less simple operations on two operands. */
20212 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20214 /* Use one of the operands; the target can have a different mode for
20215 mask-generating compares. */
20216 enum machine_mode mode;
20222 mode = insn_data[d->icode].operand[1].mode;
20227 type = v8qi_ftype_v8qi_v8qi;
20230 type = v4hi_ftype_v4hi_v4hi;
20233 type = v2si_ftype_v2si_v2si;
20236 type = di_ftype_di_di;
20240 gcc_unreachable ();
20243 def_mbuiltin (d->mask, d->name, type, d->code);
20246 /* Add the remaining MMX insns with somewhat more complicated types. */
20247 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20249 ARM_BUILTIN_ ## CODE)
20251 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20252 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20253 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20255 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20256 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20257 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20258 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20259 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20260 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20262 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20263 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20264 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20265 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20266 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20267 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20269 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20270 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20271 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20272 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20273 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20274 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20276 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20277 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20278 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20279 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20280 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20281 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20283 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20285 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20286 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20287 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20288 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20290 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20291 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20292 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20293 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20294 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20295 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20296 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20297 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20298 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20300 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20301 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20302 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20304 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20305 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20306 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20308 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20309 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20310 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20311 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20312 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20313 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20315 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20316 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20317 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20318 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20319 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20320 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20321 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20322 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20323 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20324 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20325 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20326 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20328 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20329 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20330 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20331 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20333 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20334 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20335 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20336 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20337 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20338 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20339 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20341 #undef iwmmx_mbuiltin
20345 arm_init_tls_builtins (void)
20349 ftype = build_function_type (ptr_type_node, void_list_node);
20350 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20351 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20353 TREE_NOTHROW (decl) = 1;
20354 TREE_READONLY (decl) = 1;
20355 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20359 arm_init_fp16_builtins (void)
20361 tree fp16_type = make_node (REAL_TYPE);
20362 TYPE_PRECISION (fp16_type) = 16;
20363 layout_type (fp16_type);
20364 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20368 arm_init_builtins (void)
20370 arm_init_tls_builtins ();
20372 if (TARGET_REALLY_IWMMXT)
20373 arm_init_iwmmxt_builtins ();
20376 arm_init_neon_builtins ();
20378 if (arm_fp16_format)
20379 arm_init_fp16_builtins ();
20382 /* Return the ARM builtin for CODE. */
20385 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20387 if (code >= ARM_BUILTIN_MAX)
20388 return error_mark_node;
20390 return arm_builtin_decls[code];
20393 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20395 static const char *
20396 arm_invalid_parameter_type (const_tree t)
20398 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20399 return N_("function parameters cannot have __fp16 type");
20403 /* Implement TARGET_INVALID_RETURN_TYPE. */
20405 static const char *
20406 arm_invalid_return_type (const_tree t)
20408 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20409 return N_("functions cannot return __fp16 type");
20413 /* Implement TARGET_PROMOTED_TYPE. */
20416 arm_promoted_type (const_tree t)
20418 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20419 return float_type_node;
20423 /* Implement TARGET_CONVERT_TO_TYPE.
20424 Specifically, this hook implements the peculiarity of the ARM
20425 half-precision floating-point C semantics that requires conversions
20426 between __fp16 and double to go through an intermediate conversion to float. */
20429 arm_convert_to_type (tree type, tree expr)
20431 tree fromtype = TREE_TYPE (expr);
20432 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20434 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20435 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20436 return convert (type, convert (float_type_node, expr));
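/* For example (an illustrative sketch of the resulting semantics,
   not compiler code): given

     __fp16 h = 1.0;
     double d = h;

   the widening is performed as (double) (float) h, i.e. through the
   intermediate conversion to float required above.  */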
20440 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20441 This simply adds HFmode as a supported mode; even though we don't
20442 implement arithmetic on this type directly, it's supported by
20443 optabs conversions, much the way the double-word arithmetic is
20444 special-cased in the default hook. */
20447 arm_scalar_mode_supported_p (enum machine_mode mode)
20449 if (mode == HFmode)
20450 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20451 else if (ALL_FIXED_POINT_MODE_P (mode))
20454 return default_scalar_mode_supported_p (mode);
20457 /* Errors in the source file can cause expand_expr to return const0_rtx
20458 where we expect a vector. To avoid crashing, use one of the vector
20459 clear instructions. */
20462 safe_vector_operand (rtx x, enum machine_mode mode)
20464 if (x != const0_rtx)
20466 x = gen_reg_rtx (mode);
20468 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20469 : gen_rtx_SUBREG (DImode, x, 0)));
20473 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20476 arm_expand_binop_builtin (enum insn_code icode,
20477 tree exp, rtx target)
20480 tree arg0 = CALL_EXPR_ARG (exp, 0);
20481 tree arg1 = CALL_EXPR_ARG (exp, 1);
20482 rtx op0 = expand_normal (arg0);
20483 rtx op1 = expand_normal (arg1);
20484 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20485 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20486 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20488 if (VECTOR_MODE_P (mode0))
20489 op0 = safe_vector_operand (op0, mode0);
20490 if (VECTOR_MODE_P (mode1))
20491 op1 = safe_vector_operand (op1, mode1);
20494 || GET_MODE (target) != tmode
20495 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20496 target = gen_reg_rtx (tmode);
20498 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20500 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20501 op0 = copy_to_mode_reg (mode0, op0);
20502 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20503 op1 = copy_to_mode_reg (mode1, op1);
20505 pat = GEN_FCN (icode) (target, op0, op1);
20512 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20515 arm_expand_unop_builtin (enum insn_code icode,
20516 tree exp, rtx target, int do_load)
20519 tree arg0 = CALL_EXPR_ARG (exp, 0);
20520 rtx op0 = expand_normal (arg0);
20521 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20522 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20525 || GET_MODE (target) != tmode
20526 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20527 target = gen_reg_rtx (tmode);
20529 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20532 if (VECTOR_MODE_P (mode0))
20533 op0 = safe_vector_operand (op0, mode0);
20535 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20536 op0 = copy_to_mode_reg (mode0, op0);
20539 pat = GEN_FCN (icode) (target, op0);
20547 NEON_ARG_COPY_TO_REG,
20553 #define NEON_MAX_BUILTIN_ARGS 5
20555 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20556 and return an expression for the accessed memory.
20558 The intrinsic function operates on a block of registers that has
20559 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20560 The function references the memory at EXP in mode MEM_MODE;
20561 this mode may be BLKmode if no more suitable mode is available. */
20564 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20565 enum machine_mode reg_mode,
20566 neon_builtin_type_mode type_mode)
20568 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20569 tree elem_type, upper_bound, array_type;
20571 /* Work out the size of the register block in bytes. */
20572 reg_size = GET_MODE_SIZE (reg_mode);
20574 /* Work out the size of each vector in bytes. */
20575 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20576 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20578 /* Work out how many vectors there are. */
20579 gcc_assert (reg_size % vector_size == 0);
20580 nvectors = reg_size / vector_size;
20582 /* Work out how many elements are being loaded or stored.
20583 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20584 and memory elements; anything else implies a lane load or store. */
20585 if (mem_mode == reg_mode)
20586 nelems = vector_size * nvectors;
20590 /* Work out the type of each element. */
20591 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20592 elem_type = TREE_TYPE (TREE_TYPE (exp));
20594 /* Create a type that describes the full access. */
20595 upper_bound = build_int_cst (size_type_node, nelems - 1);
20596 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20598 /* Dereference EXP using that type. */
20599 exp = convert (build_pointer_type (array_type), exp);
20600 return fold_build2 (MEM_REF, array_type, exp,
20601 build_int_cst (TREE_TYPE (exp), 0));
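/* As an illustration (values assumed for the sketch, not taken from
   any particular builtin): a two-register structure load has a
   16-byte REG_MODE made of D-register vectors, so vector_size == 8
   and nvectors == 2; with MEM_MODE == REG_MODE the array type built
   above spans the whole block, while a lane access (MEM_MODE
   narrower than REG_MODE) covers only one element per vector.  */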
20604 /* Expand a Neon builtin. */
20606 arm_expand_neon_args (rtx target, int icode, int have_retval,
20607 neon_builtin_type_mode type_mode,
20612 tree arg[NEON_MAX_BUILTIN_ARGS];
20613 rtx op[NEON_MAX_BUILTIN_ARGS];
20614 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20615 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20616 enum machine_mode other_mode;
20622 || GET_MODE (target) != tmode
20623 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20624 target = gen_reg_rtx (tmode);
20626 va_start (ap, exp);
20630 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20632 if (thisarg == NEON_ARG_STOP)
20636 opno = argc + have_retval;
20637 mode[argc] = insn_data[icode].operand[opno].mode;
20638 arg[argc] = CALL_EXPR_ARG (exp, argc);
20639 if (thisarg == NEON_ARG_MEMORY)
20641 other_mode = insn_data[icode].operand[1 - opno].mode;
20642 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20643 other_mode, type_mode);
20645 op[argc] = expand_normal (arg[argc]);
20649 case NEON_ARG_COPY_TO_REG:
20650 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20651 if (!(*insn_data[icode].operand[opno].predicate)
20652 (op[argc], mode[argc]))
20653 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20656 case NEON_ARG_CONSTANT:
20657 /* FIXME: This error message is somewhat unhelpful. */
20658 if (!(*insn_data[icode].operand[opno].predicate)
20659 (op[argc], mode[argc]))
20660 error ("argument must be a constant");
20663 case NEON_ARG_MEMORY:
20664 gcc_assert (MEM_P (op[argc]));
20665 PUT_MODE (op[argc], mode[argc]);
20666 /* ??? arm_neon.h uses the same built-in functions for signed
20667 and unsigned accesses, casting where necessary. This isn't alias safe. */
20669 set_mem_alias_set (op[argc], 0);
20670 if (!(*insn_data[icode].operand[opno].predicate)
20671 (op[argc], mode[argc]))
20672 op[argc] = (replace_equiv_address
20673 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20676 case NEON_ARG_STOP:
20677 gcc_unreachable ();
20690 pat = GEN_FCN (icode) (target, op[0]);
20694 pat = GEN_FCN (icode) (target, op[0], op[1]);
20698 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20702 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20706 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20710 gcc_unreachable ();
20716 pat = GEN_FCN (icode) (op[0]);
20720 pat = GEN_FCN (icode) (op[0], op[1]);
20724 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20728 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20732 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20736 gcc_unreachable ();
20747 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20748 constants defined per-instruction or per instruction-variant. Instead, the
20749 required info is looked up in the table neon_builtin_data. */
20751 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20753 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20754 neon_itype itype = d->itype;
20755 enum insn_code icode = d->code;
20756 neon_builtin_type_mode type_mode = d->mode;
20763 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20764 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20768 case NEON_SCALARMUL:
20769 case NEON_SCALARMULL:
20770 case NEON_SCALARMULH:
20771 case NEON_SHIFTINSERT:
20772 case NEON_LOGICBINOP:
20773 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20774 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20778 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20779 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20780 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20784 case NEON_SHIFTIMM:
20785 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20786 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20790 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20791 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20795 case NEON_REINTERP:
20796 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20797 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20801 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20802 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20804 case NEON_RESULTPAIR:
20805 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20806 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20810 case NEON_LANEMULL:
20811 case NEON_LANEMULH:
20812 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20813 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20814 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20817 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20818 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20819 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20821 case NEON_SHIFTACC:
20822 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20823 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20824 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20826 case NEON_SCALARMAC:
20827 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20828 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20829 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20833 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20834 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20838 case NEON_LOADSTRUCT:
20839 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20840 NEON_ARG_MEMORY, NEON_ARG_STOP);
20842 case NEON_LOAD1LANE:
20843 case NEON_LOADSTRUCTLANE:
20844 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20845 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20849 case NEON_STORESTRUCT:
20850 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20851 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20853 case NEON_STORE1LANE:
20854 case NEON_STORESTRUCTLANE:
20855 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20856 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20860 gcc_unreachable ();
20863 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20865 neon_reinterpret (rtx dest, rtx src)
20867 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20870 /* Emit code to place a Neon pair result in memory locations (with equal registers). */
20873 neon_emit_pair_result_insn (enum machine_mode mode,
20874 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20877 rtx mem = gen_rtx_MEM (mode, destaddr);
20878 rtx tmp1 = gen_reg_rtx (mode);
20879 rtx tmp2 = gen_reg_rtx (mode);
20881 emit_insn (intfn (tmp1, op1, op2, tmp2));
20883 emit_move_insn (mem, tmp1);
20884 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20885 emit_move_insn (mem, tmp2);
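/* The second half is stored GET_MODE_SIZE (MODE) bytes past the
   first, so e.g. a pair of V2SI results occupies DESTADDR and
   DESTADDR + 8.  */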
20888 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20889 not to early-clobber SRC registers in the process.
20891 We assume that the operands described by SRC and DEST represent a
20892 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20893 number of components into which the copy has been decomposed. */
20895 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20899 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20900 || REGNO (operands[0]) < REGNO (operands[1]))
20902 for (i = 0; i < count; i++)
20904 operands[2 * i] = dest[i];
20905 operands[2 * i + 1] = src[i];
20910 for (i = 0; i < count; i++)
20912 operands[2 * i] = dest[count - i - 1];
20913 operands[2 * i + 1] = src[count - i - 1];
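/* For example, copying (d1, d2) into (d2, d3) overlaps with the
   destination numbered above the source, so the loop above emits the
   component moves in reverse: d3 <- d2 first, then d2 <- d1, so each
   source register is read before it can be overwritten.  */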
20918 /* Split operands into moves from op[1] + op[2] into op[0]. */
20921 neon_split_vcombine (rtx operands[3])
20923 unsigned int dest = REGNO (operands[0]);
20924 unsigned int src1 = REGNO (operands[1]);
20925 unsigned int src2 = REGNO (operands[2]);
20926 enum machine_mode halfmode = GET_MODE (operands[1]);
20927 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20928 rtx destlo, desthi;
20930 if (src1 == dest && src2 == dest + halfregs)
20932 /* No-op move. Can't split to nothing; emit something. */
20933 emit_note (NOTE_INSN_DELETED);
20937 /* Preserve register attributes for variable tracking. */
20938 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20939 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20940 GET_MODE_SIZE (halfmode));
20942 /* Special case of reversed high/low parts. Use VSWP. */
20943 if (src2 == dest && src1 == dest + halfregs)
20945 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20946 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20947 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20951 if (!reg_overlap_mentioned_p (operands[2], destlo))
20953 /* Try to avoid unnecessary moves if part of the result
20954 is in the right place already. */
20956 emit_move_insn (destlo, operands[1]);
20957 if (src2 != dest + halfregs)
20958 emit_move_insn (desthi, operands[2]);
20962 if (src2 != dest + halfregs)
20963 emit_move_insn (desthi, operands[2]);
20965 emit_move_insn (destlo, operands[1]);
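/* For instance, combining d0 and d1 into q0 is already in place and
   emits only the deleted-insn note; combining d1 and d0 into q0
   takes the VSWP path; any other overlap is resolved by ordering the
   two moves above so the overlapping source is consumed first.  */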
20969 /* Expand an expression EXP that calls a built-in function,
20970 with result going to TARGET if that's convenient
20971 (and in mode MODE if that's convenient).
20972 SUBTARGET may be used as the target for computing one of EXP's operands.
20973 IGNORE is nonzero if the value is to be ignored. */
20976 arm_expand_builtin (tree exp,
20978 rtx subtarget ATTRIBUTE_UNUSED,
20979 enum machine_mode mode ATTRIBUTE_UNUSED,
20980 int ignore ATTRIBUTE_UNUSED)
20982 const struct builtin_description * d;
20983 enum insn_code icode;
20984 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20992 int fcode = DECL_FUNCTION_CODE (fndecl);
20994 enum machine_mode tmode;
20995 enum machine_mode mode0;
20996 enum machine_mode mode1;
20997 enum machine_mode mode2;
20999 if (fcode >= ARM_BUILTIN_NEON_BASE)
21000 return arm_expand_neon_builtin (fcode, exp, target);
21004 case ARM_BUILTIN_TEXTRMSB:
21005 case ARM_BUILTIN_TEXTRMUB:
21006 case ARM_BUILTIN_TEXTRMSH:
21007 case ARM_BUILTIN_TEXTRMUH:
21008 case ARM_BUILTIN_TEXTRMSW:
21009 case ARM_BUILTIN_TEXTRMUW:
21010 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21011 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21012 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21013 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21014 : CODE_FOR_iwmmxt_textrmw);
21016 arg0 = CALL_EXPR_ARG (exp, 0);
21017 arg1 = CALL_EXPR_ARG (exp, 1);
21018 op0 = expand_normal (arg0);
21019 op1 = expand_normal (arg1);
21020 tmode = insn_data[icode].operand[0].mode;
21021 mode0 = insn_data[icode].operand[1].mode;
21022 mode1 = insn_data[icode].operand[2].mode;
21024 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21025 op0 = copy_to_mode_reg (mode0, op0);
21026 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21028 /* @@@ better error message */
21029 error ("selector must be an immediate");
21030 return gen_reg_rtx (tmode);
21033 || GET_MODE (target) != tmode
21034 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21035 target = gen_reg_rtx (tmode);
21036 pat = GEN_FCN (icode) (target, op0, op1);
21042 case ARM_BUILTIN_TINSRB:
21043 case ARM_BUILTIN_TINSRH:
21044 case ARM_BUILTIN_TINSRW:
21045 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21046 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21047 : CODE_FOR_iwmmxt_tinsrw);
21048 arg0 = CALL_EXPR_ARG (exp, 0);
21049 arg1 = CALL_EXPR_ARG (exp, 1);
21050 arg2 = CALL_EXPR_ARG (exp, 2);
21051 op0 = expand_normal (arg0);
21052 op1 = expand_normal (arg1);
21053 op2 = expand_normal (arg2);
21054 tmode = insn_data[icode].operand[0].mode;
21055 mode0 = insn_data[icode].operand[1].mode;
21056 mode1 = insn_data[icode].operand[2].mode;
21057 mode2 = insn_data[icode].operand[3].mode;
21059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21060 op0 = copy_to_mode_reg (mode0, op0);
21061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21062 op1 = copy_to_mode_reg (mode1, op1);
21063 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21065 /* @@@ better error message */
21066 error ("selector must be an immediate");
21070 || GET_MODE (target) != tmode
21071 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21072 target = gen_reg_rtx (tmode);
21073 pat = GEN_FCN (icode) (target, op0, op1, op2);
21079 case ARM_BUILTIN_SETWCX:
21080 arg0 = CALL_EXPR_ARG (exp, 0);
21081 arg1 = CALL_EXPR_ARG (exp, 1);
21082 op0 = force_reg (SImode, expand_normal (arg0));
21083 op1 = expand_normal (arg1);
21084 emit_insn (gen_iwmmxt_tmcr (op1, op0));
21087 case ARM_BUILTIN_GETWCX:
21088 arg0 = CALL_EXPR_ARG (exp, 0);
21089 op0 = expand_normal (arg0);
21090 target = gen_reg_rtx (SImode);
21091 emit_insn (gen_iwmmxt_tmrc (target, op0));
21094 case ARM_BUILTIN_WSHUFH:
21095 icode = CODE_FOR_iwmmxt_wshufh;
21096 arg0 = CALL_EXPR_ARG (exp, 0);
21097 arg1 = CALL_EXPR_ARG (exp, 1);
21098 op0 = expand_normal (arg0);
21099 op1 = expand_normal (arg1);
21100 tmode = insn_data[icode].operand[0].mode;
21101 mode1 = insn_data[icode].operand[1].mode;
21102 mode2 = insn_data[icode].operand[2].mode;
21104 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21105 op0 = copy_to_mode_reg (mode1, op0);
21106 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21108 /* @@@ better error message */
21109 error ("mask must be an immediate");
21113 || GET_MODE (target) != tmode
21114 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21115 target = gen_reg_rtx (tmode);
21116 pat = GEN_FCN (icode) (target, op0, op1);
21122 case ARM_BUILTIN_WSADB:
21123 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21124 case ARM_BUILTIN_WSADH:
21125 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21126 case ARM_BUILTIN_WSADBZ:
21127 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21128 case ARM_BUILTIN_WSADHZ:
21129 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21131 /* Several three-argument builtins. */
21132 case ARM_BUILTIN_WMACS:
21133 case ARM_BUILTIN_WMACU:
21134 case ARM_BUILTIN_WALIGN:
21135 case ARM_BUILTIN_TMIA:
21136 case ARM_BUILTIN_TMIAPH:
21137 case ARM_BUILTIN_TMIATT:
21138 case ARM_BUILTIN_TMIATB:
21139 case ARM_BUILTIN_TMIABT:
21140 case ARM_BUILTIN_TMIABB:
21141 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21142 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21143 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21144 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21145 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21146 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21147 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21148 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21149 : CODE_FOR_iwmmxt_walign);
21150 arg0 = CALL_EXPR_ARG (exp, 0);
21151 arg1 = CALL_EXPR_ARG (exp, 1);
21152 arg2 = CALL_EXPR_ARG (exp, 2);
21153 op0 = expand_normal (arg0);
21154 op1 = expand_normal (arg1);
21155 op2 = expand_normal (arg2);
21156 tmode = insn_data[icode].operand[0].mode;
21157 mode0 = insn_data[icode].operand[1].mode;
21158 mode1 = insn_data[icode].operand[2].mode;
21159 mode2 = insn_data[icode].operand[3].mode;
21161 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21162 op0 = copy_to_mode_reg (mode0, op0);
21163 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21164 op1 = copy_to_mode_reg (mode1, op1);
21165 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21166 op2 = copy_to_mode_reg (mode2, op2);
21168 || GET_MODE (target) != tmode
21169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21170 target = gen_reg_rtx (tmode);
21171 pat = GEN_FCN (icode) (target, op0, op1, op2);
21177 case ARM_BUILTIN_WZERO:
21178 target = gen_reg_rtx (DImode);
21179 emit_insn (gen_iwmmxt_clrdi (target));
21182 case ARM_BUILTIN_THREAD_POINTER:
21183 return arm_load_tp (target);
21189 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21190 if (d->code == (const enum arm_builtins) fcode)
21191 return arm_expand_binop_builtin (d->icode, exp, target);
21193 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21194 if (d->code == (const enum arm_builtins) fcode)
21195 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21197 /* @@@ Should really do something sensible here. */
21201 /* Return the number (counting from 0) of
21202 the least significant set bit in MASK. */
21205 number_of_first_bit_set (unsigned mask)
21207 return ctz_hwi (mask);
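/* Equivalent portable sketch (illustrative only; assumes MASK is
   nonzero):

     int n = 0;
     while ((mask & 1) == 0)
       { mask >>= 1; n++; }
     return n;

   e.g. a MASK of 0x28 (binary 101000) yields 3.  */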
21210 /* Like emit_multi_reg_push, but allowing for a different set of
21211 registers to be described as saved. MASK is the set of registers
21212 to be saved; REAL_REGS is the set of registers to be described as
21213 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21216 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21218 unsigned long regno;
21219 rtx par[10], tmp, reg, insn;
21222 /* Build the parallel of the registers actually being stored. */
21223 for (i = 0; mask; ++i, mask &= mask - 1)
21225 regno = ctz_hwi (mask);
21226 reg = gen_rtx_REG (SImode, regno);
21229 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21231 tmp = gen_rtx_USE (VOIDmode, reg);
21236 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21237 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21238 tmp = gen_frame_mem (BLKmode, tmp);
21239 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21242 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21243 insn = emit_insn (tmp);
21245 /* Always build the stack adjustment note for unwind info. */
21246 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21247 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21250 /* Build the parallel of the registers recorded as saved for unwind. */
21251 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21253 regno = ctz_hwi (real_regs);
21254 reg = gen_rtx_REG (SImode, regno);
21256 tmp = plus_constant (stack_pointer_rtx, j * 4);
21257 tmp = gen_frame_mem (SImode, tmp);
21258 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21259 RTX_FRAME_RELATED_P (tmp) = 1;
21267 RTX_FRAME_RELATED_P (par[0]) = 1;
21268 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21271 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21276 /* Emit code to push or pop registers to or from the stack. F is the
21277 assembly file. MASK is the registers to pop. */
21279 thumb_pop (FILE *f, unsigned long mask)
21282 int lo_mask = mask & 0xFF;
21283 int pushed_words = 0;
21287 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21289 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit. */
21291 thumb_exit (f, -1);
21295 fprintf (f, "\tpop\t{");
21297 /* Look at the low registers first. */
21298 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21302 asm_fprintf (f, "%r", regno);
21304 if ((lo_mask & ~1) != 0)
21311 if (mask & (1 << PC_REGNUM))
21313 /* Catch popping the PC. */
21314 if (TARGET_INTERWORK || TARGET_BACKTRACE
21315 || crtl->calls_eh_return)
21317 /* The PC is never popped directly; instead
21318 it is popped into r3 and then BX is used. */
21319 fprintf (f, "}\n");
21321 thumb_exit (f, -1);
21330 asm_fprintf (f, "%r", PC_REGNUM);
21334 fprintf (f, "}\n");
21337 /* Generate code to return from a thumb function.
21338 If 'reg_containing_return_addr' is -1, then the return address is
21339 actually on the stack, at the stack pointer. */
21341 thumb_exit (FILE *f, int reg_containing_return_addr)
21343 unsigned regs_available_for_popping;
21344 unsigned regs_to_pop;
21346 unsigned available;
21350 int restore_a4 = FALSE;
21352 /* Compute the registers we need to pop. */
21356 if (reg_containing_return_addr == -1)
21358 regs_to_pop |= 1 << LR_REGNUM;
21362 if (TARGET_BACKTRACE)
21364 /* Restore the (ARM) frame pointer and stack pointer. */
21365 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21369 /* If there is nothing to pop then just emit the BX instruction and return. */
21371 if (pops_needed == 0)
21373 if (crtl->calls_eh_return)
21374 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21376 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21379 /* Otherwise if we are not supporting interworking and we have not created
21380 a backtrace structure and the function was not entered in ARM mode then
21381 just pop the return address straight into the PC. */
21382 else if (!TARGET_INTERWORK
21383 && !TARGET_BACKTRACE
21384 && !is_called_in_ARM_mode (current_function_decl)
21385 && !crtl->calls_eh_return)
21387 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21391 /* Find out how many of the (return) argument registers we can corrupt. */
21392 regs_available_for_popping = 0;
21394 /* If returning via __builtin_eh_return, the bottom three registers
21395 all contain information needed for the return. */
21396 if (crtl->calls_eh_return)
21400 /* Otherwise, deduce the registers used from the function's
21401 return value. This is more reliable than examining
21402 df_regs_ever_live_p () because that will be set if the register is
21403 ever used in the function, not just if the register is used
21404 to hold a return value. */
21406 if (crtl->return_rtx != 0)
21407 mode = GET_MODE (crtl->return_rtx);
21409 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21411 size = GET_MODE_SIZE (mode);
21415 /* In a void function we can use any argument register.
21416 In a function that returns a structure on the stack
21417 we can use the second and third argument registers. */
21418 if (mode == VOIDmode)
21419 regs_available_for_popping =
21420 (1 << ARG_REGISTER (1))
21421 | (1 << ARG_REGISTER (2))
21422 | (1 << ARG_REGISTER (3));
21424 regs_available_for_popping =
21425 (1 << ARG_REGISTER (2))
21426 | (1 << ARG_REGISTER (3));
21428 else if (size <= 4)
21429 regs_available_for_popping =
21430 (1 << ARG_REGISTER (2))
21431 | (1 << ARG_REGISTER (3));
21432 else if (size <= 8)
21433 regs_available_for_popping =
21434 (1 << ARG_REGISTER (3));
21437 /* Match registers to be popped with registers into which we pop them. */
21438 for (available = regs_available_for_popping,
21439 required = regs_to_pop;
21440 required != 0 && available != 0;
21441 available &= ~(available & - available),
21442 required &= ~(required & - required))
21445 /* If we have any popping registers left over, remove them. */
21447 regs_available_for_popping &= ~available;
21449 /* Otherwise if we need another popping register we can use
21450 the fourth argument register. */
21451 else if (pops_needed)
21453 /* If we have not found any free argument registers and
21454 reg a4 contains the return address, we must move it. */
21455 if (regs_available_for_popping == 0
21456 && reg_containing_return_addr == LAST_ARG_REGNUM)
21458 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21459 reg_containing_return_addr = LR_REGNUM;
21461 else if (size > 12)
21463 /* Register a4 is being used to hold part of the return value,
21464 but we have dire need of a free, low register. */
21467 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21470 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21472 /* The fourth argument register is available. */
21473 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21479 /* Pop as many registers as we can. */
21480 thumb_pop (f, regs_available_for_popping);
21482 /* Process the registers we popped. */
21483 if (reg_containing_return_addr == -1)
21485 /* The return address was popped into the lowest numbered register. */
21486 regs_to_pop &= ~(1 << LR_REGNUM);
21488 reg_containing_return_addr =
21489 number_of_first_bit_set (regs_available_for_popping);
21491 /* Remove this register from the mask of available registers, so that
21492 the return address will not be corrupted by further pops. */
21493 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21496 /* If we popped other registers then handle them here. */
21497 if (regs_available_for_popping)
21501 /* Work out which register currently contains the frame pointer. */
21502 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21504 /* Move it into the correct place. */
21505 asm_fprintf (f, "\tmov\t%r, %r\n",
21506 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21508 /* (Temporarily) remove it from the mask of popped registers. */
21509 regs_available_for_popping &= ~(1 << frame_pointer);
21510 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21512 if (regs_available_for_popping)
21516 /* We popped the stack pointer as well;
21517 find the register that contains it. */
21518 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21520 /* Move it into the stack register. */
21521 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21523 /* At this point we have popped all necessary registers, so
21524 do not worry about restoring regs_available_for_popping
21525 to its correct value:
21527 assert (pops_needed == 0)
21528 assert (regs_available_for_popping == (1 << frame_pointer))
21529 assert (regs_to_pop == (1 << STACK_POINTER)) */
21533 /* Since we have just moved the popped value into the frame
21534 pointer, the popping register is available for reuse, and
21535 we know that we still have the stack pointer left to pop. */
21536 regs_available_for_popping |= (1 << frame_pointer);
21540 /* If we still have registers left on the stack, but we no longer have
21541 any registers into which we can pop them, then we must move the return
21542 address into the link register and make available the register that we used to hold the return address. */
21544 if (regs_available_for_popping == 0 && pops_needed > 0)
21546 regs_available_for_popping |= 1 << reg_containing_return_addr;
21548 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21549 reg_containing_return_addr);
21551 reg_containing_return_addr = LR_REGNUM;
21554 /* If we have registers left on the stack then pop some more.
21555 We know that at most we will want to pop FP and SP. */
21556 if (pops_needed > 0)
21561 thumb_pop (f, regs_available_for_popping);
21563 /* We have popped either FP or SP.
21564 Move whichever one it is into the correct register. */
21565 popped_into = number_of_first_bit_set (regs_available_for_popping);
21566 move_to = number_of_first_bit_set (regs_to_pop);
21568 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21570 regs_to_pop &= ~(1 << move_to);
21575 /* If we still have not popped everything then we must have only
21576 had one register available to us and we are now popping the SP. */
21577 if (pops_needed > 0)
21581 thumb_pop (f, regs_available_for_popping);
21583 popped_into = number_of_first_bit_set (regs_available_for_popping);
21585 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21587 assert (regs_to_pop == (1 << STACK_POINTER))
21588 assert (pops_needed == 1)
21592 /* If necessary restore the a4 register. */
21595 if (reg_containing_return_addr != LR_REGNUM)
21597 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21598 reg_containing_return_addr = LR_REGNUM;
21601 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21604 if (crtl->calls_eh_return)
21605 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21607 /* Return to caller. */
21608 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21611 /* Scan INSN just before assembler is output for it.
21612 For Thumb-1, we track the status of the condition codes; this
21613 information is used in the cbranchsi4_insn pattern. */
21615 thumb1_final_prescan_insn (rtx insn)
21617 if (flag_print_asm_name)
21618 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21619 INSN_ADDRESSES (INSN_UID (insn)));
21620 /* Don't overwrite the previous setter when we get to a cbranch. */
21621 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21623 enum attr_conds conds;
21625 if (cfun->machine->thumb1_cc_insn)
21627 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21628 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21631 conds = get_attr_conds (insn);
21632 if (conds == CONDS_SET)
21634 rtx set = single_set (insn);
21635 cfun->machine->thumb1_cc_insn = insn;
21636 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21637 cfun->machine->thumb1_cc_op1 = const0_rtx;
21638 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21639 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21641 rtx src1 = XEXP (SET_SRC (set), 1);
21642 if (src1 == const0_rtx)
21643 cfun->machine->thumb1_cc_mode = CCmode;
21646 else if (conds != CONDS_NOCOND)
21647 cfun->machine->thumb1_cc_insn = NULL_RTX;
21652 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21654 unsigned HOST_WIDE_INT mask = 0xff;
21657 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21658 if (val == 0) /* XXX */
21661 for (i = 0; i < 25; i++)
21662 if ((val & (mask << i)) == val)
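/* For example, 0x0003FC00 is 0xFF << 10, so the test above succeeds
   at i == 10, whereas 0x1FF needs nine consecutive bits and never
   fits the 8-bit mask at any shift.  The bound of 25 allows shifts
   up to 0xFF << 24, the last that keeps the byte within 32 bits.  */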
21668 /* Returns nonzero if the current function contains,
21669 or might contain, a far jump. */
21671 thumb_far_jump_used_p (void)
21675 /* This test is only important for leaf functions. */
21676 /* assert (!leaf_function_p ()); */
21678 /* If we have already decided that far jumps may be used,
21679 do not bother checking again, and always return true even if
21680 it turns out that they are not being used. Once we have made
21681 the decision that far jumps are present (and that hence the link
21682 register will be pushed onto the stack) we cannot go back on it. */
21683 if (cfun->machine->far_jump_used)
21686 /* If this function is not being called from the prologue/epilogue
21687 generation code then it must be being called from the
21688 INITIAL_ELIMINATION_OFFSET macro. */
21689 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21691 /* In this case we know that we are being asked about the elimination
21692 of the arg pointer register. If that register is not being used,
21693 then there are no arguments on the stack, and we do not have to
21694 worry that a far jump might force the prologue to push the link
21695 register, changing the stack offsets. In this case we can just
21696 return false, since the presence of far jumps in the function will
21697 not affect stack offsets.
21699 If the arg pointer is live (or if it was live, but has now been
21700 eliminated and so set to dead) then we do have to test to see if
21701 the function might contain a far jump. This test can lead to some
21702 false negatives, since before reload is completed, the length of
21703 branch instructions is not known, so gcc defaults to returning their
21704 longest length, which in turn sets the far jump attribute to true.
21706 A false negative will not result in bad code being generated, but it
21707 will result in a needless push and pop of the link register. We
21708 hope that this does not occur too often.
21710 If we need doubleword stack alignment this could affect the other
21711 elimination offsets so we can't risk getting it wrong. */
21712 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21713 cfun->machine->arg_pointer_live = 1;
21714 else if (!cfun->machine->arg_pointer_live)
21718 /* Check to see if the function contains a branch
21719 insn with the far jump attribute set. */
21720   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21721     {
21722 if (GET_CODE (insn) == JUMP_INSN
21723 /* Ignore tablejump patterns. */
21724 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21725 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21726           && get_attr_far_jump (insn) == FAR_JUMP_YES
21727           )
21728         {
21729           /* Record the fact that we have decided that
21730              the function does use far jumps.  */
21731           cfun->machine->far_jump_used = 1;
21732           return 1;
21733         }
21734     }
21736   return 0;
21737 }
21739 /* Return nonzero if FUNC must be entered in ARM mode. */
21740 int
21741 is_called_in_ARM_mode (tree func)
21742 {
21743   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21745   /* Ignore the problem about functions whose address is taken.  */
21746   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21747     return TRUE;
21749 #ifdef ARM_PE
21750   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21751 #else
21752   return FALSE;
21753 #endif
21754 }
21756 /* Given the stack offsets and register mask in OFFSETS, decide how
21757 many additional registers to push instead of subtracting a constant
21758 from SP. For epilogues the principle is the same except we use pop.
21759 FOR_PROLOGUE indicates which we're generating. */
21760 static int
21761 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21762 {
21763 HOST_WIDE_INT amount;
21764 unsigned long live_regs_mask = offsets->saved_regs_mask;
21765   /* Extract a mask of the ones we can give to the Thumb's push/pop
21766      instruction.  */
21767   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21768 /* Then count how many other high registers will need to be pushed. */
21769 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21770 int n_free, reg_base;
21772 if (!for_prologue && frame_pointer_needed)
21773     amount = offsets->locals_base - offsets->saved_regs;
21774   else
21775     amount = offsets->outgoing_args - offsets->saved_regs;
21777 /* If the stack frame size is 512 exactly, we can save one load
21778      instruction, which should make this a win even when optimizing
21779      for speed.  */
21780   if (!optimize_size && amount != 512)
21781     return 0;
21783   /* Can't do this if there are high registers to push.  */
21784   if (high_regs_pushed != 0)
21785     return 0;
21787 /* Shouldn't do it in the prologue if no registers would normally
21788 be pushed at all. In the epilogue, also allow it if we'll have
21789 a pop insn for the PC. */
21790   if (l_mask == 0
21791       && (for_prologue
21792           || TARGET_BACKTRACE
21793           || (live_regs_mask & 1 << LR_REGNUM) == 0
21794           || TARGET_INTERWORK
21795           || crtl->args.pretend_args_size != 0))
21796     return 0;
21798 /* Don't do this if thumb_expand_prologue wants to emit instructions
21799 between the push and the stack frame allocation. */
21800   if (for_prologue
21801       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21802           || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21803     return 0;
21805   reg_base = 0;
21806   n_free = 0;
21807   if (!for_prologue)
21808     {
21809       reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21810       live_regs_mask >>= reg_base;
21811     }
21813 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21814 && (for_prologue || call_used_regs[reg_base + n_free]))
21815     {
21816       live_regs_mask >>= 1;
21817       n_free++;
21818     }
21820   if (n_free == 0)
21821     return 0;
21822   gcc_assert (amount / 4 * 4 == amount);
21824 if (amount >= 512 && (amount - n_free * 4) < 512)
21825 return (amount - 508) / 4;
21826   if (amount <= n_free * 4)
21827     return amount / 4;
21828   return 0;
21829 }
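/* Worked example (illustrative): when optimizing for size with
   amount == 516 and three free call-clobbered low registers
   (n_free == 3), 516 - 3*4 = 504 < 512, so the function returns
   (516 - 508) / 4 == 2; pushing two extra registers leaves a
   508-byte adjustment that fits a single Thumb-1 SP subtract.  */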
21831 /* The bits which aren't usefully expanded as rtl. */
21832 const char *
21833 thumb_unexpanded_epilogue (void)
21834 {
21835   arm_stack_offsets *offsets;
21836   int regno;
21837   unsigned long live_regs_mask = 0;
21838   int high_regs_pushed = 0;
21839   int extra_pop;
21840   int had_to_push_lr;
21841   int size;
21843   if (cfun->machine->return_used_this_function != 0)
21844     return "";
21846   if (IS_NAKED (arm_current_func_type ()))
21847     return "";
21849 offsets = arm_get_frame_offsets ();
21850 live_regs_mask = offsets->saved_regs_mask;
21851 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21853 /* If we can deduce the registers used from the function's return value.
21854      This is more reliable than examining df_regs_ever_live_p () because that
21855 will be set if the register is ever used in the function, not just if
21856 the register is used to hold a return value. */
21857 size = arm_size_return_regs ();
21859   extra_pop = thumb1_extra_regs_pushed (offsets, false);
21860   if (extra_pop > 0)
21861     {
21862 unsigned long extra_mask = (1 << extra_pop) - 1;
21863       live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21864                                        / UNITS_PER_WORD);
21865     }
21867 /* The prolog may have pushed some high registers to use as
21868 work registers. e.g. the testsuite file:
21869 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21870 compiles to produce:
21871            push {r4, r5, r6, r7, lr}
21872            mov  r7, r9
21873            mov  r6, r8
21874            push {r6, r7}
21875 as part of the prolog. We have to undo that pushing here. */
21877 if (high_regs_pushed)
21879 unsigned long mask = live_regs_mask & 0xff;
21882       /* The available low registers depend on the size of the value we are
21883          returning.  */
21884       if (size <= 12)
21885         mask |= 1 << 3;
21886       if (size <= 8)
21887         mask |= 1 << 2;
21889       if (mask == 0)
21890         /* Oh dear!  We have no low registers into which we can pop
21891            high registers!  */
21892         internal_error
21893           ("no low registers available for popping high registers");
21895 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21896         if (live_regs_mask & (1 << next_hi_reg))
21897           break;
21899       while (high_regs_pushed)
21900         {
21901           /* Find lo register(s) into which the high register(s) can
21902              be popped.  */
21903           for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21904             {
21905               if (mask & (1 << regno))
21906                 high_regs_pushed--;
21907               if (high_regs_pushed == 0)
21908                 break;
21909             }
21911           mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
21913           /* Pop the values into the low register(s).  */
21914           thumb_pop (asm_out_file, mask);
21916           /* Move the value(s) into the high registers.  */
21917           for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21918             {
21919               if (mask & (1 << regno))
21920                 {
21921                   asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21922                                regno);
21924                   for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21925                     if (live_regs_mask & (1 << next_hi_reg))
21926                       break;
21927                 }
21928             }
21929         }
21930       live_regs_mask &= ~0x0f00;
21931     }
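/* The loop above would emit, for high registers r8/r9 with r2/r3
   free, something like (illustrative):

        pop     {r2, r3}
        mov     r8, r2
        mov     r9, r3

   because the Thumb-1 pop instruction cannot name high registers.  */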
21933 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21934 live_regs_mask &= 0xff;
21936 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21938 /* Pop the return address into the PC. */
21939 if (had_to_push_lr)
21940 live_regs_mask |= 1 << PC_REGNUM;
21942 /* Either no argument registers were pushed or a backtrace
21943 structure was created which includes an adjusted stack
21944 pointer, so just pop everything. */
21945 if (live_regs_mask)
21946 thumb_pop (asm_out_file, live_regs_mask);
21948 /* We have either just popped the return address into the
21949      PC or it was kept in LR for the entire function.
21950 Note that thumb_pop has already called thumb_exit if the
21951 PC was in the list. */
21952 if (!had_to_push_lr)
21953 thumb_exit (asm_out_file, LR_REGNUM);
21957 /* Pop everything but the return address. */
21958 if (live_regs_mask)
21959 thumb_pop (asm_out_file, live_regs_mask);
21961       if (had_to_push_lr)
21962         {
21963           if (size > 12)
21964             {
21965               /* We have no free low regs, so save one.  */
21966               asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21967                            LAST_ARG_REGNUM);
21968             }
21970           /* Get the return address into a temporary register.  */
21971           thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21973           if (size > 12)
21974             {
21975               /* Move the return address to lr.  */
21976               asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21977                            LAST_ARG_REGNUM);
21978               /* Restore the low register.  */
21979               asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21980                            IP_REGNUM);
21981               regno = LR_REGNUM;
21982             }
21983           else
21984             regno = LAST_ARG_REGNUM;
21985         }
21986       else
21987         regno = LR_REGNUM;
21989 /* Remove the argument registers that were pushed onto the stack. */
21990 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21991 SP_REGNUM, SP_REGNUM,
21992 crtl->args.pretend_args_size);
21994       thumb_exit (asm_out_file, regno);
21995     }
21997   return "";
21998 }
22000 /* Functions to save and restore machine-specific function data. */
22001 static struct machine_function *
22002 arm_init_machine_status (void)
22004 struct machine_function *machine;
22005 machine = ggc_alloc_cleared_machine_function ();
22007 #if ARM_FT_UNKNOWN != 0
22008   machine->func_type = ARM_FT_UNKNOWN;
22009 #endif
22011   return machine;
22012 }
22013 /* Return an RTX indicating where the return address to the
22014 calling function can be found. */
22015 rtx
22016 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22017 {
22018   if (count != 0)
22019     return NULL_RTX;
22021   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22022 }
22024 /* Do anything needed before RTL is emitted for each function. */
22025 void
22026 arm_init_expanders (void)
22027 {
22028 /* Arrange to initialize and mark the machine per-function status. */
22029 init_machine_status = arm_init_machine_status;
22031 /* This is to stop the combine pass optimizing away the alignment
22032 adjustment of va_arg. */
22033 /* ??? It is claimed that this should not be necessary. */
22034   if (cfun)
22035     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22036 }
22039 /* Like arm_compute_initial_elimination offset. Simpler because there
22040 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22041 to point at the base of the local variables after static stack
22042 space for a function has been allocated. */
22044 HOST_WIDE_INT
22045 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22046 {
22047   arm_stack_offsets *offsets;
22049   offsets = arm_get_frame_offsets ();
22051   switch (from)
22052     {
22053     case ARG_POINTER_REGNUM:
22054       switch (to)
22055         {
22056         case STACK_POINTER_REGNUM:
22057           return offsets->outgoing_args - offsets->saved_args;
22059         case FRAME_POINTER_REGNUM:
22060           return offsets->soft_frame - offsets->saved_args;
22062         case ARM_HARD_FRAME_POINTER_REGNUM:
22063           return offsets->saved_regs - offsets->saved_args;
22065         case THUMB_HARD_FRAME_POINTER_REGNUM:
22066           return offsets->locals_base - offsets->saved_args;
22068         default:
22069           gcc_unreachable ();
22070         }
22071       break;
22073     case FRAME_POINTER_REGNUM:
22074       switch (to)
22075         {
22076         case STACK_POINTER_REGNUM:
22077           return offsets->outgoing_args - offsets->soft_frame;
22079         case ARM_HARD_FRAME_POINTER_REGNUM:
22080           return offsets->saved_regs - offsets->soft_frame;
22082         case THUMB_HARD_FRAME_POINTER_REGNUM:
22083           return offsets->locals_base - offsets->soft_frame;
22085         default:
22086           gcc_unreachable ();
22087         }
22088       break;
22090     default:
22091       gcc_unreachable ();
22092     }
22093 }
22095 /* Generate the function's prologue. */
22097 void
22098 thumb1_expand_prologue (void)
22099 {
22100   rtx insn;
22102 HOST_WIDE_INT amount;
22103 arm_stack_offsets *offsets;
22104 unsigned long func_type;
22106 unsigned long live_regs_mask;
22107 unsigned long l_mask;
22108 unsigned high_regs_pushed = 0;
22110 func_type = arm_current_func_type ();
22112 /* Naked functions don't have prologues. */
22113   if (IS_NAKED (func_type))
22114     return;
22116   if (IS_INTERRUPT (func_type))
22117     {
22118       error ("interrupt Service Routines cannot be coded in Thumb mode");
22119       return;
22120     }
22122 if (is_called_in_ARM_mode (current_function_decl))
22123 emit_insn (gen_prologue_thumb1_interwork ());
22125 offsets = arm_get_frame_offsets ();
22126 live_regs_mask = offsets->saved_regs_mask;
22128 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22129 l_mask = live_regs_mask & 0x40ff;
22130 /* Then count how many other high registers will need to be pushed. */
22131 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22133 if (crtl->args.pretend_args_size)
22135 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22137 if (cfun->machine->uses_anonymous_args)
22139 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22140 unsigned long mask;
22142 mask = 1ul << (LAST_ARG_REGNUM + 1);
22143 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22145 insn = thumb1_emit_multi_reg_push (mask, 0);
22149 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22150 stack_pointer_rtx, x));
22152 RTX_FRAME_RELATED_P (insn) = 1;
22155 if (TARGET_BACKTRACE)
22157 HOST_WIDE_INT offset = 0;
22158 unsigned work_register;
22159 rtx work_reg, x, arm_hfp_rtx;
22161 /* We have been asked to create a stack backtrace structure.
22162 The code looks like this:
22166 0 sub SP, #16 Reserve space for 4 registers.
22167 2 push {R7} Push low registers.
22168 4 add R7, SP, #20 Get the stack pointer before the push.
22169 6 str R7, [SP, #8] Store the stack pointer
22170 (before reserving the space).
22171 8 mov R7, PC Get hold of the start of this code + 12.
22172 10 str R7, [SP, #16] Store it.
22173 12 mov R7, FP Get hold of the current frame pointer.
22174 14 str R7, [SP, #4] Store it.
22175 16 mov R7, LR Get hold of the current return address.
22176 18 str R7, [SP, #12] Store it.
22177 20 add R7, SP, #16 Point at the start of the
22178 backtrace structure.
22179 22 mov FP, R7 Put this value into the frame pointer. */
22181 work_register = thumb_find_work_register (live_regs_mask);
22182 work_reg = gen_rtx_REG (SImode, work_register);
22183 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22185 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22186 stack_pointer_rtx, GEN_INT (-16)));
22187 RTX_FRAME_RELATED_P (insn) = 1;
22191 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22192 RTX_FRAME_RELATED_P (insn) = 1;
22194 offset = bit_count (l_mask) * UNITS_PER_WORD;
22197 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22198 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22200 x = plus_constant (stack_pointer_rtx, offset + 4);
22201 x = gen_frame_mem (SImode, x);
22202 emit_move_insn (x, work_reg);
22204 /* Make sure that the instruction fetching the PC is in the right place
22205 to calculate "start of backtrace creation code + 12". */
22206 /* ??? The stores using the common WORK_REG ought to be enough to
22207 prevent the scheduler from doing anything weird. Failing that
22208 we could always move all of the following into an UNSPEC_VOLATILE. */
22211 x = gen_rtx_REG (SImode, PC_REGNUM);
22212 emit_move_insn (work_reg, x);
22214 x = plus_constant (stack_pointer_rtx, offset + 12);
22215 x = gen_frame_mem (SImode, x);
22216 emit_move_insn (x, work_reg);
22218 emit_move_insn (work_reg, arm_hfp_rtx);
22220 x = plus_constant (stack_pointer_rtx, offset);
22221 x = gen_frame_mem (SImode, x);
22222 emit_move_insn (x, work_reg);
22226 emit_move_insn (work_reg, arm_hfp_rtx);
22228 x = plus_constant (stack_pointer_rtx, offset);
22229 x = gen_frame_mem (SImode, x);
22230 emit_move_insn (x, work_reg);
22232 x = gen_rtx_REG (SImode, PC_REGNUM);
22233 emit_move_insn (work_reg, x);
22235 x = plus_constant (stack_pointer_rtx, offset + 12);
22236 x = gen_frame_mem (SImode, x);
22237 emit_move_insn (x, work_reg);
22240 x = gen_rtx_REG (SImode, LR_REGNUM);
22241 emit_move_insn (work_reg, x);
22243 x = plus_constant (stack_pointer_rtx, offset + 8);
22244 x = gen_frame_mem (SImode, x);
22245 emit_move_insn (x, work_reg);
22247 x = GEN_INT (offset + 12);
22248 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22250 emit_move_insn (arm_hfp_rtx, work_reg);
22252 /* Optimization: If we are not pushing any low registers but we are going
22253 to push some high registers then delay our first push. This will just
22254      be a push of LR and we can combine it with the push of the first high
22255      register.  */
22256 else if ((l_mask & 0xff) != 0
22257 || (high_regs_pushed == 0 && l_mask))
22259 unsigned long mask = l_mask;
22260 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22261 insn = thumb1_emit_multi_reg_push (mask, mask);
22262 RTX_FRAME_RELATED_P (insn) = 1;
22265 if (high_regs_pushed)
22267 unsigned pushable_regs;
22268 unsigned next_hi_reg;
22270 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22271 if (live_regs_mask & (1 << next_hi_reg))
22274 pushable_regs = l_mask & 0xff;
22276 if (pushable_regs == 0)
22277 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22279 while (high_regs_pushed > 0)
22281 unsigned long real_regs_mask = 0;
22283 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22285 if (pushable_regs & (1 << regno))
22287 emit_move_insn (gen_rtx_REG (SImode, regno),
22288 gen_rtx_REG (SImode, next_hi_reg));
22290 high_regs_pushed --;
22291 real_regs_mask |= (1 << next_hi_reg);
22293 if (high_regs_pushed)
22295 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22297 if (live_regs_mask & (1 << next_hi_reg))
22302 pushable_regs &= ~((1 << regno) - 1);
22308 /* If we had to find a work register and we have not yet
22309 saved the LR then add it to the list of regs to push. */
22310 if (l_mask == (1 << LR_REGNUM))
22312 pushable_regs |= l_mask;
22313 real_regs_mask |= l_mask;
22317 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22318           RTX_FRAME_RELATED_P (insn) = 1;
22319         }
22320     }
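/* Illustrative sketch of the insns the loop above creates when r8
   and r9 are live and r4 is the only pushable low register:

        mov     r4, r9
        push    {r4}
        mov     r4, r8
        push    {r4}

   (high registers are staged through free low registers, highest
   first; the exact registers depend on the masks computed above).  */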
22322 /* Load the pic register before setting the frame pointer,
22323 so we can use r7 as a temporary work register. */
22324 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22325 arm_load_pic_register (live_regs_mask);
22327 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22328 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22329 stack_pointer_rtx);
22331 if (flag_stack_usage_info)
22332 current_function_static_stack_size
22333 = offsets->outgoing_args - offsets->saved_args;
22335 amount = offsets->outgoing_args - offsets->saved_regs;
22336   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22337   if (amount)
22338     {
22339       if (amount < 512)
22340         {
22341           insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22342                                         GEN_INT (- amount)));
22343           RTX_FRAME_RELATED_P (insn) = 1;
22344         }
22345       else
22346         {
22347           rtx reg, dwarf;
22349 /* The stack decrement is too big for an immediate value in a single
22350 insn. In theory we could issue multiple subtracts, but after
22351 three of them it becomes more space efficient to place the full
22352 value in the constant pool and load into a register. (Also the
22353 ARM debugger really likes to see only one stack decrement per
22354 function). So instead we look for a scratch register into which
22355 we can load the decrement, and then we subtract this from the
22356 stack pointer. Unfortunately on the thumb the only available
22357 scratch registers are the argument registers, and we cannot use
22358 these as they may hold arguments to the function. Instead we
22359 attempt to locate a call preserved register which is used by this
22360 function. If we can find one, then we know that it will have
22361 been pushed at the start of the prologue and so we can corrupt
22363 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22364             if (live_regs_mask & (1 << regno))
22365               break;
22367 gcc_assert(regno <= LAST_LO_REGNUM);
22369 reg = gen_rtx_REG (SImode, regno);
22371 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22373 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22374 stack_pointer_rtx, reg));
22376 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22377                                plus_constant (stack_pointer_rtx,
22378                                               -amount));
22379 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22380           RTX_FRAME_RELATED_P (insn) = 1;
22381         }
22382     }
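/* E.g. for a 1280-byte frame with r7 live this expands to roughly
   (sketch only, label name hypothetical):

        ldr     r7, .Lframe    @ .Lframe: .word -1280
        add     sp, sp, r7

   instead of a chain of sub sp, #... instructions.  */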
22384 if (frame_pointer_needed)
22385 thumb_set_frame_pointer (offsets);
22387 /* If we are profiling, make sure no instructions are scheduled before
22388 the call to mcount. Similarly if the user has requested no
22389 scheduling in the prolog. Similarly if we want non-call exceptions
22390 using the EABI unwinder, to prevent faulting instructions from being
22391 swapped with a stack adjustment. */
22392 if (crtl->profile || !TARGET_SCHED_PROLOG
22393 || (arm_except_unwind_info (&global_options) == UI_TARGET
22394 && cfun->can_throw_non_call_exceptions))
22395 emit_insn (gen_blockage ());
22397 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22398 if (live_regs_mask & 0xff)
22399     cfun->machine->lr_save_eliminated = 0;
22400 }
22403 void
22404 thumb1_expand_epilogue (void)
22405 {
22406   HOST_WIDE_INT amount;
22407   arm_stack_offsets *offsets;
22408   int regno;
22410 /* Naked functions don't have prologues. */
22411   if (IS_NAKED (arm_current_func_type ()))
22412     return;
22414 offsets = arm_get_frame_offsets ();
22415 amount = offsets->outgoing_args - offsets->saved_regs;
22417   if (frame_pointer_needed)
22418     {
22419       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22420       amount = offsets->locals_base - offsets->saved_regs;
22421     }
22422 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22424   gcc_assert (amount >= 0);
22425   if (amount)
22426     {
22427       emit_insn (gen_blockage ());
22429       if (amount < 512)
22430         emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22431                                GEN_INT (amount)));
22432       else
22433         {
22434           /* r3 is always free in the epilogue.  */
22435           rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22437           emit_insn (gen_movsi (reg, GEN_INT (amount)));
22438           emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22439         }
22440     }
22442 /* Emit a USE (stack_pointer_rtx), so that
22443 the stack adjustment will not be deleted. */
22444 emit_insn (gen_prologue_use (stack_pointer_rtx));
22446 if (crtl->profile || !TARGET_SCHED_PROLOG)
22447 emit_insn (gen_blockage ());
22449 /* Emit a clobber for each insn that will be restored in the epilogue,
22450 so that flow2 will get register lifetimes correct. */
22451 for (regno = 0; regno < 13; regno++)
22452 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22453 emit_clobber (gen_rtx_REG (SImode, regno));
22455 if (! df_regs_ever_live_p (LR_REGNUM))
22456 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22459 /* Implementation of insn prologue_thumb1_interwork. This is the first
22460 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22462 const char *
22463 thumb1_output_interwork (void)
22464 {
22465   const char * name;
22466 FILE *f = asm_out_file;
22468 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22469 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22471 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22473 /* Generate code sequence to switch us into Thumb mode. */
22474 /* The .code 32 directive has already been emitted by
22475 ASM_DECLARE_FUNCTION_NAME. */
22476 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22477 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22479 /* Generate a label, so that the debugger will notice the
22480 change in instruction sets. This label is also used by
22481 the assembler to bypass the ARM code when this function
22482 is called from a Thumb encoded function elsewhere in the
22483 same file. Hence the definition of STUB_NAME here must
22484 agree with the definition in gas/config/tc-arm.c. */
22486 #define STUB_NAME ".real_start_of"
22488 fprintf (f, "\t.code\t16\n");
22490 if (arm_dllexport_name_p (name))
22491 name = arm_strip_name_encoding (name);
22493 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22494 fprintf (f, "\t.thumb_func\n");
22495   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22497   return "";
22498 }
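/* For a function `foo' this emits, roughly (illustrative; the exact
   label spelling comes from %U and STUB_NAME above):

        orr     ip, pc, #1
        bx      ip
        .code   16
        .globl  .real_start_offoo
        .thumb_func
   .real_start_offoo:
   */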
22500 /* Handle the case of a double word load into a low register from
22501 a computed memory address. The computed address may involve a
22502 register which is overwritten by the load. */
22504 thumb_load_double_from_address (rtx *operands)
22512 gcc_assert (GET_CODE (operands[0]) == REG);
22513 gcc_assert (GET_CODE (operands[1]) == MEM);
22515 /* Get the memory address. */
22516 addr = XEXP (operands[1], 0);
22518 /* Work out how the memory address is computed. */
22519 switch (GET_CODE (addr))
22522 operands[2] = adjust_address (operands[1], SImode, 4);
22524 if (REGNO (operands[0]) == REGNO (addr))
22526 output_asm_insn ("ldr\t%H0, %2", operands);
22527 output_asm_insn ("ldr\t%0, %1", operands);
22531 output_asm_insn ("ldr\t%0, %1", operands);
22532 output_asm_insn ("ldr\t%H0, %2", operands);
22537 /* Compute <address> + 4 for the high order load. */
22538 operands[2] = adjust_address (operands[1], SImode, 4);
22540 output_asm_insn ("ldr\t%0, %1", operands);
22541 output_asm_insn ("ldr\t%H0, %2", operands);
22545 arg1 = XEXP (addr, 0);
22546 arg2 = XEXP (addr, 1);
22548 if (CONSTANT_P (arg1))
22549 base = arg2, offset = arg1;
22551 base = arg1, offset = arg2;
22553 gcc_assert (GET_CODE (base) == REG);
22555 /* Catch the case of <address> = <reg> + <reg> */
22556 if (GET_CODE (offset) == REG)
22558 int reg_offset = REGNO (offset);
22559 int reg_base = REGNO (base);
22560 int reg_dest = REGNO (operands[0]);
22562 /* Add the base and offset registers together into the
22563 higher destination register. */
22564 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22565 reg_dest + 1, reg_base, reg_offset);
22567 /* Load the lower destination register from the address in
22568 the higher destination register. */
22569 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22570 reg_dest, reg_dest + 1);
22572           /* Load the higher destination register from its own address
22573              plus 4.  */
22574 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22575 reg_dest + 1, reg_dest + 1);
22579 /* Compute <address> + 4 for the high order load. */
22580 operands[2] = adjust_address (operands[1], SImode, 4);
22582 /* If the computed address is held in the low order register
22583 then load the high order register first, otherwise always
22584 load the low order register first. */
22585 if (REGNO (operands[0]) == REGNO (base))
22587 output_asm_insn ("ldr\t%H0, %2", operands);
22588 output_asm_insn ("ldr\t%0, %1", operands);
22592 output_asm_insn ("ldr\t%0, %1", operands);
22593 output_asm_insn ("ldr\t%H0, %2", operands);
22599       /* With no registers to worry about we can just load the value
22600          directly.  */
22601       operands[2] = adjust_address (operands[1], SImode, 4);
22603 output_asm_insn ("ldr\t%H0, %2", operands);
22604 output_asm_insn ("ldr\t%0, %1", operands);
22608       gcc_unreachable ();
22609     }
22611   return "";
22612 }
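/* Example of the REG case above: for a doubleword load whose base is
   also the low destination register (say r0/r1 from [r0]), the high
   word is fetched first (illustrative):

        ldr     r1, [r0, #4]
        ldr     r0, [r0]

   so the base is not clobbered before its last use.  */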
22614 const char *
22615 thumb_output_move_mem_multiple (int n, rtx *operands)
22616 {
22617   rtx tmp;
22619   switch (n)
22620     {
22621     case 2:
22622       if (REGNO (operands[4]) > REGNO (operands[5]))
22623         {
22624           tmp = operands[4];
22625           operands[4] = operands[5];
22626           operands[5] = tmp;
22627         }
22628 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22629 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22633       if (REGNO (operands[4]) > REGNO (operands[5]))
22634         {
22635           tmp = operands[4];
22636           operands[4] = operands[5];
22637           operands[5] = tmp;
22638         }
22639       if (REGNO (operands[5]) > REGNO (operands[6]))
22640         {
22641           tmp = operands[5];
22642           operands[5] = operands[6];
22643           operands[6] = tmp;
22644         }
22645       if (REGNO (operands[4]) > REGNO (operands[5]))
22646         {
22647           tmp = operands[4];
22648           operands[4] = operands[5];
22649           operands[5] = tmp;
22650         }
22652 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22653 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22654       break;
22656     default:
22657       gcc_unreachable ();
22658     }
22660   return "";
22661 }
22663 /* Output a call-via instruction for thumb state. */
22665 thumb_call_via_reg (rtx reg)
22667 int regno = REGNO (reg);
22670 gcc_assert (regno < LR_REGNUM);
22672 /* If we are in the normal text section we can use a single instance
22673 per compilation unit. If we are doing function sections, then we need
22674 an entry per section, since we can't rely on reachability. */
22675 if (in_section == text_section)
22677 thumb_call_reg_needed = 1;
22679 if (thumb_call_via_label[regno] == NULL)
22680 thumb_call_via_label[regno] = gen_label_rtx ();
22681 labelp = thumb_call_via_label + regno;
22685 if (cfun->machine->call_via[regno] == NULL)
22686 cfun->machine->call_via[regno] = gen_label_rtx ();
22687 labelp = cfun->machine->call_via + regno;
22690 output_asm_insn ("bl\t%a0", labelp);
22694 /* Routines for generating rtl. */
22696 thumb_expand_movmemqi (rtx *operands)
22698 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22699 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22700 HOST_WIDE_INT len = INTVAL (operands[2]);
22701 HOST_WIDE_INT offset = 0;
22705 emit_insn (gen_movmem12b (out, in, out, in));
22711 emit_insn (gen_movmem8b (out, in, out, in));
22717 rtx reg = gen_reg_rtx (SImode);
22718 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22719 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22726 rtx reg = gen_reg_rtx (HImode);
22727 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22728 plus_constant (in, offset))));
22729 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22737 rtx reg = gen_reg_rtx (QImode);
22738 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22739 plus_constant (in, offset))));
22740 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22746 thumb_reload_out_hi (rtx *operands)
22748 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22751 /* Handle reading a half-word from memory during reload. */
22753 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22755 gcc_unreachable ();
22758 /* Return the length of a function name prefix
22759 that starts with the character 'c'. */
22760 static int
22761 arm_get_strip_length (int c)
22762 {
22763   switch (c)
22764     {
22765     ARM_NAME_ENCODING_LENGTHS
22766       default: return 0;
22767     }
22768 }
22770 /* Return a pointer to a function's name with any
22771 and all prefix encodings stripped from it. */
22772 const char *
22773 arm_strip_name_encoding (const char *name)
22774 {
22775   int skip;
22777   while ((skip = arm_get_strip_length (* name)))
22778     name += skip;
22780   return name;
22781 }
22783 /* If there is a '*' anywhere in the name's prefix, then
22784 emit the stripped name verbatim, otherwise prepend an
22785 underscore if leading underscores are being used. */
22786 void
22787 arm_asm_output_labelref (FILE *stream, const char *name)
22788 {
22789   int skip;
22790   int verbatim = 0;
22792   while ((skip = arm_get_strip_length (* name)))
22793     {
22794       verbatim |= (*name == '*');
22795       name += skip;
22796     }
22798   if (verbatim)
22799     fputs (name, stream);
22800   else
22801     asm_fprintf (stream, "%U%s", name);
22802 }
22804 static void
22805 arm_file_start (void)
22806 {
22807   int val;
22809 if (TARGET_UNIFIED_ASM)
22810 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22812   if (TARGET_BPABI)
22813     {
22814       const char *fpu_name;
22815 if (arm_selected_arch)
22816 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22817 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22818 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22820 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22822       if (TARGET_SOFT_FLOAT)
22823         {
22824           if (TARGET_VFP)
22825             fpu_name = "softvfp";
22826           else
22827             fpu_name = "softfpa";
22828         }
22829       else
22830         {
22831           fpu_name = arm_fpu_desc->name;
22832           if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22833             {
22834               if (TARGET_HARD_FLOAT)
22835                 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22836               if (TARGET_HARD_FLOAT_ABI)
22837                 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22838             }
22839         }
22840       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22842 /* Some of these attributes only apply when the corresponding features
22843 are used. However we don't have any easy way of figuring this out.
22844 Conservatively record the setting that would have been used. */
22846 if (flag_rounding_math)
22847 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22849       if (!flag_unsafe_math_optimizations)
22850         {
22851           EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22852           EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22853         }
22854 if (flag_signaling_nans)
22855 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22857 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22858 flag_finite_math_only ? 1 : 3);
22860 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22861 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22862 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22864       /* Tag_ABI_optimization_goals.  */
22865       if (optimize_size)
22866         val = 4;
22867       else if (optimize >= 2)
22868         val = 2;
22869       else if (optimize)
22870         val = 1;
22871       else
22872         val = 6;
22873       EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
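/* For instance, per the logic above, a -O2 -ffast-math compile would
   produce directives along the lines of (illustrative):

        .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
        .eabi_attribute 23, 1   @ Tag_ABI_FP_number_model
   */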
22875 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22877 if (arm_fp16_format)
22878 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22880 if (arm_lang_output_object_attributes_hook)
22881 arm_lang_output_object_attributes_hook();
22882     }
22884   default_file_start ();
22885 }
22887 static void
22888 arm_file_end (void)
22889 {
22890   int regno;
22892 if (NEED_INDICATE_EXEC_STACK)
22893 /* Add .note.GNU-stack. */
22894 file_end_indicate_exec_stack ();
22896   if (! thumb_call_reg_needed)
22897     return;
22899 switch_to_section (text_section);
22900 asm_fprintf (asm_out_file, "\t.code 16\n");
22901 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22903 for (regno = 0; regno < LR_REGNUM; regno++)
22904     {
22905       rtx label = thumb_call_via_label[regno];
22907       if (label != 0)
22908         {
22909           targetm.asm_out.internal_label (asm_out_file, "L",
22910                                           CODE_LABEL_NUMBER (label));
22911           asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22912         }
22913     }
22914 }
22916 #ifndef ARM_PE
22917 /* Symbols in the text segment can be accessed without indirecting via the
22918 constant pool; it may take an extra binary operation, but this is still
22919 faster than indirecting via memory. Don't do this when not optimizing,
22920    since we won't be calculating all of the offsets necessary to do this
22921    properly.  */
22923 static void
22924 arm_encode_section_info (tree decl, rtx rtl, int first)
22926 if (optimize > 0 && TREE_CONSTANT (decl))
22927 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22929 default_encode_section_info (decl, rtl, first);
22931 #endif /* !ARM_PE */
22934 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22936 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22937 && !strcmp (prefix, "L"))
22939 arm_ccfsm_state = 0;
22940 arm_target_insn = NULL;
22942 default_internal_label (stream, prefix, labelno);
22945 /* Output code to add DELTA to the first argument, and then jump
22946 to FUNCTION. Used for C++ multiple inheritance. */
22948 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22949 HOST_WIDE_INT delta,
22950 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22953 static int thunk_label = 0;
22954   char label[256];
22955   char labelpc[256];
22956   int mi_delta = delta;
22957   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22958   int shift = 0;
22959   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22960                     ? 1 : 0);
22961   if (mi_delta < 0)
22962     mi_delta = - mi_delta;
22964   if (TARGET_THUMB)
22965     {
22966 int labelno = thunk_label++;
22967 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22968       /* Thunks are entered in ARM mode when available.  */
22969 if (TARGET_THUMB1_ONLY)
22971 /* push r3 so we can use it as a temporary. */
22972 /* TODO: Omit this save if r3 is not used. */
22973 fputs ("\tpush {r3}\n", file);
22974 fputs ("\tldr\tr3, ", file);
22978 fputs ("\tldr\tr12, ", file);
22980 assemble_name (file, label);
22981 fputc ('\n', file);
22984 /* If we are generating PIC, the ldr instruction below loads
22985 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22986 the address of the add + 8, so we have:
22988          r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22989              = target + 1.
22991 Note that we have "+ 1" because some versions of GNU ld
22992 don't set the low bit of the result for R_ARM_REL32
22993 relocations against thumb function symbols.
22994 On ARMv6M this is +4, not +8. */
22995 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22996 assemble_name (file, labelpc);
22997 fputs (":\n", file);
22998 if (TARGET_THUMB1_ONLY)
23000 /* This is 2 insns after the start of the thunk, so we know it
23001 is 4-byte aligned. */
23002 fputs ("\tadd\tr3, pc, r3\n", file);
23003 fputs ("\tmov r12, r3\n", file);
23006 fputs ("\tadd\tr12, pc, r12\n", file);
23008 else if (TARGET_THUMB1_ONLY)
23009 fputs ("\tmov r12, r3\n", file);
23011 if (TARGET_THUMB1_ONLY)
23013 if (mi_delta > 255)
23015 fputs ("\tldr\tr3, ", file);
23016 assemble_name (file, label);
23017 fputs ("+4\n", file);
23018 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23019 mi_op, this_regno, this_regno);
23021 else if (mi_delta != 0)
23023 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23024 mi_op, this_regno, this_regno,
23030 /* TODO: Use movw/movt for large constants when available. */
23031 while (mi_delta != 0)
23033           if ((mi_delta & (3 << shift)) == 0)
23034             shift += 2;
23035           else
23036             {
23037               asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23038                            mi_op, this_regno, this_regno,
23039                            mi_delta & (0xff << shift));
23040               mi_delta &= ~(0xff << shift);
23041               shift += 8;
23042             }
23043         }
23045   if (TARGET_THUMB)
23046     {
23047 if (TARGET_THUMB1_ONLY)
23048 fputs ("\tpop\t{r3}\n", file);
23050 fprintf (file, "\tbx\tr12\n");
23051 ASM_OUTPUT_ALIGN (file, 2);
23052 assemble_name (file, label);
23053 fputs (":\n", file);
23056 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23057 rtx tem = XEXP (DECL_RTL (function), 0);
23058 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23059 tem = gen_rtx_MINUS (GET_MODE (tem),
23061 gen_rtx_SYMBOL_REF (Pmode,
23062 ggc_strdup (labelpc)));
23063 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23066 /* Output ".word .LTHUNKn". */
23067 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23069 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23070 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23074 fputs ("\tb\t", file);
23075 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23076 if (NEED_PLT_RELOC)
23077 fputs ("(PLT)", file);
23078       fputc ('\n', file);
23079     }
23080 }
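/* A non-PIC, ARM-mode thunk with delta == 8 therefore reduces to
   (illustrative):

        add     r0, r0, #8
        b       target(PLT)

   with `this' in r0; the (PLT) suffix appears only when
   NEED_PLT_RELOC holds.  */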
23082 const char *
23083 arm_emit_vector_const (FILE *file, rtx x)
23084 {
23085   int i;
23086 const char * pattern;
23088 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23090 switch (GET_MODE (x))
23092 case V2SImode: pattern = "%08x"; break;
23093 case V4HImode: pattern = "%04x"; break;
23094 case V8QImode: pattern = "%02x"; break;
23095 default: gcc_unreachable ();
23098 fprintf (file, "0x");
23099 for (i = CONST_VECTOR_NUNITS (x); i--;)
23103 element = CONST_VECTOR_ELT (x, i);
23104 fprintf (file, pattern, INTVAL (element));
23110 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23111 HFmode constant pool entries are actually loaded with ldr. */
23113 arm_emit_fp16_const (rtx c)
23118 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23119 bits = real_to_target (NULL, &r, HFmode);
23120 if (WORDS_BIG_ENDIAN)
23121 assemble_zeros (2);
23122 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23123 if (!WORDS_BIG_ENDIAN)
23124     assemble_zeros (2);
23125 }
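/* E.g. the half-precision constant 1.0 (bit pattern 0x3c00) is laid
   out on a little-endian target as the two data bytes followed by
   two bytes of zero padding, so the word-sized ldr above reads a
   well-defined 32-bit value (illustrative encoding).  */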
23127 const char *
23128 arm_output_load_gr (rtx *operands)
23129 {
23130   rtx reg;
23131   rtx offset;
23132   rtx wcgr;
23133   rtx sum;
23135 if (GET_CODE (operands [1]) != MEM
23136 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23137 || GET_CODE (reg = XEXP (sum, 0)) != REG
23138 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23139 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23140 return "wldrw%?\t%0, %1";
23142 /* Fix up an out-of-range load of a GR register. */
23143 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23144   wcgr = operands[0];
23145   operands[0] = reg;
23146   output_asm_insn ("ldr%?\t%0, %1", operands);
23148   operands[0] = wcgr;
23149   operands[1] = reg;
23150   output_asm_insn ("tmcr%?\t%0, %1", operands);
23151   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23153   return "";
23154 }
23156 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23158 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23159 named arg and all anonymous args onto the stack.
23160 XXX I know the prologue shouldn't be pushing registers, but it is faster
23164 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23165 enum machine_mode mode,
23168 int second_time ATTRIBUTE_UNUSED)
23170   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23171   int nregs;
23173 cfun->machine->uses_anonymous_args = 1;
23174   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23175     {
23176 nregs = pcum->aapcs_ncrn;
23177       if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23178         nregs++;
23179     }
23180   else
23181     nregs = pcum->nregs;
23183 if (nregs < NUM_ARG_REGS)
23184 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23187 /* Return nonzero if the CONSUMER instruction (a store) does not need
23188 PRODUCER's value to calculate the address. */
23191 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23193 rtx value = PATTERN (producer);
23194 rtx addr = PATTERN (consumer);
23196 if (GET_CODE (value) == COND_EXEC)
23197 value = COND_EXEC_CODE (value);
23198 if (GET_CODE (value) == PARALLEL)
23199 value = XVECEXP (value, 0, 0);
23200 value = XEXP (value, 0);
23201 if (GET_CODE (addr) == COND_EXEC)
23202 addr = COND_EXEC_CODE (addr);
23203 if (GET_CODE (addr) == PARALLEL)
23204 addr = XVECEXP (addr, 0, 0);
23205 addr = XEXP (addr, 0);
23207 return !reg_overlap_mentioned_p (value, addr);
23210 /* Return nonzero if the CONSUMER instruction (a store) does need
23211 PRODUCER's value to calculate the address. */
23214 arm_early_store_addr_dep (rtx producer, rtx consumer)
23216 return !arm_no_early_store_addr_dep (producer, consumer);
23219 /* Return nonzero if the CONSUMER instruction (a load) does need
23220 PRODUCER's value to calculate the address. */
23223 arm_early_load_addr_dep (rtx producer, rtx consumer)
23225 rtx value = PATTERN (producer);
23226 rtx addr = PATTERN (consumer);
23228 if (GET_CODE (value) == COND_EXEC)
23229 value = COND_EXEC_CODE (value);
23230 if (GET_CODE (value) == PARALLEL)
23231 value = XVECEXP (value, 0, 0);
23232 value = XEXP (value, 0);
23233 if (GET_CODE (addr) == COND_EXEC)
23234 addr = COND_EXEC_CODE (addr);
23235 if (GET_CODE (addr) == PARALLEL)
23236 addr = XVECEXP (addr, 0, 0);
23237 addr = XEXP (addr, 1);
23239 return reg_overlap_mentioned_p (value, addr);
23242 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23243 have an early register shift value or amount dependency on the
23244 result of PRODUCER. */
23247 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23249 rtx value = PATTERN (producer);
23250 rtx op = PATTERN (consumer);
23253 if (GET_CODE (value) == COND_EXEC)
23254 value = COND_EXEC_CODE (value);
23255 if (GET_CODE (value) == PARALLEL)
23256 value = XVECEXP (value, 0, 0);
23257 value = XEXP (value, 0);
23258 if (GET_CODE (op) == COND_EXEC)
23259 op = COND_EXEC_CODE (op);
23260 if (GET_CODE (op) == PARALLEL)
23261 op = XVECEXP (op, 0, 0);
23264 early_op = XEXP (op, 0);
23265 /* This is either an actual independent shift, or a shift applied to
23266      the first operand of another operation.  We want the whole shift
23267      operation.  */
23268   if (GET_CODE (early_op) == REG)
23269     early_op = XEXP (op, 1);
23271 return !reg_overlap_mentioned_p (value, early_op);
23274 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23275 have an early register shift value dependency on the result of
23279 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23281 rtx value = PATTERN (producer);
23282 rtx op = PATTERN (consumer);
23285 if (GET_CODE (value) == COND_EXEC)
23286 value = COND_EXEC_CODE (value);
23287 if (GET_CODE (value) == PARALLEL)
23288 value = XVECEXP (value, 0, 0);
23289 value = XEXP (value, 0);
23290 if (GET_CODE (op) == COND_EXEC)
23291 op = COND_EXEC_CODE (op);
23292 if (GET_CODE (op) == PARALLEL)
23293 op = XVECEXP (op, 0, 0);
23296 early_op = XEXP (op, 0);
23298 /* This is either an actual independent shift, or a shift applied to
23299 the first operand of another operation. We want the value being
23300 shifted, in either case. */
23301 if (GET_CODE (early_op) != REG)
23302 early_op = XEXP (early_op, 0);
23304 return !reg_overlap_mentioned_p (value, early_op);
23307 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23308 have an early register mult dependency on the result of
23312 arm_no_early_mul_dep (rtx producer, rtx consumer)
23314 rtx value = PATTERN (producer);
23315 rtx op = PATTERN (consumer);
23317 if (GET_CODE (value) == COND_EXEC)
23318 value = COND_EXEC_CODE (value);
23319 if (GET_CODE (value) == PARALLEL)
23320 value = XVECEXP (value, 0, 0);
23321 value = XEXP (value, 0);
23322 if (GET_CODE (op) == COND_EXEC)
23323 op = COND_EXEC_CODE (op);
23324 if (GET_CODE (op) == PARALLEL)
23325 op = XVECEXP (op, 0, 0);
23328 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23330 if (GET_CODE (XEXP (op, 0)) == MULT)
23331 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23333 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23339 /* We can't rely on the caller doing the proper promotion when
23340 using APCS or ATPCS. */
23343 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23345 return !TARGET_AAPCS_BASED;
23348 static enum machine_mode
23349 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23350 enum machine_mode mode,
23351 int *punsignedp ATTRIBUTE_UNUSED,
23352 const_tree fntype ATTRIBUTE_UNUSED,
23353 int for_return ATTRIBUTE_UNUSED)
23355 if (GET_MODE_CLASS (mode) == MODE_INT
23356       && GET_MODE_SIZE (mode) < 4)
23357     return SImode;
23359   return mode;
23360 }
23362 /* AAPCS based ABIs use short enums by default. */
23365 arm_default_short_enums (void)
23367 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23371 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23374 arm_align_anon_bitfield (void)
23376 return TARGET_AAPCS_BASED;
23380 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23383 arm_cxx_guard_type (void)
23385 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23388 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23389 has an accumulator dependency on the result of the producer (a
23390 multiplication instruction) and no other dependency on that result. */
23392 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23394 rtx mul = PATTERN (producer);
23395 rtx mac = PATTERN (consumer);
23397 rtx mac_op0, mac_op1, mac_acc;
23399 if (GET_CODE (mul) == COND_EXEC)
23400 mul = COND_EXEC_CODE (mul);
23401 if (GET_CODE (mac) == COND_EXEC)
23402 mac = COND_EXEC_CODE (mac);
23404 /* Check that mul is of the form (set (...) (mult ...))
23405 and mla is of the form (set (...) (plus (mult ...) (...))). */
23406 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23407 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23408 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23411 mul_result = XEXP (mul, 0);
23412 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23413 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23414 mac_acc = XEXP (XEXP (mac, 1), 1);
23416 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23417 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23418           && !reg_overlap_mentioned_p (mul_result, mac_op1));
23419 }
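/* Example pair that satisfies this predicate (sketch):

        mul     r1, r2, r3
        mla     r4, r5, r6, r1

   r1 feeds only the accumulator; by contrast mla r4, r1, r6, r1
   would fail the mac_op0 overlap test.  */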
23422 /* The EABI says test the least significant bit of a guard variable. */
23425 arm_cxx_guard_mask_bit (void)
23427 return TARGET_AAPCS_BASED;
23431 /* The EABI specifies that all array cookies are 8 bytes long. */
23433 static tree
23434 arm_get_cookie_size (tree type)
23435 {
23436   tree size;
23438 if (!TARGET_AAPCS_BASED)
23439 return default_cxx_get_cookie_size (type);
23441   size = build_int_cst (sizetype, 8);
23442   return size;
23443 }
23446 /* The EABI says that array cookies should also contain the element size. */
23449 arm_cookie_has_size (void)
23451 return TARGET_AAPCS_BASED;
23455 /* The EABI says constructors and destructors should return a pointer to
23456 the object constructed/destroyed. */
23459 arm_cxx_cdtor_returns_this (void)
23461 return TARGET_AAPCS_BASED;
23464 /* The EABI says that an inline function may never be the key
23468 arm_cxx_key_method_may_be_inline (void)
23470 return !TARGET_AAPCS_BASED;
23474 arm_cxx_determine_class_data_visibility (tree decl)
23476 if (!TARGET_AAPCS_BASED
23477 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23480 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23481 is exported. However, on systems without dynamic vague linkage,
23482 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23483 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23484 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23486 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23487 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23491 arm_cxx_class_data_always_comdat (void)
23493 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23494 vague linkage if the class has no key function. */
23495 return !TARGET_AAPCS_BASED;
23499 /* The EABI says __aeabi_atexit should be used to register static
23503 arm_cxx_use_aeabi_atexit (void)
23505 return TARGET_AAPCS_BASED;
23510 arm_set_return_address (rtx source, rtx scratch)
23512 arm_stack_offsets *offsets;
23513 HOST_WIDE_INT delta;
23515 unsigned long saved_regs;
23517 offsets = arm_get_frame_offsets ();
23518 saved_regs = offsets->saved_regs_mask;
23520 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23521 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23524 if (frame_pointer_needed)
23525 addr = plus_constant(hard_frame_pointer_rtx, -4);
23528 /* LR will be the first saved register. */
23529 delta = offsets->outgoing_args - (offsets->frame + 4);
23534 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23535 GEN_INT (delta & ~4095)));
23540 addr = stack_pointer_rtx;
23542 addr = plus_constant (addr, delta);
23544 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23550 thumb_set_return_address (rtx source, rtx scratch)
23552 arm_stack_offsets *offsets;
23553 HOST_WIDE_INT delta;
23554 HOST_WIDE_INT limit;
23557 unsigned long mask;
23561 offsets = arm_get_frame_offsets ();
23562 mask = offsets->saved_regs_mask;
23563 if (mask & (1 << LR_REGNUM))
23566 /* Find the saved regs. */
23567 if (frame_pointer_needed)
23569 delta = offsets->soft_frame - offsets->saved_args;
23570 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23576 delta = offsets->outgoing_args - offsets->saved_args;
23579 /* Allow for the stack frame. */
23580 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23582 /* The link register is always the first saved register. */
23585 /* Construct the address. */
23586 addr = gen_rtx_REG (SImode, reg);
23589 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23590 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23594 addr = plus_constant (addr, delta);
23596 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23599 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23602 /* Implements target hook vector_mode_supported_p. */
23604 arm_vector_mode_supported_p (enum machine_mode mode)
23606 /* Neon also supports V2SImode, etc. listed in the clause below. */
23607 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23608 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23611 if ((TARGET_NEON || TARGET_IWMMXT)
23612 && ((mode == V2SImode)
23613 || (mode == V4HImode)
23614 || (mode == V8QImode)))
23617 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23618 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23619 || mode == V2HAmode))
23625 /* Implements target hook array_mode_supported_p. */
23628 arm_array_mode_supported_p (enum machine_mode mode,
23629 unsigned HOST_WIDE_INT nelems)
23632 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23633 && (nelems >= 2 && nelems <= 4))
23639 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23640 registers when autovectorizing for Neon, at least until multiple vector
23641 widths are supported properly by the middle-end. */
23643 static enum machine_mode
23644 arm_preferred_simd_mode (enum machine_mode mode)
23650 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23652 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23654 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23656 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23658 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23665 if (TARGET_REALLY_IWMMXT)
23681 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23683 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23684 using r0-r4 for function arguments, r7 for the stack frame and don't have
23685 enough left over to do doubleword arithmetic. For Thumb-2 all the
23686 potentially problematic instructions accept high registers so this is not
23687 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23688 that require many low registers. */
23690 arm_class_likely_spilled_p (reg_class_t rclass)
23692 if ((TARGET_THUMB1 && rclass == LO_REGS)
23693 || rclass == CC_REG)
23699 /* Implements target hook small_register_classes_for_mode_p. */
23701 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23703 return TARGET_THUMB1;
23706 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23707 ARM insns and therefore guarantee that the shift count is modulo 256.
23708 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23709 guarantee no particular behavior for out-of-range counts. */
23711 static unsigned HOST_WIDE_INT
23712 arm_shift_truncation_mask (enum machine_mode mode)
23714 return mode == SImode ? 255 : 0;
23718 /* Map internal gcc register numbers to DWARF2 register numbers. */
23721 arm_dbx_register_number (unsigned int regno)
23726 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23727 compatibility. The EABI defines them as registers 96-103. */
23728 if (IS_FPA_REGNUM (regno))
23729 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23731 if (IS_VFP_REGNUM (regno))
23733 /* See comment in arm_dwarf_register_span. */
23734 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23735 return 64 + regno - FIRST_VFP_REGNUM;
23737 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23740 if (IS_IWMMXT_GR_REGNUM (regno))
23741 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23743 if (IS_IWMMXT_REGNUM (regno))
23744 return 112 + regno - FIRST_IWMMXT_REGNUM;
23746   gcc_unreachable ();
23747 }
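/* Illustrative spot checks of the mapping above: s0 -> 64,
   d16 -> 272, gr0 -> 104, wR0 -> 112; FPA f0 is 96 when
   TARGET_AAPCS_BASED and 16 otherwise.  */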
23749 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23750    GCC models them as 64 32-bit registers, so we need to describe this to
23751 the DWARF generation code. Other registers can use the default. */
23753 arm_dwarf_register_span (rtx rtl)
23760 regno = REGNO (rtl);
23761 if (!IS_VFP_REGNUM (regno))
23764 /* XXX FIXME: The EABI defines two VFP register ranges:
23765 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23767 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23768 corresponding D register. Until GDB supports this, we shall use the
23769 legacy encodings. We also use these encodings for D0-D15 for
23770 compatibility with older debuggers. */
23771 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23774 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23775 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23776 regno = (regno - FIRST_VFP_REGNUM) / 2;
23777 for (i = 0; i < nregs; i++)
23778 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23783 #if ARM_UNWIND_INFO
23784 /* Emit unwind directives for a store-multiple instruction or stack pointer
23785 push during alignment.
23786 These should only ever be generated by the function prologue code, so
23787 expect them to have a particular form. */
23790 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23793 HOST_WIDE_INT offset;
23794 HOST_WIDE_INT nregs;
23800 e = XVECEXP (p, 0, 0);
23801 if (GET_CODE (e) != SET)
23804 /* First insn will adjust the stack pointer. */
23805 if (GET_CODE (e) != SET
23806 || GET_CODE (XEXP (e, 0)) != REG
23807 || REGNO (XEXP (e, 0)) != SP_REGNUM
23808 || GET_CODE (XEXP (e, 1)) != PLUS)
23811 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23812 nregs = XVECLEN (p, 0) - 1;
23814 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23817 /* The function prologue may also push pc, but not annotate it as it is
23818 never restored. We turn this into a stack pointer adjustment. */
23819 if (nregs * 4 == offset - 4)
23821 fprintf (asm_out_file, "\t.pad #4\n");
23825 fprintf (asm_out_file, "\t.save {");
23827 else if (IS_VFP_REGNUM (reg))
23830 fprintf (asm_out_file, "\t.vsave {");
23832 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23834 /* FPA registers are done differently. */
23835 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23839 /* Unknown register type. */
23842 /* If the stack increment doesn't match the size of the saved registers,
23843 something has gone horribly wrong. */
23844 if (offset != nregs * reg_size)
23849 /* The remaining insns will describe the stores. */
23850 for (i = 1; i <= nregs; i++)
23852 /* Expect (set (mem <addr>) (reg)).
23853 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23854 e = XVECEXP (p, 0, i);
23855 if (GET_CODE (e) != SET
23856 || GET_CODE (XEXP (e, 0)) != MEM
23857 || GET_CODE (XEXP (e, 1)) != REG)
23860 reg = REGNO (XEXP (e, 1));
23865 fprintf (asm_out_file, ", ");
23866 /* We can't use %r for vfp because we need to use the
23867 double precision register names. */
23868 if (IS_VFP_REGNUM (reg))
23869 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23871 asm_fprintf (asm_out_file, "%r", reg);
23873 #ifdef ENABLE_CHECKING
23874 /* Check that the addresses are consecutive. */
23875 e = XEXP (XEXP (e, 0), 0);
23876 if (GET_CODE (e) == PLUS)
23878 offset += reg_size;
23879 if (GET_CODE (XEXP (e, 0)) != REG
23880 || REGNO (XEXP (e, 0)) != SP_REGNUM
23881 || GET_CODE (XEXP (e, 1)) != CONST_INT
23882 || offset != INTVAL (XEXP (e, 1)))
23886 || GET_CODE (e) != REG
23887 || REGNO (e) != SP_REGNUM)
23891   fprintf (asm_out_file, "}\n");
23892 }
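/* For a prologue that pushes {r4, r5, lr} plus the never-restored pc
   slot, the directives come out as, e.g. (illustrative):

        .pad #4
        .save {r4, r5, lr}
   */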
23894 /* Emit unwind directives for a SET. */
23897 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23905 switch (GET_CODE (e0))
23908 /* Pushing a single register. */
23909 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23910 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23911 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23914 asm_fprintf (asm_out_file, "\t.save ");
23915 if (IS_VFP_REGNUM (REGNO (e1)))
23916 asm_fprintf(asm_out_file, "{d%d}\n",
23917 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23919 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23923 if (REGNO (e0) == SP_REGNUM)
23925 /* A stack increment. */
23926 if (GET_CODE (e1) != PLUS
23927 || GET_CODE (XEXP (e1, 0)) != REG
23928 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23929 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23932 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23933 -INTVAL (XEXP (e1, 1)));
23935 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23937 HOST_WIDE_INT offset;
23939 if (GET_CODE (e1) == PLUS)
23941 if (GET_CODE (XEXP (e1, 0)) != REG
23942 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23944 reg = REGNO (XEXP (e1, 0));
23945 offset = INTVAL (XEXP (e1, 1));
23946 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23947 HARD_FRAME_POINTER_REGNUM, reg,
23950 else if (GET_CODE (e1) == REG)
23953 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23954 HARD_FRAME_POINTER_REGNUM, reg);
23959 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23961 /* Move from sp to reg. */
23962 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23964 else if (GET_CODE (e1) == PLUS
23965 && GET_CODE (XEXP (e1, 0)) == REG
23966 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23967 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23969 /* Set reg to offset from sp. */
23970 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23971 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      pat = XEXP (note, 0);
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  goto found;

	case REG_CFA_REGISTER:
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	case REG_CFA_DEF_CFA:
	case REG_CFA_EXPRESSION:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}

/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

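/* Illustrative output for a handler catching "int": the type_info symbol
   is emitted as

	.word	_ZTIi(TARGET2)

   so the linker applies R_ARM_TARGET2 rather than a plain data
   relocation.  */
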
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}

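/* This is called with PROLOGUE true before a function body and false after
   it, so each function ends up bracketed as (illustrative):

	foo:
		.fnstart
		...
		.fnend
*/
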
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}

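/* As a rough example, a general-dynamic access to TLS variable "x" is
   printed as

	.word	x(tlsgd) + (. - .LPIC0 - 8)

   where the label and addend come from operands 2 and 3 of the UNSPEC and
   make the relocation PC-relative (the exact label name and pc bias here
   are illustrative).  */
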
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx *operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}

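/* E.g. with SET_FLAGS == 1 and a constant left shift, the template built
   above is "lsl%.\t%0, %1, %2", which output_asm_insn renders as something
   like "lsls r0, r1, #2" under unified syntax; the divided-syntax fallback
   instead prints the shift via %S3, e.g. "mov r0, r1, lsl #2".  */
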
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}

/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexa15:
      return 3;

    case cortexr4:
    case cortexr4f:
    case cortexr5:
    case genericv7a:
    case cortexa5:
    case cortexa8:
    case cortexa9:
    case fa726te:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}

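/* For example, "void f (int8x8_t)" mangles as "_Z1f15__simd64_int8_t":
   int8x8_t (the arm_neon.h name for the V8QImode/__builtin_neon_qi vector
   type) hits the first table entry above.  */
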
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Thumb-1 cannot support conditional execution, so return true unless
   the target is Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

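/* Implement TARGET_AUTOVECTORIZE_VECTOR_SIZES.  The result is a bitmask of
   candidate vector sizes in bytes: quad-word (16) and double-word (8) NEON
   vectors.  Returning 0 leaves only the preferred vector size, which
   -mvectorize-with-neon-double has already forced to double-word.  */
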
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
	   regno <= LAST_FPA_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
	{
	  for (regno = FIRST_FPA_REGNUM;
	       regno <= LAST_FPA_REGNUM; ++ regno)
	    fixed_regs[regno] = call_used_regs[regno] = 1;
	  for (regno = FIRST_CIRRUS_FP_REGNUM;
	       regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
	    }
	}
      if (TARGET_VFP)
	{
	  /* VFPv3 registers are disabled when earlier VFP
	     versions are selected due to the definition of
	     LAST_VFP_REGNUM.  */
	  for (regno = FIRST_VFP_REGNUM;
	       regno <= LAST_VFP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
		|| regno >= FIRST_VFP_REGNUM + 32;
	    }
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass we therefore
     prefer LO_REGS, which can reduce code size.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}

/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}

/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}

int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (GET_CODE (operand) != CONST_DOUBLE)
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}

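/* E.g. a CONST_DOUBLE of 0.0625 yields 4, since its reciprocal 16.0 is an
   exact power of two; this is the fractional-bit count used by the VFP
   fixed-point vcvt patterns.  Operands without an exact power-of-two
   reciprocal yield 0.  */
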
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_memory_barrier ());
      break;
    default:
      gcc_unreachable ();
    }
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_memory_barrier ());
      break;
    default:
      gcc_unreachable ();
    }
}

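/* Between them these implement the usual C11 mapping: seq-cst and acq-rel
   operations get a barrier on both sides, release gets one before, acquire
   one after, and relaxed/consume none.  The barrier itself is whatever
   gen_memory_barrier expands to (a DMB on targets that have it).  */
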
/* Emit the load-exclusive and store-exclusive instructions.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_load_exclusiveqi; break;
    case HImode: gen = gen_arm_load_exclusivehi; break;
    case SImode: gen = gen_arm_load_exclusivesi; break;
    case DImode: gen = gen_arm_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_store_exclusiveqi; break;
    case HImode: gen = gen_arm_store_exclusivehi; break;
    case SImode: gen = gen_arm_store_exclusivesi; break;
    case DImode: gen = gen_arm_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}

/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}

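/* Since BVAL is literally (eq CC 0), a caller expanding
   __atomic_compare_exchange can branch on it without emitting any
   additional comparison.  */
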
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}

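/* For a strong SImode compare-and-swap with seq-cst ordering the split
   produces, roughly (illustrative register allocation):

	dmb	sy
   1:	ldrex	r0, [r3]
	cmp	r0, r1
	bne	2f
	strex	r2, r4, [r3]
	cmp	r2, #0
	bne	1b
   2:	dmb	sy
*/
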
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  arm_post_atomic_barrier (model);
}

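/* E.g. an SImode __atomic_fetch_add with seq-cst ordering becomes, roughly:

	dmb	sy
   1:	ldrex	r0, [r2]
	add	r1, r0, r4
	strex	r3, r1, [r2]
	cmp	r3, #0
	bne	1b
	dmb	sy

   (illustrative registers; OLD_OUT/NEW_OUT choose which value the caller
   sees).  */
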
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}

void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}

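/* E.g. for a single V8QI input the selector is ANDed with the constant
   vector {7, 7, 7, 7, 7, 7, 7, 7} before the table lookup, giving the
   modulo-nelt behaviour VEC_PERM_EXPR requires.  */
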
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}

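/* E.g. the two-operand V4HI selector {0, 2, 4, 6} (all even-numbered
   elements) is matched here with odd == 0 and becomes a single vuzp,
   d->target taking the first of the two outputs.  */
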
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}

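/* E.g. the two-operand V4HI selector {0, 4, 1, 5}, which interleaves the
   low halves of the two inputs, is matched here with high == 0.  */
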
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  /* Check the permutation one reversed group (DIFF + 1 elements) at a time;
     stepping by DIFF alone would re-test a partially overlapping window and
     wrongly reject valid selectors.  */
  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      if (d->perm[i + j] != i + diff - j)
	return false;

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}

/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}

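/* E.g. the two-operand V4HI selector {0, 4, 2, 6} matches here with
   odd == 0: the result alternates element i of op0 with element i of op1
   for even i, which is exactly one output of vtrn.  */
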
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}

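/* Worked example: the V4HI selector {4, 0, 5, 1} starts with an index into
   the second operand, so it is rewritten above to {0, 4, 1, 5} with OP0 and
   OP1 exchanged, which arm_evpc_neon_vzip then recognizes.  */
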
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

#include "gt-arm.h"