/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif
static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes (offsets
   -4088 through 4095 inclusive), which is divisible by eight,
   ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)

#define FL_FOR_ARCH2     FL_NOTM
#define FL_FOR_ARCH3     (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M    (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4     (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T    (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5     (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T    (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E    (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE   (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ  FL_FOR_ARCH5TE
#define FL_FOR_ARCH6     (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J    FL_FOR_ARCH6
#define FL_FOR_ARCH6K    (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z    FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK   FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2   (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M    (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7     ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A    (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R    (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M    (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM   (FL_FOR_ARCH7M | FL_ARCH7EM)
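
/* As a worked example of how these sets compose: each FL_FOR_ARCHn folds
   in its predecessor, so expanding the chain for ARMv5TE by hand gives

     FL_FOR_ARCH5TE = FL_FOR_ARCH5E | FL_THUMB
                    = FL_FOR_ARCH5 | FL_ARCH5E | FL_THUMB
                    = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                      | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   and a single test such as (insn_flags & FL_ARCH4) therefore succeeds
   on every v4-or-later target.  */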

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
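
/* A sketch of what THUMB2_WORK_REGS evaluates to, assuming the usual
   numbering for this port (Thumb hard frame pointer r7, SP r13, PC r15,
   and a PIC register of r9 or r10 when one is in use): only the frame
   pointer bit lands inside the low eight bits, so the mask becomes

     0xff & ~(1 << 7) == 0x7f

   i.e. the low registers r0-r6.  */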

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                            /* Constant limit.  */
  3,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  1,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  1,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,                  /* TODO: Calculate correct values.  */
  false,                                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  5,                                            /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};

/* Supported TLS relocations.  */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
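
/* For example, arm_gen_compare_reg builds a condition-code register
   assignment with

     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

   rather than spelling out emit_insn (gen_rtx_SET (...)) at every call
   site.  */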

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
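
/* A short worked trace of the loop: for value = 0x2c (binary 101100),
   value &= value - 1 clears one set bit per iteration:

     101100 & 101011 = 101000
     101000 & 100111 = 100000
     100000 & 011111 = 000000

   so the loop runs once per set bit (three times here) instead of once
   per bit position.  */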

typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for registering fixed-point libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
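
/* For example, the loops further down call this as

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   which registers "__gnu_ssaddsa3" as the saturating-add helper for
   SAmode, matching the __gnu_-prefixed fixed-point routines built into
   libgcc on ARM.  */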

static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
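
/* Following the fixed-bit.h convention, a conversion between two
   fixed-point modes of the same kind picks up the "2" suffix while a
   fixed/float conversion does not; for instance

     QQmode -> HQmode registers "__gnu_fractqqhq2"
     SAmode -> SFmode registers "__gnu_fractsasf"  */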

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     AAPCS based ABI.  */
  if (arm_abi == ARM_ABI_APCS)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
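
  /* To make the divmod arrangement above concrete: a function needing
     both results, such as

       int f (int a, int b) { return a / b + a % b; }

     can be compiled into a single __aeabi_idivmod call (quotient in r0,
     remainder in r1), while a lone a / b uses the faster __aeabi_idiv
     entry point registered above.  */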

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
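
  /* The practical effect of leaving the HFmode optabs NULL is that a
     half-precision operation is synthesised through SFmode;
     conceptually,

       __fp16 add (__fp16 a, __fp16 b) { return a + b; }

     becomes __gnu_h2f_ieee (a) + __gnu_h2f_ieee (b) narrowed back
     through __gnu_f2h_ieee, with the _alternative entry points
     substituted when the alternative FP16 format is in use.  */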

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" },
        { QImode, "qi" },
        { HImode, "hi" },
        { SImode, "si" },
        { DImode, "di" },
        { TImode, "ti" },
        { SFmode, "sf" },
        { DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
        }
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
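
/* Because the structure tag is part of the C++ ABI, a declaration such
   as

     void f (va_list ap);

   mangles on AAPCS targets as _Z1fSt9__va_list (va_list is treated as
   std::__va_list), which is why the tag built above must never
   change.  */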

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a CPU that has both the
                 characteristics of the default CPU and the given
                 command line options, we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
1690 /* If this target is normally configured to use APCS frames, warn if they
1691 are turned off and debugging is turned on. */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
1694 && !TARGET_APCS_FRAME
1695 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1696 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1698 if (TARGET_APCS_FLOAT)
1699 warning (0, "passing floating point arguments in fp regs not yet supported");
1701 if (TARGET_LITTLE_WORDS)
1702 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1703 "will be removed in a future release");
1705 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1706 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1707 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1708 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1709 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1710 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1711 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1712 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1713 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1714 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1715 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1716 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1717 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1718 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1720 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1721 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1722 thumb_code = TARGET_ARM == 0;
1723 thumb1_code = TARGET_THUMB1 != 0;
1724 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1725 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1726 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1727 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1728 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1729 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1731 /* If we are not using the default (ARM mode) section anchor offset
1732 ranges, then set the correct ranges now. */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
1736 Permissible positive offset ranges are 5-bit (for byte loads),
1737 6-bit (for halfword loads), or 7-bit (for word loads).
1738 Empirical results suggest a 7-bit anchor range gives the best
1739 overall code size. */
1740 targetm.min_anchor_offset = 0;
1741 targetm.max_anchor_offset = 127;
1743 else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
1746 for a particular anchor is 248 + 1 + 4095 bytes, which is
1747 divisible by eight, ensuring natural spacing of anchors. */
1748 targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
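/* As a quick check of the arithmetic in the comment above: 248 + 1 + 4095
   = 4344, and 4344 = 8 * 543, so the total anchor block size is indeed a
   multiple of eight bytes.  */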
1752 /* V5 code we generate is completely interworking capable, so we turn off
1753 TARGET_INTERWORK here to avoid many tests later on. */
1755 /* XXX However, we must pass the right pre-processor defines to CPP
1756 or GLD can get confused. This is a hack. */
1757 if (TARGET_INTERWORK)
1758 arm_cpp_interwork = 1;
  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
1763 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1764 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1766 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1767 error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }
1788 arm_fpu_desc = &all_fpus[arm_fpu_index];
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
1813 if (TARGET_AAPCS_BASED
1814 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1815 error ("FPA is unsupported in the AAPCS");
1817 if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }
1826 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1827 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1828 will ever exist. GCC makes no attempt to support this combination. */
1829 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1830 sorry ("iWMMXt and hardware floating point");
1832 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1833 if (TARGET_THUMB2 && TARGET_IWMMXT)
1834 sorry ("Thumb-2 iWMMXt");
1836 /* __fp16 support currently assumes the core has ldrh. */
1837 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1838 sorry ("__fp16 and no ldrh");
1840 /* If soft-float is specified then don't use FPU. */
1841 if (TARGET_SOFT_FLOAT)
1842 arm_fpu_attr = FPU_NONE;
1844 if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
1847 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
1866 /* For arm2/3 there is no need to do any scheduling if there is only
1867 a floating point emulator, or we are doing software floating-point. */
1868 if ((TARGET_SOFT_FLOAT
1869 || (TARGET_FPA && arm_fpu_desc->rev))
1870 && (tune_flags & FL_MODE32) == 0)
1871 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1873 /* Use the cp15 method if it is available. */
1874 if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
1877 target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
1882 if (TARGET_HARD_TP && TARGET_THUMB1)
1883 error ("can not use -mtp=cp15 with 16-bit Thumb");
1885 /* Override the default structure alignment for AAPCS ABI. */
1886 if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
1913 /* If stack checking is disabled, we can use r10 as the PIC register,
1914 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1915 if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }
1922 if (flag_pic && TARGET_VXWORKS_RTP)
1923 arm_pic_register = 9;
1925 if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");
1932 /* Prevent the user from choosing an obviously stupid PIC register. */
1933 else if (pic_register < 0 || call_used_regs[pic_register]
1934 || pic_register == HARD_FRAME_POINTER_REGNUM
1935 || pic_register == STACK_POINTER_REGNUM
1936 || pic_register >= PC_REGNUM
1937 || (TARGET_VXWORKS_RTP
1938 && (unsigned int) pic_register != arm_pic_register))
1939 error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
1944 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
1953 /* Enable -munaligned-access by default for
1954 - all ARMv6 architecture-based processors
1955 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1957 Disable -munaligned-access by default for
1958 - all pre-ARMv6 architecture-based processors
1959 - ARMv6-M architecture-based processors. */
1961 if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
1964 unaligned_access = 1;
      else
	unaligned_access = 0;
    }
1968 else if (unaligned_access == 1
1969 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
1975 if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    /* If optimizing for size, bump the number of instructions that we
       are prepared to conditionally execute (even on a StrongARM).  */
    max_insns_skipped = 6;
  else
    max_insns_skipped = current_tune->max_insns_skipped;
1990 /* Hot/Cold partitioning is not currently supported, since we can't
1991 handle literal pool placement in that case. */
1992 if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
2002 but measurable, size reduction for PIC code. Therefore, we decrease
2003 the bar for unrestricted expression hoisting to the cost of PIC address
2004 calculation, which is 2 instructions. */
2005 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2006 global_options.x_param_values,
2007 global_options_set.x_param_values);
2009 /* ARM EABI defaults to strict volatile bitfields. */
2010 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2011 && abi_version_at_least(2))
2012 flag_strict_volatile_bitfields = 1;
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and
     where we have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
2020 flag_prefetch_loop_arrays = 1;
2022 /* Set up parameters to be used in prefetching algorithm. Do not override the
2023 defaults unless we are tuning for a core we have researched values for. */
2024 if (current_tune->num_prefetch_slots > 0)
2025 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2026 current_tune->num_prefetch_slots,
2027 global_options.x_param_values,
2028 global_options_set.x_param_values);
2029 if (current_tune->l1_cache_line_size >= 0)
2030 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2031 current_tune->l1_cache_line_size,
2032 global_options.x_param_values,
2033 global_options_set.x_param_values);
2034 if (current_tune->l1_cache_size >= 0)
2035 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2036 current_tune->l1_cache_size,
2037 global_options.x_param_values,
2038 global_options_set.x_param_values);
2040 /* Register global variables with the garbage collector. */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
2047 gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
2051 /* A table of known ARM exception types.
2052 For use with the interrupt function attribute. */
typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;
static const isr_attribute_arg isr_attribute_args [] =
{
2063 { "IRQ", ARM_FT_ISR },
2064 { "irq", ARM_FT_ISR },
2065 { "FIQ", ARM_FT_FIQ },
2066 { "fiq", ARM_FT_FIQ },
2067 { "ABORT", ARM_FT_ISR },
2068 { "abort", ARM_FT_ISR },
2069 { "ABORT", ARM_FT_ISR },
2070 { "abort", ARM_FT_ISR },
2071 { "UNDEF", ARM_FT_EXCEPTION },
2072 { "undef", ARM_FT_EXCEPTION },
2073 { "SWI", ARM_FT_EXCEPTION },
2074 { "swi", ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
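/* As an illustration, the table above is what makes a declaration such as

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   map to ARM_FT_ISR.  The argument strings are matched exactly (streq),
   which is why both upper- and lower-case spellings are listed.  */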
2078 /* Returns the (interrupt) function type of the current
2079 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2081 static unsigned long
2082 arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;
2094 /* Get the value of the argument. */
2095 if (TREE_VALUE (argument) == NULL_TREE
2096 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2097 return ARM_FT_UNKNOWN;
2099 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2101 /* Check it against the list of known arguments. */
2102 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2103 if (streq (arg, ptr->arg))
2104 return ptr->return_value;
2106 /* An unrecognized interrupt type. */
  return ARM_FT_UNKNOWN;
}
2110 /* Computes the type of the current function. */
2112 static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;
2119 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2121 /* Decide if the current function is volatile. Such functions
2122 never return, and many memory cycles can be saved by not storing
2123 register values that will never be needed again. This optimization
2124 was added to speed up context switching in a kernel application. */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;
2133 if (cfun->static_chain_decl != NULL)
2134 type |= ARM_FT_NESTED;
2136 attr = DECL_ATTRIBUTES (current_function_decl);
2138 a = lookup_attribute ("naked", attr);
2140 type |= ARM_FT_NAKED;
2142 a = lookup_attribute ("isr", attr);
2144 a = lookup_attribute ("interrupt", attr);
2147 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2149 type |= arm_isr_value (TREE_VALUE (a));
2154 /* Returns the type of the current function. */
unsigned long
arm_current_func_type (void)
{
2159 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2160 cfun->machine->func_type = arm_compute_func_type ();
  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
2168 /* Naked functions should not allocate stack slots for arguments. */
  return !IS_NAKED (arm_current_func_type ());
}
2173 /* Output assembler code for a block containing the constant parts
2174 of a trampoline, leaving space for the variable parts.
2176 On the ARM, (if r8 is the static chain regnum, and remembering that
2177 referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc, #0]
	   .word	static chain value
	   .word	function's address
2181 .word function's address
2182 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
2192 else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
2195 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2196 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2197 STATIC_CHAIN_REGNUM, PC_REGNUM);
2198 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
2203 fprintf (f, "\t.code\t16\n");
2204 fprintf (f, ".Ltrampoline_start:\n");
2205 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2206 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2207 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2208 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2209 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2210 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }

  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
2216 /* Emit RTL insns to initialize the variable parts of a trampoline. */
static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
2221 rtx fnaddr, mem, a_tramp;
2223 emit_block_move (m_tramp, assemble_trampoline_template (),
2224 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2226 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2227 emit_move_insn (mem, chain_value);
2229 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2230 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2231 emit_move_insn (mem, fnaddr);
2233 a_tramp = XEXP (m_tramp, 0);
2234 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2235 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}
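/* As a rough sketch (the addresses and the use of r8 are illustrative only,
   per the comment above arm_asm_trampoline_template), an initialized
   ARM-mode trampoline placed at 0x1000 behaves like:

       0x1000:  ldr   r8, [pc, #0]   @ pc reads as 0x1008: load static chain
       0x1004:  ldr   pc, [pc, #0]   @ pc reads as 0x100c: jump to function
       0x1008:  .word <static chain value>
       0x100c:  .word <function's address>

   arm_trampoline_init above fills in the two .word slots and then calls
   __clear_cache so instruction fetch sees the updated words.  */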
2239 /* Thumb trampolines should be entered in thumb mode, so set
2240 the bottom bit of the address. */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
2251 /* Return 1 if it is possible to return using a single instruction.
2252 If SIBLING is non-null, this is a test for a return before a sibling
2253 call. SIBLING is the call insn, so we can examine its register usage. */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
2260 unsigned long saved_int_regs;
2261 unsigned HOST_WIDE_INT stack_adjust;
2262 arm_stack_offsets *offsets;
  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2280 offsets = arm_get_frame_offsets ();
2281 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2283 /* As do variadic functions. */
2284 if (crtl->args.pretend_args_size
2285 || cfun->machine->uses_anonymous_args
2286 /* Or if the function calls __builtin_eh_return () */
2287 || crtl->calls_eh_return
2288 /* Or if the function calls alloca */
2289 || cfun->calls_alloca
2290 /* Or if there is a stack adjustment. However, if the stack pointer
2291 is saved on the stack, we can use a pre-incrementing stack load. */
2292 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;
2296 saved_int_regs = offsets->saved_regs_mask;
2298 /* Unfortunately, the insn
2300 ldmib sp, {..., sp, ...}
2302 triggers a bug on most SA-110 based devices, such that the stack
2303 pointer won't be correctly restored if the instruction takes a
2304 page fault. We work around this problem by popping r3 along with
2305 the other registers, since that is never slower than executing
2306 another instruction.
     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;
    }

  if (flag_pic
      && arm_pic_register != INVALID_REGNUM
      && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    return 0;
  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
2383 /* Return TRUE if int I is a valid immediate ARM constant. */
int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;
2390 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2391 be all zero, or all one. */
2392 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2393 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2394 != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2400 /* Fast return for 0 and small values. We must do this for zero, since
2401 the code below can't handle that one case. */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;
2405 /* Get the number of trailing zeros. */
2406 lowbit = ffs((int) i) - 1;
  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  /* Allow rotated constants in ARM mode.  */
  if (TARGET_ARM
      && ((i & ~0xc000003f) == 0
	  || (i & ~0xf000000f) == 0
	  || (i & ~0xfc000003) == 0))
    return TRUE;

  if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
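/* Some illustrative values (easily checked by hand): 0x000000ff,
   0x00ff0000 and 0xf000000f all satisfy const_ok_for_arm in ARM mode,
   since each is an 8-bit value rotated right by an even amount (the last
   one wraps around the top of the word).  0x00000101 does not: its set
   bits span nine bit positions.  In Thumb-2 mode, 0x00010001 is accepted
   via the replicated 0x00XY00XY pattern above.  */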
2445 /* Return true if I is a valid constant for the operation CODE. */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:  case NE:  case GT:  case LE:
    case LT:  case GE:  case GEU: case LTU:
    case GTU: case LEU:
    case UNORDERED: case ORDERED:
    case UNEQ: case UNGE: case UNLT: case UNGT: case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
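/* For example (values illustrative): const_ok_for_op (-1, PLUS) is true
   even though -1 itself is not a valid data-processing immediate, because
   its negation, 1, is valid; i.e. an "add r0, r1, #-1" can be emitted as
   "sub r0, r1, #1".  */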
2507 /* Emit a sequence of insns to handle a large constant.
2508 CODE is the code of the operation required, it can be any of SET, PLUS,
2509 IOR, AND, XOR, MINUS;
2510 MODE is the mode in which the operation is being performed;
2511 VAL is the integer to operate on;
2512 SOURCE is the other operand (a register, or a null-pointer for SET);
2513 SUBTARGETS means it is safe to create scratch registers if that will
2514 either produce a simpler sequence, or we will want to cse the values.
2515 Return value is the number of insns emitted. */
2517 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2520 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;
2529 if (subtargets || code == SET
2530 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2531 && REGNO (target) != REGNO (source)))
2533 /* After arm_reorg has been called, we can't fix up expensive
2534 constants by pushing them into memory so we must synthesize
2535 them in-line, regardless of the cost. This is only likely to
2536 be more costly on chips that have load delay slots and we are
2537 compiling without running the scheduler (so no splitting
2538 occurred before the final instruction emission).
	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
2562 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2564 if (TARGET_USE_MOVT)
2565 arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));
	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));

	      return 2;
	    }
	}
    }
  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
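/* A small worked example of the splitting performed above (register names
   are illustrative): SETting a register to 0xe0000100 cannot be done with
   one data-processing immediate, but both halves are valid, so it becomes

       mov  rA, #0xe0000000
       orr  rA, rA, #0x100

   which is exactly the shape the pre-indexed addressing comment in
   optimal_immediate_sequence below relies on.  */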
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2590 struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;
  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }
2623 /* So long as it won't require any more insns to do so, it's
2624 desirable to emit a small constant (in bits 0...9) in the last
2625 insn. This way there is more chance that it can be combined with
2626 a later addressing insn to form a pre-indexed load or store
2627 operation. Consider:
2629 *((volatile int *)0xe0000100) = 1;
2630 *((volatile int *)0xe0000110) = 2;
     We want this to wind up as:

	mov rA, #0xe0000000
	mov rB, #1
	str rB, [rA, #0x100]
	mov rB, #2
	str rB, [rA, #0x110]
2640 rather than having to synthesize both large constants from scratch.
2642 Therefore, we calculate how many insns would be required to emit
2643 the constant starting from `best_start', and also starting from
2644 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2645 yield a shorter sequence, we may as well use zero. */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
2661 /* As for optimal_immediate_sequence, but starting at bit-position I. */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2664 struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;
  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;
2694 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}
      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 take more insns.
2728 TODO: 16-bit constants that don't clear all the bits, but still win.
2729 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
2733 b2 = (remainder & 0x00ff0000) >> 16;
2734 b3 = (remainder & 0x0000ff00) >> 8;
2735 b4 = remainder & 0xff;
2739 /* The 8-bit immediate already found clears b1 (and maybe b2),
2740 but must leave b3 and b4 alone. */
2742 /* First try to find a 32-bit replicated constant that clears
2743 almost everything. We can assume that we can't do it in one,
2744 or else we wouldn't be here. */
2745 unsigned int tmp = b1 & b2 & b3 & b4;
2746 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2748 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2749 + (tmp == b3) + (tmp == b4);
2751 && (matching_bytes >= 3
2752 || (matching_bytes == 2
2753 && const_ok_for_op (remainder & ~tmp2, code))))
2755 /* At least 3 of the bytes match, and the fourth has at
2756 least as many bits set, or two of the bytes match
2757 and it will only require one more insn to finish. */
2765 /* Second, try to find a 16-bit replicated constant that can
2766 leave three of the bytes clear. If b2 or b4 is already
2767 zero, then we can. If the 8-bit from above would not
2768 clear b2 anyway, then we still win. */
2769 else if (b1 == b3 && (!b2 || !b4
2770 || (remainder & 0x00ff0000 & ~result)))
2772 result = remainder & 0xff00ff00;
2778 /* The 8-bit immediate already found clears b2 (and maybe b3)
	     and we don't get here unless b1 is already clear, but it will
2780 leave b4 unchanged. */
2782 /* If we can clear b2 and b4 at once, then we win, since the
2783 8-bits couldn't possibly reach that far. */
2786 result = remainder & 0x00ff00ff;
2792 return_sequence->i[insns++] = result;
2793 remainder &= ~result;
      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
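/* Worked example (values only; no claim about which path a real compile
   takes): for VAL == 0x00f000f0 in ARM mode the loop above finds two
   8-bit immediates, 0x00f00000 and 0x000000f0, so two insns are needed;
   in Thumb-2 mode the same value matches the replicated 0x00XY00XY
   pattern (XY == 0xf0) and needs only one.  */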
2803 /* Emit an instruction with the indicated PATTERN. If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
2811 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2812 emit_insn (pattern);
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
2827 int set_sign_bit_copies = 0;
2828 int clear_sign_bit_copies = 0;
2829 int clear_zero_bit_copies = 0;
2830 int set_zero_bit_copies = 0;
2831 int insns = 0, neg_insns, inv_insns;
2832 unsigned HOST_WIDE_INT temp1, temp2;
2833 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2834 struct four_ints *immediates;
2835 struct four_ints pos_immediates, neg_immediates, inv_immediates;
  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
2854 emit_constant_insn (cond,
2855 gen_rtx_SET (VOIDmode, target,
2856 GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
2866 emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
2876 emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target, source));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NOT (mode, source)));
	  can_invert = 1;
	  return 1;
	}
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (VOIDmode, target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}
      break;

    default:
      gcc_unreachable ();
    }
2942 if (const_ok_for_op (val, code))
2945 emit_constant_insn (cond,
2946 gen_rtx_SET (VOIDmode, target,
2948 ? gen_rtx_fmt_ee (code, mode, source,
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
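  /* For example, with remainder == 0xfff00000 the loops above give
     clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
     clear_zero_bit_copies == 20 and set_zero_bit_copies == 0, which is
     precisely the shape exploited by the shift tricks further down.  */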
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}
3035 /* See if we can calculate the value as the difference between two
3036 valid immediates. */
3037 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;
3041 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3042 & (0xff000000 >> topshift));
3044 /* If temp1 is zero, then that means the 9 most significant
3045 bits of remainder were 1 and we've caused it to overflow.
3046 When topshift is 0 we don't need to do anything since we
3047 can borrow from 'bit 32'. */
3048 if (temp1 == 0 && topshift != 0)
3049 temp1 = 0x80000000 >> (topshift - 1);
3051 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (VOIDmode, new_src,
						   GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}
3070 /* See if we can generate this by setting the bottom (or the top)
3071 16 bits, and then shifting these into the other half of the
3072 word. We only look for the simplest cases, to do more would cost
3073 too much. Be careful, however, not to generate this when the
3074 alternative would take fewer insns. */
3075 if (val & 0xffff0000)
3077 temp1 = remainder & 0xffff0000;
3078 temp2 = remainder & 0x0000ffff;
3080 /* Overlaps outside this range are best done using other methods. */
3081 for (i = 9; i < 24; i++)
3083 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3084 && !const_ok_for_arm (temp2))
3086 rtx new_src = (subtargets
3087 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3089 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3090 source, subtargets, generate);
3098 gen_rtx_ASHIFT (mode, source,
3105 /* Don't duplicate cases already considered. */
3106 for (i = 17; i < 24; i++)
3108 if (((temp1 | (temp1 >> i)) == remainder)
3109 && !const_ok_for_arm (temp1))
3111 rtx new_src = (subtargets
3112 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3114 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3115 source, subtargets, generate);
3120 gen_rtx_SET (VOIDmode, target,
3123 gen_rtx_LSHIFTRT (mode, source,
3134 /* If we have IOR or XOR, and the constant can be loaded in a
3135 single instruction, and we can find a temporary to put it in,
3136 then this can be done in two instructions instead of 3-4. */
3138 /* TARGET can't be NULL if SUBTARGETS is 0 */
3139 || (reload_completed && !reg_mentioned_p (target, source)))
3141 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3145 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, sub,
3150 emit_constant_insn (cond,
3151 gen_rtx_SET (VOIDmode, target,
3152 gen_rtx_fmt_ee (code, mode,
	 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
	 and the remainder 0s for e.g. 0xfff00000)
3165 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3167 This can be done in 2 instructions by using shifts with mov or mvn.
3172 mvn r0, r0, lsr #12 */
3173 if (set_sign_bit_copies > 8
3174 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3178 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3179 rtx shift = GEN_INT (set_sign_bit_copies);
3183 gen_rtx_SET (VOIDmode, sub,
3185 gen_rtx_ASHIFT (mode,
3190 gen_rtx_SET (VOIDmode, target,
3192 gen_rtx_LSHIFTRT (mode, sub,
3199 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3201 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
	 E.g. r0 = r0 | 0xfff
3208 if (set_zero_bit_copies > 8
3209 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3213 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3214 rtx shift = GEN_INT (set_zero_bit_copies);
3218 gen_rtx_SET (VOIDmode, sub,
3220 gen_rtx_LSHIFTRT (mode,
3225 gen_rtx_SET (VOIDmode, target,
3227 gen_rtx_ASHIFT (mode, sub,
3233 /* This will never be reached for Thumb2 because orn is a valid
3234 instruction. This is for Thumb1 and the ARM 32 bit cases.
3236 x = y | constant (such that ~constant is a valid constant)
3238 x = ~(~y & ~constant).
3240 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3244 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3245 emit_constant_insn (cond,
3246 gen_rtx_SET (VOIDmode, sub,
3247 gen_rtx_NOT (mode, source)));
3250 sub = gen_reg_rtx (mode);
3251 emit_constant_insn (cond,
3252 gen_rtx_SET (VOIDmode, sub,
3253 gen_rtx_AND (mode, source,
3255 emit_constant_insn (cond,
3256 gen_rtx_SET (VOIDmode, target,
3257 gen_rtx_NOT (mode, sub)));
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}
      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond,
					    remainder | shift_mask,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}
      break;

    default:
      break;
    }
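  /* Illustration of the two-shift trick handled above (register names
     illustrative): for an AND with 0xfffc0000 (clear_zero_bit_copies == 18)
     the sequence

	 mov  rT, rS, lsr #18
	 mov  rD, rT, lsl #18

     clears the low eighteen bits, i.e. computes rS & 0xfffc0000 without
     having to materialize the mask in a register.  */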
3345 /* Calculate what the instruction sequences would be if we generated it
3346 normally, negated, or inverted. */
3348 /* AND cannot be split into multiple insns, so invert and use BIC. */
3351 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;
3365 immediates = &pos_immediates;
3367 /* Is the negated immediate sequence more efficient? */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
3376 /* Is the inverted immediate sequence more efficient?
3377 We must allow for an extra NOT instruction for XOR operations, although
3378 there is some chance that the final 'mvn' will get optimized later. */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
3390 /* Now output the chosen sequence as instructions. */
3393 for (i = 0; i < insns; i++)
3395 rtx new_src, temp1_rtx;
3397 temp1 = immediates->i[i];
3399 if (code == SET || code == MINUS)
3400 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3401 else if ((final_invert || i < (insns - 1)) && subtargets)
3402 new_src = gen_reg_rtx (mode);
3408 else if (can_negate)
3411 temp1 = trunc_int_for_mode (temp1, mode);
3412 temp1_rtx = GEN_INT (temp1);
3416 else if (code == MINUS)
3417 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3419 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3421 emit_constant_insn (cond,
3422 gen_rtx_SET (VOIDmode, new_src,
3428 can_negate = can_invert;
3432 else if (code == MINUS)
3440 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3441 gen_rtx_NOT (mode, source)));
3448 /* Canonicalize a comparison so that we are more likely to recognize it.
3449 This can be done for a few constant compares, where we can make the
3450 immediate value easier to load. */
enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
3455 enum machine_mode mode;
3456 unsigned HOST_WIDE_INT i, maxval;
3458 mode = GET_MODE (*op0);
3459 if (mode == VOIDmode)
3460 mode = GET_MODE (*op1);
3462 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3464 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3465 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3466 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3467 for GTU/LEU in Thumb mode. */
  if (mode == DImode)
    {
      rtx tem;

      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
	return code;
      if (code == GT || code == LE
	  || (!TARGET_ARM && (code == GTU || code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (GET_CODE (*op1) == CONST_INT)
	    {
	      i = INTVAL (*op1);
	      switch (code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      return code == GT ? GE : LT;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      return code == GTU ? GEU : LTU;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	  /* If that did not work, reverse the condition.  */
	  tem = *op0;
	  *op0 = *op1;
	  *op1 = tem;
	  return swap_condition (code);
	}

      return code;
    }
3520 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3521 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3522 to facilitate possible combining with a cmp into 'ands'. */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
3525 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3526 && GET_MODE (XEXP (*op0, 0)) == QImode
3527 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3528 && subreg_lowpart_p (XEXP (*op0, 0))
3529 && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));
3533 /* Comparisons smaller than DImode. Only adjust comparisons against
3534 an out-of-range constant. */
  if (GET_CODE (*op1) != CONST_INT
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return code;

  i = INTVAL (*op1);

  switch (code)
    {
    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GT ? GE : LT;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GE ? GT : LE;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GTU ? GEU : LTU;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GEU ? GTU : LEU;
	}
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
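/* Worked example: "x > 0xffffff" cannot use 0xffffff directly (24
   contiguous set bits is not an 8-bit rotated immediate, and neither is
   its negation), but 0xffffff + 1 == 0x1000000 is, so the GT case above
   rewrites the comparison as "x >= 0x1000000" (GE).  */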
3596 /* Define how to find the value returned by a function. */
static rtx
arm_function_value (const_tree type, const_tree func,
3600 bool outgoing ATTRIBUTE_UNUSED)
3602 enum machine_mode mode;
3603 int unsignedp ATTRIBUTE_UNUSED;
3604 rtx r ATTRIBUTE_UNUSED;
3606 mode = TYPE_MODE (type);
3608 if (TARGET_AAPCS_BASED)
3609 return aapcs_allocate_return_reg (mode, type, func);
3611 /* Promote integer types. */
3612 if (INTEGRAL_TYPE_P (type))
3613 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
3617 if (arm_return_in_msb (type))
3619 HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }
3627 return arm_libcall_value_1 (mode);
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}
static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}
static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
3651 static bool init_done = false;
3652 static htab_t libcall_htab;
  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
				  NULL);
3660 add_libcall (libcall_htab,
3661 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3666 add_libcall (libcall_htab,
3667 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3669 add_libcall (libcall_htab,
3670 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3671 add_libcall (libcall_htab,
3672 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3673 add_libcall (libcall_htab,
3674 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3675 add_libcall (libcall_htab,
3676 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3678 add_libcall (libcall_htab,
3679 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3680 add_libcall (libcall_htab,
3681 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3682 add_libcall (libcall_htab,
3683 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3684 add_libcall (libcall_htab,
3685 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3686 add_libcall (libcall_htab,
3687 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3688 add_libcall (libcall_htab,
3689 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3690 add_libcall (libcall_htab,
3691 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3692 add_libcall (libcall_htab,
3693 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3695 /* Values from double-precision helper functions are returned in core
3696 registers if the selected core only supports single-precision
3697 arithmetic, even if we are using the hard-float ABI. The same is
3698 true for single-precision helpers, but we will never be using the
3699 hard-float ABI on a CPU which doesn't support single-precision
3700 operations in hardware. */
3701 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3702 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3703 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3704 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3705 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3706 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3707 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3708 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3709 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3710 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3711 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && htab_find (libcall_htab, libcall) != NULL;
}
static rtx
arm_libcall_value_1 (enum machine_mode mode)
{
3724 if (TARGET_AAPCS_BASED)
3725 return aapcs_libcall_value (mode);
  else if (TARGET_32BIT
	   && TARGET_HARD_FLOAT_ABI
	   && TARGET_FPA
	   && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
  else if (TARGET_32BIT
	   && TARGET_HARD_FLOAT_ABI
	   && TARGET_MAVERICK
	   && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3736 else if (TARGET_IWMMXT_ABI
3737 && arm_vector_mode_supported_p (mode))
3738 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
3743 /* Define how to find the value returned by a library function
3744 assuming the value has mode MODE. */
rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
3749 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3750 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3752 /* The following libcalls return their result in integer registers,
3753 even though they return a floating point value. */
3754 if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
3762 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_MAVERICK
	  && TARGET_HARD_FLOAT_ABI
	  && TARGET_32BIT
	  && regno == FIRST_CIRRUS_FP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM)
      || (TARGET_FPA
	  && TARGET_HARD_FLOAT_ABI
	  && TARGET_32BIT
	  && regno == FIRST_FPA_REGNUM))
    return true;

  return false;
}
3788 /* Determine the amount of memory needed to store the possible return
3789 registers of an untyped call. */
static int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	{
	  if (TARGET_VFP)
	    size += 32;
	  if (TARGET_FPA)
	    size += 12;
	  if (TARGET_MAVERICK)
	    size += 8;
	}
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
3813 /* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
3823 if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
3826 complex) are always returned in a register (or registers).
3827 We don't care about which register here, so we can short-cut
3828 some of the detail. */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;
3839 /* Check any available co-processors to see if they accept the
3840 type as a register candidate (VFP, for example, can return
3841 some aggregates in consecutive registers). These aren't
3842 available if the call is variadic. */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;
3846 /* Vector values should be returned using ARM registers, not
3847 memory (unless they're over 16 bytes, which will break since
3848 we only have four call-clobbered registers to play with). */
3849 if (TREE_CODE (type) == VECTOR_TYPE)
3850 return (size < 0 || size > (4 * UNITS_PER_WORD));
      /* The rest go in memory.  */
      return true;
    }
3856 if (TREE_CODE (type) == VECTOR_TYPE)
3857 return (size < 0 || size > (4 * UNITS_PER_WORD));
  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;
  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }
3871 /* For the arm-wince targets we choose to be compatible with Microsoft's
3872 ARM and Thumb compilers, which always return aggregates in memory. */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
3875 Also catch the case where int_size_in_bytes returns -1. In this case
3876 the aggregate is either huge or of variable size, and in either case
3877 we will want to return it via memory and not in a register. */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;
  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

3885 /* For a struct the APCS says that we only return in a register
3886 if the type is 'integer like' and every addressable element
3887 has an offset of zero. For practical purposes this means
3888 that the structure can have at most one non bit-field element
3889 and that this element must be the first one in the structure. */
3891 /* Find the first field, ignoring non FIELD_DECL things which will
3892 have been created by C++. */
3893 for (field = TYPE_FIELDS (type);
3894 field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3901 /* Check that the first field is valid for returning in a register. */
3903 /* ... Floats are not allowed */
3904 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3907 /* ... Aggregates that are not themselves valid for returning in
3908 a register are not allowed. */
3909 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3912 /* Now check the remaining fields, if any. Only bitfields are allowed,
3913 since they are not addressable. */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }
  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

3932 /* Unions can be returned in registers if every element is
3933 integral, or can be returned in an integer register. */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
3950 #endif /* not ARM_WINCE */
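/* Two quick examples under the APCS rules above (types illustrative):
   "struct { int x; }" is integer-like, fits in one word, and is returned
   in r0, while "struct { int x, y; }" is larger than a word and is
   returned in memory.  */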
  /* Return all other types in memory.  */
  return true;
}
3956 /* Indicate whether or not words of a double are in big-endian order. */
int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;

  /* For FPA, float words are always big-endian.  For VFP, float words
     follow the memory system mode.  */
  if (TARGET_FPA)
    return 1;

  if (TARGET_VFP)
    return (TARGET_BIG_END ? 1 : 0);

  return 1;
}
static const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
3984 {"aapcs", ARM_PCS_AAPCS},
3985 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
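/* For reference, the attribute recognized via the table above is spelled
   like this in user code (declaration illustrative):

     double sumf (float a, float b) __attribute__ ((pcs ("aapcs")));  */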
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;
4002 /* Get the value of the argument. */
4003 if (TREE_VALUE (attr) == NULL_TREE
4004 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4005 return ARM_PCS_UNKNOWN;
4007 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4009 /* Check it against the list of known arguments. */
4010 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;
  /* An unrecognized pcs attribute argument.  */
  return ARM_PCS_UNKNOWN;
}
4018 /* Get the PCS variant to use for this call. TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
4020 the call could be indirect or if this is a library call. */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }
4037 if (TARGET_AAPCS_BASED)
4039 /* Detect varargs functions. These always use the base rules
4040 (no argument is ever a candidate for a co-processor register). */
4042 bool base_rules = stdarg_p (type);
4044 if (user_convention)
4046 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4047 sorry ("non-AAPCS derived PCS variant");
4048 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4049 error ("variadic functions must use the base AAPCS variant");
4053 return ARM_PCS_AAPCS;
4054 else if (user_convention)
4056 else if (decl && flag_unit_at_a_time)
4058 /* Local functions never leak outside this compilation unit,
4059 so we are free to use whatever conventions are appropriate. */
4061 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4062 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4064 return ARM_PCS_AAPCS_LOCAL;
4067 else if (user_convention && user_pcs != arm_pcs_default)
4068 sorry ("PCS variant");
4070 /* For everything else we use the target's default. */
4071 return arm_pcs_default;
4076 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4077 const_tree fntype ATTRIBUTE_UNUSED,
4078 rtx libcall ATTRIBUTE_UNUSED,
4079 const_tree fndecl ATTRIBUTE_UNUSED)
4081 /* Record the unallocated VFP registers. */
4082 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4083 pcum->aapcs_vfp_reg_alloc = 0;
4086 /* Walk down the type tree of TYPE counting consecutive base elements.
4087 If *MODEP is VOIDmode, then set it to the first valid floating point
4088 type. If a non-floating point type is found, or if a floating point
4089 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4090 otherwise return the count in the sub-tree. */
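/* Illustrative examples (added commentary, not from the original
source):

struct hfa { double x; double y; }; -- returns 2, *modep = DFmode
struct mix { float f; double d; }; -- returns -1 (mismatched base types)
double a[4]; -- returns 4, *modep = DFmode

The callers below treat a count of 1 to 4 as a VFP candidate. */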
4092 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4094 enum machine_mode mode;
4097 switch (TREE_CODE (type))
4100 mode = TYPE_MODE (type);
4101 if (mode != DFmode && mode != SFmode)
4104 if (*modep == VOIDmode)
4113 mode = TYPE_MODE (TREE_TYPE (type));
4114 if (mode != DFmode && mode != SFmode)
4117 if (*modep == VOIDmode)
4126 /* Use V2SImode and V4SImode as representatives of all 64-bit
4127 and 128-bit vector types, whether or not those modes are
4128 supported with the present options. */
4129 size = int_size_in_bytes (type);
4142 if (*modep == VOIDmode)
4145 /* Vector modes are considered to be opaque: two vectors are
4146 equivalent for the purposes of being homogeneous aggregates
4147 if they are the same size. */
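/* E.g. (illustrative, using the GCC vector extension) both
typedef int v2si __attribute__ ((vector_size (8)));
typedef float v2sf __attribute__ ((vector_size (8)));
are 64 bits wide and so are both represented as V2SImode here. */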
4156 tree index = TYPE_DOMAIN (type);
4158 /* Can't handle incomplete types. */
4159 if (!COMPLETE_TYPE_P (type))
4162 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4165 || !TYPE_MAX_VALUE (index)
4166 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4167 || !TYPE_MIN_VALUE (index)
4168 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4172 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4173 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4175 /* There must be no padding. */
4176 if (!host_integerp (TYPE_SIZE (type), 1)
4177 || (tree_low_cst (TYPE_SIZE (type), 1)
4178 != count * GET_MODE_BITSIZE (*modep)))
4190 /* Can't handle incomplete types. */
4191 if (!COMPLETE_TYPE_P (type))
4194 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4196 if (TREE_CODE (field) != FIELD_DECL)
4199 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4205 /* There must be no padding. */
4206 if (!host_integerp (TYPE_SIZE (type), 1)
4207 || (tree_low_cst (TYPE_SIZE (type), 1)
4208 != count * GET_MODE_BITSIZE (*modep)))
4215 case QUAL_UNION_TYPE:
4217 /* These aren't very interesting except in a degenerate case. */
4222 /* Can't handle incomplete types. */
4223 if (!COMPLETE_TYPE_P (type))
4226 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4228 if (TREE_CODE (field) != FIELD_DECL)
4231 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4234 count = count > sub_count ? count : sub_count;
4237 /* There must be no padding. */
4238 if (!host_integerp (TYPE_SIZE (type), 1)
4239 || (tree_low_cst (TYPE_SIZE (type), 1)
4240 != count * GET_MODE_BITSIZE (*modep)))
4253 /* Return true if PCS_VARIANT should use VFP registers. */
4255 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4257 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4259 static bool seen_thumb1_vfp = false;
4261 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4263 sorry ("Thumb-1 hard-float VFP ABI");
4264 /* sorry() is not immediately fatal, so only display this once. */
4265 seen_thumb1_vfp = true;
4271 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4274 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4275 && (TARGET_VFP_DOUBLE || !is_double));
4279 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4280 enum machine_mode mode, const_tree type,
4281 enum machine_mode *base_mode, int *count)
4283 enum machine_mode new_mode = VOIDmode;
4285 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4286 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4287 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4292 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4295 new_mode = (mode == DCmode ? DFmode : SFmode);
4297 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4299 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4301 if (ag_count > 0 && ag_count <= 4)
4310 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4313 *base_mode = new_mode;
4318 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4319 enum machine_mode mode, const_tree type)
4321 int count ATTRIBUTE_UNUSED;
4322 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4324 if (!use_vfp_abi (pcs_variant, false))
4326 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4331 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4334 if (!use_vfp_abi (pcum->pcs_variant, false))
4337 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4338 &pcum->aapcs_vfp_rmode,
4339 &pcum->aapcs_vfp_rcount);
4343 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4344 const_tree type ATTRIBUTE_UNUSED)
4346 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4347 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4350 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4351 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4353 pcum->aapcs_vfp_reg_alloc = mask << regno;
4354 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4357 int rcount = pcum->aapcs_vfp_rcount;
4359 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4363 /* Avoid using unsupported vector modes. */
4364 if (rmode == V2SImode)
4366 else if (rmode == V4SImode)
4373 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4374 for (i = 0; i < rcount; i++)
4376 rtx tmp = gen_rtx_REG (rmode,
4377 FIRST_VFP_REGNUM + regno + i * rshift);
4378 tmp = gen_rtx_EXPR_LIST
4380 GEN_INT (i * GET_MODE_SIZE (rmode)));
4381 XVECEXP (par, 0, i) = tmp;
4384 pcum->aapcs_reg = par;
4387 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4394 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4395 enum machine_mode mode,
4396 const_tree type ATTRIBUTE_UNUSED)
4398 if (!use_vfp_abi (pcs_variant, false))
4401 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4404 enum machine_mode ag_mode;
4409 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4414 if (ag_mode == V2SImode)
4416 else if (ag_mode == V4SImode)
4422 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4423 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4424 for (i = 0; i < count; i++)
4426 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4427 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4428 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4429 XVECEXP (par, 0, i) = tmp;
4435 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4439 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4440 enum machine_mode mode ATTRIBUTE_UNUSED,
4441 const_tree type ATTRIBUTE_UNUSED)
4443 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4444 pcum->aapcs_vfp_reg_alloc = 0;
4448 #define AAPCS_CP(X) \
4450 aapcs_ ## X ## _cum_init, \
4451 aapcs_ ## X ## _is_call_candidate, \
4452 aapcs_ ## X ## _allocate, \
4453 aapcs_ ## X ## _is_return_candidate, \
4454 aapcs_ ## X ## _allocate_return_reg, \
4455 aapcs_ ## X ## _advance \
4458 /* Table of co-processors that can be used to pass arguments in
4459 registers. Ideally no argument should be a candidate for more than
4460 one co-processor table entry, but the table is processed in order
4461 and stops after the first match. If that entry then fails to put
4462 the argument into a co-processor register, the argument will go on the stack. */
4466 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4467 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4469 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4470 BLKmode) is a candidate for this co-processor's registers; this
4471 function should ignore any position-dependent state in
4472 CUMULATIVE_ARGS and only use call-type dependent information. */
4473 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4475 /* Return true if the argument does get a co-processor register; it
4476 should set aapcs_reg to an RTX of the register allocated as is
4477 required for a return from FUNCTION_ARG. */
4478 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4480 /* Return true if a result of mode MODE (or type TYPE if MODE is
4481 BLKmode) can be returned in this co-processor's registers. */
4482 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4484 /* Allocate and return an RTX element to hold the return type of a
4485 call; this routine must not fail and will only be called if
4486 is_return_candidate returned true with the same parameters. */
4487 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4489 /* Finish processing this argument and prepare to start processing the next one. */
4491 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4492 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4500 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4505 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4506 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4513 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4515 /* We aren't passed a decl, so we can't check that a call is local.
4516 However, it isn't clear that that would be a win anyway, since it
4517 might limit some tail-calling opportunities. */
4518 enum arm_pcs pcs_variant;
4522 const_tree fndecl = NULL_TREE;
4524 if (TREE_CODE (fntype) == FUNCTION_DECL)
4527 fntype = TREE_TYPE (fntype);
4530 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4533 pcs_variant = arm_pcs_default;
4535 if (pcs_variant != ARM_PCS_AAPCS)
4539 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4540 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4549 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4552 /* We aren't passed a decl, so we can't check that a call is local.
4553 However, it isn't clear that that would be a win anyway, since it
4554 might limit some tail-calling opportunities. */
4555 enum arm_pcs pcs_variant;
4556 int unsignedp ATTRIBUTE_UNUSED;
4560 const_tree fndecl = NULL_TREE;
4562 if (TREE_CODE (fntype) == FUNCTION_DECL)
4565 fntype = TREE_TYPE (fntype);
4568 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4571 pcs_variant = arm_pcs_default;
4573 /* Promote integer types. */
4574 if (type && INTEGRAL_TYPE_P (type))
4575 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4577 if (pcs_variant != ARM_PCS_AAPCS)
4581 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4582 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4584 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4588 /* Promotes small structs returned in a register to full-word size
4589 for big-endian AAPCS. */
4590 if (type && arm_return_in_msb (type))
4592 HOST_WIDE_INT size = int_size_in_bytes (type);
4593 if (size % UNITS_PER_WORD != 0)
4595 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4596 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4600 return gen_rtx_REG (mode, R0_REGNUM);
4604 aapcs_libcall_value (enum machine_mode mode)
4606 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4607 && GET_MODE_SIZE (mode) <= 4)
4610 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4613 /* Lay out a function argument using the AAPCS rules. The rule
4614 numbers referred to here are those in the AAPCS. */
4616 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4617 const_tree type, bool named)
4622 /* We only need to do this once per argument. */
4623 if (pcum->aapcs_arg_processed)
4626 pcum->aapcs_arg_processed = true;
4628 /* Special case: if named is false then we are handling an incoming
4629 anonymous argument which is on the stack. */
4633 /* Is this a potential co-processor register candidate? */
4634 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4636 int slot = aapcs_select_call_coproc (pcum, mode, type);
4637 pcum->aapcs_cprc_slot = slot;
4639 /* We don't have to apply any of the rules from part B of the
4640 preparation phase; these are handled elsewhere in the backend. */
4645 /* A co-processor register candidate goes either in its own
4646 class of registers or on the stack. */
4647 if (!pcum->aapcs_cprc_failed[slot])
4649 /* C1.cp - Try to allocate the argument to co-processor registers. */
4651 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4654 /* C2.cp - Put the argument on the stack and note that we
4655 can't assign any more candidates in this slot. We also
4656 need to note that we have allocated stack space, so that
4657 we won't later try to split a non-cprc candidate between
4658 core registers and the stack. */
4659 pcum->aapcs_cprc_failed[slot] = true;
4660 pcum->can_split = false;
4663 /* We didn't get a register, so this argument goes on the stack. */
4665 gcc_assert (pcum->can_split == false);
4670 /* C3 - For double-word aligned arguments, round the NCRN up to the
4671 next even number. */
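/* E.g. (illustrative): after a single 'int' argument NCRN is 1; a
following 'long long' needs doubleword alignment, so NCRN is
rounded up to 2 and the argument is passed in r2/r3. */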
4672 ncrn = pcum->aapcs_ncrn;
4673 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4676 nregs = ARM_NUM_REGS2 (mode, type);
4678 /* Sigh, this test should really assert that nregs > 0, but a GCC
4679 extension allows empty structs and then gives them empty size; it
4680 then allows such a structure to be passed by value. For some of
4681 the code below we have to pretend that such an argument has
4682 non-zero size so that we 'locate' it correctly either in
4683 registers or on the stack. */
4684 gcc_assert (nregs >= 0);
4686 nregs2 = nregs ? nregs : 1;
4688 /* C4 - Argument fits entirely in core registers. */
4689 if (ncrn + nregs2 <= NUM_ARG_REGS)
4691 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4692 pcum->aapcs_next_ncrn = ncrn + nregs;
4696 /* C5 - Some core registers left and there are no arguments already
4697 on the stack: split this argument between the remaining core
4698 registers and the stack. */
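/* E.g. (illustrative): with NCRN == 3 and an 8-byte 'long long',
the first word is passed in r3 and the second on the stack,
giving aapcs_partial == 4. */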
4699 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4701 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4702 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4703 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4707 /* C6 - NCRN is set to 4. */
4708 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4710 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4714 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4715 for a call to a function whose data type is FNTYPE.
4716 For a library call, FNTYPE is NULL. */
4718 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4720 tree fndecl ATTRIBUTE_UNUSED)
4722 /* Determine the PCS variant to use for this call. */
4724 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4726 pcum->pcs_variant = arm_pcs_default;
4728 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4730 if (arm_libcall_uses_aapcs_base (libname))
4731 pcum->pcs_variant = ARM_PCS_AAPCS;
4733 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4734 pcum->aapcs_reg = NULL_RTX;
4735 pcum->aapcs_partial = 0;
4736 pcum->aapcs_arg_processed = false;
4737 pcum->aapcs_cprc_slot = -1;
4738 pcum->can_split = true;
4740 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4744 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4746 pcum->aapcs_cprc_failed[i] = false;
4747 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4755 /* On the ARM, the offset starts at 0. */
4757 pcum->iwmmxt_nregs = 0;
4758 pcum->can_split = true;
4760 /* Varargs vectors are treated the same as long long.
4761 named_count avoids having to change the way arm handles 'named'. */
4762 pcum->named_count = 0;
4765 if (TARGET_REALLY_IWMMXT && fntype)
4769 for (fn_arg = TYPE_ARG_TYPES (fntype);
4771 fn_arg = TREE_CHAIN (fn_arg))
4772 pcum->named_count += 1;
4774 if (! pcum->named_count)
4775 pcum->named_count = INT_MAX;
4780 /* Return true if mode/type need doubleword alignment. */
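/* E.g. (illustrative, assuming the usual AAPCS alignments): 'long
long' (64-bit alignment) or a type declared with
__attribute__ ((aligned (8))) exceeds the 32-bit PARM_BOUNDARY
and so needs doubleword alignment; plain 'int' does not. */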
4782 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4784 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4785 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4789 /* Determine where to put an argument to a function.
4790 Value is zero to push the argument on the stack,
4791 or a hard register in which to store the argument.
4793 MODE is the argument's machine mode.
4794 TYPE is the data type of the argument (as a tree).
4795 This is null for libcalls where that information may not be available.
4797 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4798 the preceding args and about the function being called.
4799 NAMED is nonzero if this argument is a named parameter
4800 (otherwise it is an extra parameter matching an ellipsis).
4802 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4803 other arguments are passed on the stack. If (NAMED == 0) (which happens
4804 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4805 defined), say it is passed on the stack (function_prologue will
4806 indeed arrange for it to be passed on the stack if necessary). */
4809 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4810 const_tree type, bool named)
4812 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4815 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4816 a call insn (op3 of a call_value insn). */
4817 if (mode == VOIDmode)
4820 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4822 aapcs_layout_arg (pcum, mode, type, named);
4823 return pcum->aapcs_reg;
4826 /* Varargs vectors are treated the same as long long.
4827 named_count avoids having to change the way arm handles 'named'. */
4828 if (TARGET_IWMMXT_ABI
4829 && arm_vector_mode_supported_p (mode)
4830 && pcum->named_count > pcum->nargs + 1)
4832 if (pcum->iwmmxt_nregs <= 9)
4833 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4836 pcum->can_split = false;
4841 /* Put doubleword aligned quantities in even register pairs. */
4843 && ARM_DOUBLEWORD_ALIGN
4844 && arm_needs_doubleword_align (mode, type))
4847 /* Only allow splitting an arg between regs and memory if all preceding
4848 args were allocated to regs. For args passed by reference we only count
4849 the reference pointer. */
4850 if (pcum->can_split)
4853 nregs = ARM_NUM_REGS2 (mode, type);
4855 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4858 return gen_rtx_REG (mode, pcum->nregs);
4862 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4864 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4865 ? DOUBLEWORD_ALIGNMENT
4870 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4871 tree type, bool named)
4873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4874 int nregs = pcum->nregs;
4876 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4878 aapcs_layout_arg (pcum, mode, type, named);
4879 return pcum->aapcs_partial;
4882 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4885 if (NUM_ARG_REGS > nregs
4886 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4888 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
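/* E.g. (illustrative, assuming the argument may be split): with
three argument registers already used (nregs == 3), a DImode
argument occupies r3 plus the stack, so 4 is returned above. */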
4893 /* Update the data in PCUM to advance over an argument
4894 of mode MODE and data type TYPE.
4895 (TYPE is null for libcalls where that information may not be available.) */
4898 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4899 const_tree type, bool named)
4901 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4903 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4905 aapcs_layout_arg (pcum, mode, type, named);
4907 if (pcum->aapcs_cprc_slot >= 0)
4909 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4911 pcum->aapcs_cprc_slot = -1;
4914 /* Generic stuff. */
4915 pcum->aapcs_arg_processed = false;
4916 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4917 pcum->aapcs_reg = NULL_RTX;
4918 pcum->aapcs_partial = 0;
4923 if (arm_vector_mode_supported_p (mode)
4924 && pcum->named_count > pcum->nargs
4925 && TARGET_IWMMXT_ABI)
4926 pcum->iwmmxt_nregs += 1;
4928 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4932 /* Variable sized types are passed by reference. This is a GCC
4933 extension to the ARM ABI. */
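/* For example (GNU C, illustrative):

void f (int n)
{
typedef char buf_t[n]; -- TYPE_SIZE is not an INTEGER_CST
...
}

an argument of such a variably sized type is passed by reference. */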
4936 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4937 enum machine_mode mode ATTRIBUTE_UNUSED,
4938 const_tree type, bool named ATTRIBUTE_UNUSED)
4940 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4943 /* Encode the current state of the #pragma [no_]long_calls. */
4946 OFF, /* No #pragma [no_]long_calls is in effect. */
4947 LONG, /* #pragma long_calls is in effect. */
4948 SHORT /* #pragma no_long_calls is in effect. */
4951 static arm_pragma_enum arm_pragma_long_calls = OFF;
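/* Typical source-level usage (illustrative):

#pragma long_calls
void far_away (void); -- calls to this use full 32-bit addresses
#pragma no_long_calls
void near_by (void); -- calls to this may use a plain BL
#pragma long_calls_off */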
4954 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4956 arm_pragma_long_calls = LONG;
4960 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4962 arm_pragma_long_calls = SHORT;
4966 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4968 arm_pragma_long_calls = OFF;
4971 /* Handle an attribute requiring a FUNCTION_DECL;
4972 arguments as in struct attribute_spec.handler. */
4974 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4975 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4977 if (TREE_CODE (*node) != FUNCTION_DECL)
4979 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4981 *no_add_attrs = true;
4987 /* Handle an "interrupt" or "isr" attribute;
4988 arguments as in struct attribute_spec.handler. */
4990 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4995 if (TREE_CODE (*node) != FUNCTION_DECL)
4997 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4999 *no_add_attrs = true;
5001 /* FIXME: the argument if any is checked for type attributes;
5002 should it be checked for decl ones? */
5006 if (TREE_CODE (*node) == FUNCTION_TYPE
5007 || TREE_CODE (*node) == METHOD_TYPE)
5009 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5011 warning (OPT_Wattributes, "%qE attribute ignored",
5013 *no_add_attrs = true;
5016 else if (TREE_CODE (*node) == POINTER_TYPE
5017 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5018 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5019 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5021 *node = build_variant_type_copy (*node);
5022 TREE_TYPE (*node) = build_type_attribute_variant
5024 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5025 *no_add_attrs = true;
5029 /* Possibly pass this attribute on from the type to a decl. */
5030 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5031 | (int) ATTR_FLAG_FUNCTION_NEXT
5032 | (int) ATTR_FLAG_ARRAY_NEXT))
5034 *no_add_attrs = true;
5035 return tree_cons (name, args, NULL_TREE);
5039 warning (OPT_Wattributes, "%qE attribute ignored",
5048 /* Handle a "pcs" attribute; arguments as in struct
5049 attribute_spec.handler. */
5051 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5052 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5054 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5056 warning (OPT_Wattributes, "%qE attribute ignored", name);
5057 *no_add_attrs = true;
5062 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5063 /* Handle the "notshared" attribute. This attribute is another way of
5064 requesting hidden visibility. ARM's compiler supports
5065 "__declspec(notshared)"; we support the same thing via an
5069 arm_handle_notshared_attribute (tree *node,
5070 tree name ATTRIBUTE_UNUSED,
5071 tree args ATTRIBUTE_UNUSED,
5072 int flags ATTRIBUTE_UNUSED,
5075 tree decl = TYPE_NAME (*node);
5079 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5080 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5081 *no_add_attrs = false;
5087 /* Return 0 if the attributes for two types are incompatible, 1 if they
5088 are compatible, and 2 if they are nearly compatible (which causes a
5089 warning to be generated). */
5091 arm_comp_type_attributes (const_tree type1, const_tree type2)
5095 /* Check for mismatch of non-default calling convention. */
5096 if (TREE_CODE (type1) != FUNCTION_TYPE)
5099 /* Check for mismatched call attributes. */
5100 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5101 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5102 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5103 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5105 /* Only bother to check if an attribute is defined. */
5106 if (l1 | l2 | s1 | s2)
5108 /* If one type has an attribute, the other must have the same attribute. */
5109 if ((l1 != l2) || (s1 != s2))
5112 /* Disallow mixed attributes. */
5113 if ((l1 & s2) || (l2 & s1))
5117 /* Check for mismatched ISR attribute. */
5118 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5120 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5121 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5123 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5130 /* Assigns default attributes to newly defined type. This is used to
5131 set short_call/long_call attributes for function types of
5132 functions defined inside corresponding #pragma scopes. */
5134 arm_set_default_type_attributes (tree type)
5136 /* Add __attribute__ ((long_call)) to all functions when
5137 inside #pragma long_calls, or __attribute__ ((short_call))
5138 when inside #pragma no_long_calls. */
5139 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5141 tree type_attr_list, attr_name;
5142 type_attr_list = TYPE_ATTRIBUTES (type);
5144 if (arm_pragma_long_calls == LONG)
5145 attr_name = get_identifier ("long_call");
5146 else if (arm_pragma_long_calls == SHORT)
5147 attr_name = get_identifier ("short_call");
5151 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5152 TYPE_ATTRIBUTES (type) = type_attr_list;
5156 /* Return true if DECL is known to be linked into section SECTION. */
5159 arm_function_in_section_p (tree decl, section *section)
5161 /* We can only be certain about functions defined in the same
5162 compilation unit. */
5163 if (!TREE_STATIC (decl))
5166 /* Make sure that DECL always binds to the definition in this
5167 compilation unit. */
5168 if (!targetm.binds_local_p (decl))
5171 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5172 if (!DECL_SECTION_NAME (decl))
5174 /* Make sure that we will not create a unique section for DECL. */
5175 if (flag_function_sections || DECL_ONE_ONLY (decl))
5179 return function_section (decl) == section;
5182 /* Return nonzero if a 32-bit "long_call" should be generated for
5183 a call from the current function to DECL. We generate a long_call if the function:
5186 a. has an __attribute__ ((long_call))
5187 or b. is within the scope of a #pragma long_calls
5188 or c. the -mlong-calls command line switch has been specified
5190 However we do not generate a long call if the function:
5192 d. has an __attribute__ ((short_call))
5193 or e. is inside the scope of a #pragma no_long_calls
5194 or f. is defined in the same section as the current function. */
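/* For example (illustrative):

void far_func (void) __attribute__ ((long_call)); -- case a.
void near_func (void) __attribute__ ((short_call)); -- case d. */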
5197 arm_is_long_call_p (tree decl)
5202 return TARGET_LONG_CALLS;
5204 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5205 if (lookup_attribute ("short_call", attrs))
5208 /* For "f", be conservative, and only cater for cases in which the
5209 whole of the current function is placed in the same section. */
5210 if (!flag_reorder_blocks_and_partition
5211 && TREE_CODE (decl) == FUNCTION_DECL
5212 && arm_function_in_section_p (decl, current_function_section ()))
5215 if (lookup_attribute ("long_call", attrs))
5218 return TARGET_LONG_CALLS;
5221 /* Return nonzero if it is ok to make a tail-call to DECL. */
5223 arm_function_ok_for_sibcall (tree decl, tree exp)
5225 unsigned long func_type;
5227 if (cfun->machine->sibcall_blocked)
5230 /* Never tailcall something for which we have no decl, or if we
5231 are generating code for Thumb-1. */
5232 if (decl == NULL || TARGET_THUMB1)
5235 /* The PIC register is live on entry to VxWorks PLT entries, so we
5236 must make the call before restoring the PIC register. */
5237 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5240 /* Cannot tail-call to long calls, since these are out of range of
5241 a branch instruction. */
5242 if (arm_is_long_call_p (decl))
5245 /* If we are interworking and the function is not declared static
5246 then we can't tail-call it unless we know that it exists in this
5247 compilation unit (since it might be a Thumb routine). */
5248 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5251 func_type = arm_current_func_type ();
5252 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5253 if (IS_INTERRUPT (func_type))
5256 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5258 /* Check that the return value locations are the same. For
5259 example that we aren't returning a value from the sibling in
5260 a VFP register but then need to transfer it to a core register. */
5264 a = arm_function_value (TREE_TYPE (exp), decl, false);
5265 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5267 if (!rtx_equal_p (a, b))
5271 /* Never tailcall if the function may be called with a misaligned SP. */
5272 if (IS_STACKALIGN (func_type))
5275 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5276 references should become a NOP. Don't convert such calls into sibling calls. */
5278 if (TARGET_AAPCS_BASED
5279 && arm_abi == ARM_ABI_AAPCS
5280 && DECL_WEAK (decl))
5283 /* Everything else is ok. */
5288 /* Addressing mode support functions. */
5290 /* Return nonzero if X is a legitimate immediate operand when compiling
5291 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5293 legitimate_pic_operand_p (rtx x)
5295 if (GET_CODE (x) == SYMBOL_REF
5296 || (GET_CODE (x) == CONST
5297 && GET_CODE (XEXP (x, 0)) == PLUS
5298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5304 /* Record that the current function needs a PIC register. Initialize
5305 cfun->machine->pic_reg if we have not already done so. */
5308 require_pic_register (void)
5310 /* A lot of the logic here is made obscure by the fact that this
5311 routine gets called as part of the rtx cost estimation process.
5312 We don't want those calls to affect any assumptions about the real
5313 function; and further, we can't call entry_of_function() until we
5314 start the real expansion process. */
5315 if (!crtl->uses_pic_offset_table)
5317 gcc_assert (can_create_pseudo_p ());
5318 if (arm_pic_register != INVALID_REGNUM)
5320 if (!cfun->machine->pic_reg)
5321 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5323 /* Play games to avoid marking the function as needing pic
5324 if we are being called as part of the cost-estimation process. */
5326 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5327 crtl->uses_pic_offset_table = 1;
5333 if (!cfun->machine->pic_reg)
5334 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5336 /* Play games to avoid marking the function as needing pic
5337 if we are being called as part of the cost-estimation process. */
5339 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5341 crtl->uses_pic_offset_table = 1;
5344 arm_load_pic_register (0UL);
5349 for (insn = seq; insn; insn = NEXT_INSN (insn))
5351 INSN_LOCATOR (insn) = prologue_locator;
5353 /* We can be called during expansion of PHI nodes, where
5354 we can't yet emit instructions directly in the final
5355 insn stream. Queue the insns on the entry edge, they will
5356 be committed after everything else is expanded. */
5357 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5364 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5366 if (GET_CODE (orig) == SYMBOL_REF
5367 || GET_CODE (orig) == LABEL_REF)
5373 gcc_assert (can_create_pseudo_p ());
5374 reg = gen_reg_rtx (Pmode);
5377 /* VxWorks does not impose a fixed gap between segments; the run-time
5378 gap can be different from the object-file gap. We therefore can't
5379 use GOTOFF unless we are absolutely sure that the symbol is in the
5380 same segment as the GOT. Unfortunately, the flexibility of linker
5381 scripts means that we can't be sure of that in general, so assume
5382 that GOTOFF is never valid on VxWorks. */
5383 if ((GET_CODE (orig) == LABEL_REF
5384 || (GET_CODE (orig) == SYMBOL_REF &&
5385 SYMBOL_REF_LOCAL_P (orig)))
5387 && !TARGET_VXWORKS_RTP)
5388 insn = arm_pic_static_addr (orig, reg);
5394 /* If this function doesn't have a pic register, create one now. */
5395 require_pic_register ();
5397 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5399 /* Make the MEM as close to a constant as possible. */
5400 mem = SET_SRC (pat);
5401 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5402 MEM_READONLY_P (mem) = 1;
5403 MEM_NOTRAP_P (mem) = 1;
5405 insn = emit_insn (pat);
5408 /* Put a REG_EQUAL note on this insn, so that it can be optimized by later passes. */
5410 set_unique_reg_note (insn, REG_EQUAL, orig);
5414 else if (GET_CODE (orig) == CONST)
5418 if (GET_CODE (XEXP (orig, 0)) == PLUS
5419 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5422 /* Handle the case where we have: const (UNSPEC_TLS). */
5423 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5424 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5427 /* Handle the case where we have:
5428 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5430 if (GET_CODE (XEXP (orig, 0)) == PLUS
5431 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5432 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5434 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5440 gcc_assert (can_create_pseudo_p ());
5441 reg = gen_reg_rtx (Pmode);
5444 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5446 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5447 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5448 base == reg ? 0 : reg);
5450 if (GET_CODE (offset) == CONST_INT)
5452 /* The base register doesn't really matter; we only want to
5453 test the index for the appropriate mode. */
5454 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5456 gcc_assert (can_create_pseudo_p ());
5457 offset = force_reg (Pmode, offset);
5460 if (GET_CODE (offset) == CONST_INT)
5461 return plus_constant (base, INTVAL (offset));
5464 if (GET_MODE_SIZE (mode) > 4
5465 && (GET_MODE_CLASS (mode) == MODE_INT
5466 || TARGET_SOFT_FLOAT))
5468 emit_insn (gen_addsi3 (reg, base, offset));
5472 return gen_rtx_PLUS (Pmode, base, offset);
5479 /* Find a spare register to use during the prologue of a function. */
5482 thumb_find_work_register (unsigned long pushed_regs_mask)
5486 /* Check the argument registers first as these are call-used. The
5487 register allocation order means that sometimes r3 might be used
5488 but earlier argument registers might not, so check them all. */
5489 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5490 if (!df_regs_ever_live_p (reg))
5493 /* Before going on to check the call-saved registers we can try a couple
5494 more ways of deducing that r3 is available. The first is when we are
5495 pushing anonymous arguments onto the stack and we have fewer than 4
5496 registers' worth of fixed arguments (*). In this case r3 will be part of
5497 the variable argument list and so we can be sure that it will be
5498 pushed right at the start of the function. Hence it will be available
5499 for the rest of the prologue.
5500 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5501 if (cfun->machine->uses_anonymous_args
5502 && crtl->args.pretend_args_size > 0)
5503 return LAST_ARG_REGNUM;
5505 /* The other case is when we have fixed arguments but less than 4 registers
5506 worth. In this case r3 might be used in the body of the function, but
5507 it is not being used to convey an argument into the function. In theory
5508 we could just check crtl->args.size to see how many bytes are
5509 being passed in argument registers, but it seems that it is unreliable.
5510 Sometimes it will have the value 0 when in fact arguments are being
5511 passed. (See testcase execute/20021111-1.c for an example). So we also
5512 check the args_info.nregs field as well. The problem with this field is
5513 that it makes no allowances for arguments that are passed to the
5514 function but which are not used. Hence we could miss an opportunity
5515 when a function has an unused argument in r3. But it is better to be
5516 safe than to be sorry. */
5517 if (! cfun->machine->uses_anonymous_args
5518 && crtl->args.size >= 0
5519 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5520 && crtl->args.info.nregs < 4)
5521 return LAST_ARG_REGNUM;
5523 /* Otherwise look for a call-saved register that is going to be pushed. */
5524 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5525 if (pushed_regs_mask & (1 << reg))
5530 /* Thumb-2 can use high regs. */
5531 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5532 if (pushed_regs_mask & (1 << reg))
5535 /* Something went wrong - thumb_compute_save_reg_mask()
5536 should have arranged for a suitable register to be pushed. */
5540 static GTY(()) int pic_labelno;
5542 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5546 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5548 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5550 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5553 gcc_assert (flag_pic);
5555 pic_reg = cfun->machine->pic_reg;
5556 if (TARGET_VXWORKS_RTP)
5558 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5559 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5560 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5562 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5564 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5565 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5569 /* We use an UNSPEC rather than a LABEL_REF because this label
5570 never appears in the code stream. */
5572 labelno = GEN_INT (pic_labelno++);
5573 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5574 l1 = gen_rtx_CONST (VOIDmode, l1);
5576 /* On the ARM the PC register contains 'dot + 8' at the time of the
5577 addition, on the Thumb it is 'dot + 4'. */
5578 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5579 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5581 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
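/* A sketch of the code this typically expands to in ARM state
(label and register names are illustrative, not from the sources):

ldr r4, .LCP @ .LCP: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
.LPIC0: add r4, pc, r4 @ pc reads as 'dot + 8' here

which is why 8 (or 4 for Thumb) is added to the label above. */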
5585 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5587 else /* TARGET_THUMB1 */
5589 if (arm_pic_register != INVALID_REGNUM
5590 && REGNO (pic_reg) > LAST_LO_REGNUM)
5592 /* We will have pushed the pic register, so we should always be
5593 able to find a work register. */
5594 pic_tmp = gen_rtx_REG (SImode,
5595 thumb_find_work_register (saved_regs));
5596 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5597 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5598 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5601 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5605 /* Need to emit this whether or not we obey regdecls,
5606 since setjmp/longjmp can cause life info to screw up. */
5610 /* Generate code to load the address of a static var when flag_pic is set. */
5612 arm_pic_static_addr (rtx orig, rtx reg)
5614 rtx l1, labelno, offset_rtx, insn;
5616 gcc_assert (flag_pic);
5618 /* We use an UNSPEC rather than a LABEL_REF because this label
5619 never appears in the code stream. */
5620 labelno = GEN_INT (pic_labelno++);
5621 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5622 l1 = gen_rtx_CONST (VOIDmode, l1);
5624 /* On the ARM the PC register contains 'dot + 8' at the time of the
5625 addition, on the Thumb it is 'dot + 4'. */
5626 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5627 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5628 UNSPEC_SYMBOL_OFFSET);
5629 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5631 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5635 /* Return nonzero if X is valid as an ARM state addressing register. */
5637 arm_address_register_rtx_p (rtx x, int strict_p)
5641 if (GET_CODE (x) != REG)
5647 return ARM_REGNO_OK_FOR_BASE_P (regno);
5649 return (regno <= LAST_ARM_REGNUM
5650 || regno >= FIRST_PSEUDO_REGISTER
5651 || regno == FRAME_POINTER_REGNUM
5652 || regno == ARG_POINTER_REGNUM);
5655 /* Return TRUE if this rtx is the difference of a symbol and a label,
5656 and will reduce to a PC-relative relocation in the object file.
5657 Expressions like this can be left alone when generating PIC, rather
5658 than forced through the GOT. */
5660 pcrel_constant_p (rtx x)
5662 if (GET_CODE (x) == MINUS)
5663 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5668 /* Return true if X will surely end up in an index register after the next splitting pass. */
5671 will_be_in_index_register (const_rtx x)
5673 /* arm.md: calculate_pic_address will split this into a register. */
5674 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5677 /* Return nonzero if X is a valid ARM state address operand. */
5679 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5683 enum rtx_code code = GET_CODE (x);
5685 if (arm_address_register_rtx_p (x, strict_p))
5688 use_ldrd = (TARGET_LDRD
5690 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5692 if (code == POST_INC || code == PRE_DEC
5693 || ((code == PRE_INC || code == POST_DEC)
5694 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5695 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5697 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5698 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5699 && GET_CODE (XEXP (x, 1)) == PLUS
5700 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5702 rtx addend = XEXP (XEXP (x, 1), 1);
5704 /* Don't allow ldrd post-increment by register because it's hard
5705 to fix up invalid register choices. */
5707 && GET_CODE (x) == POST_MODIFY
5708 && GET_CODE (addend) == REG)
5711 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5712 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5715 /* After reload constants split into minipools will have addresses
5716 from a LABEL_REF. */
5717 else if (reload_completed
5718 && (code == LABEL_REF
5720 && GET_CODE (XEXP (x, 0)) == PLUS
5721 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5722 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5725 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5728 else if (code == PLUS)
5730 rtx xop0 = XEXP (x, 0);
5731 rtx xop1 = XEXP (x, 1);
5733 return ((arm_address_register_rtx_p (xop0, strict_p)
5734 && ((GET_CODE (xop1) == CONST_INT
5735 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5736 || (!strict_p && will_be_in_index_register (xop1))))
5737 || (arm_address_register_rtx_p (xop1, strict_p)
5738 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5742 /* Reload currently can't handle MINUS, so disable this for now. */
5743 else if (GET_CODE (x) == MINUS)
5745 rtx xop0 = XEXP (x, 0);
5746 rtx xop1 = XEXP (x, 1);
5748 return (arm_address_register_rtx_p (xop0, strict_p)
5749 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5753 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5754 && code == SYMBOL_REF
5755 && CONSTANT_POOL_ADDRESS_P (x)
5757 && symbol_mentioned_p (get_pool_constant (x))
5758 && ! pcrel_constant_p (get_pool_constant (x))))
5764 /* Return nonzero if X is a valid Thumb-2 address operand. */
5766 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5769 enum rtx_code code = GET_CODE (x);
5771 if (arm_address_register_rtx_p (x, strict_p))
5774 use_ldrd = (TARGET_LDRD
5776 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5778 if (code == POST_INC || code == PRE_DEC
5779 || ((code == PRE_INC || code == POST_DEC)
5780 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5781 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5783 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5784 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5785 && GET_CODE (XEXP (x, 1)) == PLUS
5786 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5788 /* Thumb-2 only has autoincrement by constant. */
5789 rtx addend = XEXP (XEXP (x, 1), 1);
5790 HOST_WIDE_INT offset;
5792 if (GET_CODE (addend) != CONST_INT)
5795 offset = INTVAL (addend);
5796 if (GET_MODE_SIZE (mode) <= 4)
5797 return (offset > -256 && offset < 256);
5799 return (use_ldrd && offset > -1024 && offset < 1024
5800 && (offset & 3) == 0);
5803 /* After reload constants split into minipools will have addresses
5804 from a LABEL_REF. */
5805 else if (reload_completed
5806 && (code == LABEL_REF
5808 && GET_CODE (XEXP (x, 0)) == PLUS
5809 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5810 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5813 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5816 else if (code == PLUS)
5818 rtx xop0 = XEXP (x, 0);
5819 rtx xop1 = XEXP (x, 1);
5821 return ((arm_address_register_rtx_p (xop0, strict_p)
5822 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5823 || (!strict_p && will_be_in_index_register (xop1))))
5824 || (arm_address_register_rtx_p (xop1, strict_p)
5825 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5828 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5829 && code == SYMBOL_REF
5830 && CONSTANT_POOL_ADDRESS_P (x)
5832 && symbol_mentioned_p (get_pool_constant (x))
5833 && ! pcrel_constant_p (get_pool_constant (x))))
5840 /* Return nonzero if INDEX is valid for an address index operand in ARM state. */
5842 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5845 HOST_WIDE_INT range;
5846 enum rtx_code code = GET_CODE (index);
5848 /* Standard coprocessor addressing modes. */
5849 if (TARGET_HARD_FLOAT
5850 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5851 && (mode == SFmode || mode == DFmode
5852 || (TARGET_MAVERICK && mode == DImode)))
5853 return (code == CONST_INT && INTVAL (index) < 1024
5854 && INTVAL (index) > -1024
5855 && (INTVAL (index) & 3) == 0);
5857 /* For quad modes, we restrict the constant offset to be slightly less
5858 than what the instruction format permits. We do this because for
5859 quad mode moves, we will actually decompose them into two separate
5860 double-mode reads or writes. INDEX must therefore be a valid
5861 (double-mode) offset and so should INDEX+8. */
5862 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5863 return (code == CONST_INT
5864 && INTVAL (index) < 1016
5865 && INTVAL (index) > -1024
5866 && (INTVAL (index) & 3) == 0);
5868 /* We have no such constraint on double mode offsets, so we permit the
5869 full range of the instruction format. */
5870 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5871 return (code == CONST_INT
5872 && INTVAL (index) < 1024
5873 && INTVAL (index) > -1024
5874 && (INTVAL (index) & 3) == 0);
5876 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5877 return (code == CONST_INT
5878 && INTVAL (index) < 1024
5879 && INTVAL (index) > -1024
5880 && (INTVAL (index) & 3) == 0);
5882 if (arm_address_register_rtx_p (index, strict_p)
5883 && (GET_MODE_SIZE (mode) <= 4))
5886 if (mode == DImode || mode == DFmode)
5888 if (code == CONST_INT)
5890 HOST_WIDE_INT val = INTVAL (index);
5893 return val > -256 && val < 256;
5895 return val > -4096 && val < 4092;
5898 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5901 if (GET_MODE_SIZE (mode) <= 4
5905 || (mode == QImode && outer == SIGN_EXTEND))))
5909 rtx xiop0 = XEXP (index, 0);
5910 rtx xiop1 = XEXP (index, 1);
5912 return ((arm_address_register_rtx_p (xiop0, strict_p)
5913 && power_of_two_operand (xiop1, SImode))
5914 || (arm_address_register_rtx_p (xiop1, strict_p)
5915 && power_of_two_operand (xiop0, SImode)));
5917 else if (code == LSHIFTRT || code == ASHIFTRT
5918 || code == ASHIFT || code == ROTATERT)
5920 rtx op = XEXP (index, 1);
5922 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5923 && GET_CODE (op) == CONST_INT
5925 && INTVAL (op) <= 31);
5929 /* For ARM v4 we may be doing a sign-extend operation during the load. */
5935 || (outer == SIGN_EXTEND && mode == QImode))
5941 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5943 return (code == CONST_INT
5944 && INTVAL (index) < range
5945 && INTVAL (index) > -range);
5948 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5949 index operand, i.e. 1, 2, 4 or 8. */
5951 thumb2_index_mul_operand (rtx op)
5955 if (GET_CODE (op) != CONST_INT
5959 return (val == 1 || val == 2 || val == 4 || val == 8);
5962 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5964 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5966 enum rtx_code code = GET_CODE (index);
5968 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5969 /* Standard coprocessor addressing modes. */
5970 if (TARGET_HARD_FLOAT
5971 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5972 && (mode == SFmode || mode == DFmode
5973 || (TARGET_MAVERICK && mode == DImode)))
5974 return (code == CONST_INT && INTVAL (index) < 1024
5975 /* Thumb-2 allows only a > -256 index range for its core register
5976 load/stores. Since we allow SF/DF in core registers, we have
5977 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
5979 && INTVAL (index) > -256
5980 && (INTVAL (index) & 3) == 0);
5982 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5984 /* For DImode assume values will usually live in core regs
5985 and only allow LDRD addressing modes. */
5986 if (!TARGET_LDRD || mode != DImode)
5987 return (code == CONST_INT
5988 && INTVAL (index) < 1024
5989 && INTVAL (index) > -1024
5990 && (INTVAL (index) & 3) == 0);
5993 /* For quad modes, we restrict the constant offset to be slightly less
5994 than what the instruction format permits. We do this because for
5995 quad mode moves, we will actually decompose them into two separate
5996 double-mode reads or writes. INDEX must therefore be a valid
5997 (double-mode) offset and so should INDEX+8. */
5998 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5999 return (code == CONST_INT
6000 && INTVAL (index) < 1016
6001 && INTVAL (index) > -1024
6002 && (INTVAL (index) & 3) == 0);
6004 /* We have no such constraint on double mode offsets, so we permit the
6005 full range of the instruction format. */
6006 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6007 return (code == CONST_INT
6008 && INTVAL (index) < 1024
6009 && INTVAL (index) > -1024
6010 && (INTVAL (index) & 3) == 0);
6012 if (arm_address_register_rtx_p (index, strict_p)
6013 && (GET_MODE_SIZE (mode) <= 4))
6016 if (mode == DImode || mode == DFmode)
6018 if (code == CONST_INT)
6020 HOST_WIDE_INT val = INTVAL (index);
6021 /* ??? Can we assume ldrd for thumb2? */
6022 /* Thumb-2 ldrd only has reg+const addressing modes. */
6023 /* ldrd supports offsets of +-1020.
6024 However, the ldr fallback does not. */
6025 return val > -256 && val < 256 && (val & 3) == 0;
6033 rtx xiop0 = XEXP (index, 0);
6034 rtx xiop1 = XEXP (index, 1);
6036 return ((arm_address_register_rtx_p (xiop0, strict_p)
6037 && thumb2_index_mul_operand (xiop1))
6038 || (arm_address_register_rtx_p (xiop1, strict_p)
6039 && thumb2_index_mul_operand (xiop0)));
6041 else if (code == ASHIFT)
6043 rtx op = XEXP (index, 1);
6045 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6046 && GET_CODE (op) == CONST_INT
6048 && INTVAL (op) <= 3);
6051 return (code == CONST_INT
6052 && INTVAL (index) < 4096
6053 && INTVAL (index) > -256);
6056 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6058 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6062 if (GET_CODE (x) != REG)
6068 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6070 return (regno <= LAST_LO_REGNUM
6071 || regno > LAST_VIRTUAL_REGISTER
6072 || regno == FRAME_POINTER_REGNUM
6073 || (GET_MODE_SIZE (mode) >= 4
6074 && (regno == STACK_POINTER_REGNUM
6075 || regno >= FIRST_PSEUDO_REGISTER
6076 || x == hard_frame_pointer_rtx
6077 || x == arg_pointer_rtx)));
6080 /* Return nonzero if x is a legitimate index register. This is the case
6081 for any base register that can access a QImode object. */
6083 thumb1_index_register_rtx_p (rtx x, int strict_p)
6085 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6088 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6090 The AP may be eliminated to either the SP or the FP, so we use the
6091 least common denominator, e.g. SImode, and offsets from 0 to 64.
6093 ??? Verify whether the above is the right approach.
6095 ??? Also, the FP may be eliminated to the SP, so perhaps that
6096 needs special handling also.
6098 ??? Look at how the mips16 port solves this problem. It probably uses
6099 better ways to solve some of these problems.
6101 Although it is not incorrect, we don't accept QImode and HImode
6102 addresses based on the frame pointer or arg pointer until the
6103 reload pass starts. This is so that eliminating such addresses
6104 into stack based ones won't produce impossible code. */
6106 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6108 /* ??? Not clear if this is right. Experiment. */
6109 if (GET_MODE_SIZE (mode) < 4
6110 && !(reload_in_progress || reload_completed)
6111 && (reg_mentioned_p (frame_pointer_rtx, x)
6112 || reg_mentioned_p (arg_pointer_rtx, x)
6113 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6114 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6115 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6116 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6119 /* Accept any base register. SP only in SImode or larger. */
6120 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6123 /* This is PC relative data before arm_reorg runs. */
6124 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6125 && GET_CODE (x) == SYMBOL_REF
6126 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6129 /* This is PC relative data after arm_reorg runs. */
6130 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6132 && (GET_CODE (x) == LABEL_REF
6133 || (GET_CODE (x) == CONST
6134 && GET_CODE (XEXP (x, 0)) == PLUS
6135 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6136 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6139 /* Post-inc indexing only supported for SImode and larger. */
6140 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6141 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6144 else if (GET_CODE (x) == PLUS)
6146 /* REG+REG address can be any two index registers. */
6147 /* We disallow FRAME+REG addressing since we know that FRAME
6148 will be replaced with STACK, and SP relative addressing only
6149 permits SP+OFFSET. */
6150 if (GET_MODE_SIZE (mode) <= 4
6151 && XEXP (x, 0) != frame_pointer_rtx
6152 && XEXP (x, 1) != frame_pointer_rtx
6153 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6154 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6155 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6158 /* REG+const has 5-7 bit offset for non-SP registers. */
6159 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6160 || XEXP (x, 0) == arg_pointer_rtx)
6161 && GET_CODE (XEXP (x, 1)) == CONST_INT
6162 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6165 /* REG+const has 10-bit offset for SP, but only SImode and
6166 larger modes are supported. */
6167 /* ??? Should probably check for DI/DFmode overflow here
6168 just like GO_IF_LEGITIMATE_OFFSET does. */
6169 else if (GET_CODE (XEXP (x, 0)) == REG
6170 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6171 && GET_MODE_SIZE (mode) >= 4
6172 && GET_CODE (XEXP (x, 1)) == CONST_INT
6173 && INTVAL (XEXP (x, 1)) >= 0
6174 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6175 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6178 else if (GET_CODE (XEXP (x, 0)) == REG
6179 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6180 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6181 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6182 && REGNO (XEXP (x, 0))
6183 <= LAST_VIRTUAL_POINTER_REGISTER))
6184 && GET_MODE_SIZE (mode) >= 4
6185 && GET_CODE (XEXP (x, 1)) == CONST_INT
6186 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6190 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6191 && GET_MODE_SIZE (mode) == 4
6192 && GET_CODE (x) == SYMBOL_REF
6193 && CONSTANT_POOL_ADDRESS_P (x)
6195 && symbol_mentioned_p (get_pool_constant (x))
6196 && ! pcrel_constant_p (get_pool_constant (x))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
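/* Illustrative only (not part of the original sources): the ranges accepted
   above correspond to the Thumb-1 5-bit scaled immediate-offset encodings,
   e.g.

     ldrb r0, [r1, #31]    @ QImode: offsets 0..31
     ldrh r0, [r1, #62]    @ HImode: even offsets 0..62
     ldr  r0, [r1, #124]   @ SImode: word-aligned offsets 0..124  */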
static bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
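/* For illustration (a sketch, not from the original sources): the two paths
   above typically assemble to

     mrc p15, 0, rN, c13, c0, 3   @ load_tp_hard: read the TPIDRURO register
     bl  __aeabi_read_tp          @ load_tp_soft: EABI helper, result in r0  */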
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
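/* Illustrative sketch (not from the original sources) of the general-dynamic
   code the sequence above produces in ARM state (register names arbitrary):

       ldr   r0, .Lpool         @ x(tlsgd) + (.LPICn + 8 - .Lpool)
   .LPICn:
       add   r0, pc, r0         @ pic_add_dot_plus_eight
       bl    __tls_get_addr     @ value returned in r0

   The 8 (or 4 for Thumb) in the UNSPEC is the pc read-ahead offset.  */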
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
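/* Illustrative sketch (not from the original sources): for the local-exec
   model above, the result is (plus tp (unspec x TLS_LE32)), i.e. roughly

     mrc p15, 0, r0, c13, c0, 3   @ r0 = thread pointer
     ldr r1, .Lpool               @ .word x(tpoff)
     ldr r2, [r0, r1]             @ the variable itself  */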
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && GET_CODE (xop1) == CONST_INT)
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (GET_CODE (x) == CONST_INT && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (base_reg, index);
    }
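  /* Worked example (illustrative): for a SImode access to absolute address
     0x1004, bits = 12 and mask = 0xfff, so base = 0x1000 and index = 4;
     bit_count (0x1000) = 1 <= (32 - 12)/2, so no negative-index adjustment
     happens and the address becomes (base_reg 0x1000) + 4, with the index
     fitting the 12-bit ldr offset.  */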
  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference; forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
bool
arm_legitimize_reload_address (rtx *p,
                               enum machine_mode mode,
                               int opnum, int type,
                               int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == REG
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
         code handle it.  Otherwise we will run into problems if a future
         reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
                        && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
                        && (mode == SFmode || mode == DFmode
                            || (mode == DImode && TARGET_MAVERICK)))
                       || (TARGET_REALLY_IWMMXT
                           && VALID_IWMMXT_REG_MODE (mode))
                       || (TARGET_NEON
                           && (VALID_NEON_DREG_MODE (mode)
                               || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
          && (coproc_p
              /* For DI, and DF under soft-float: */
              || ((mode == DImode || mode == DFmode)
                  /* Without ldrd, we use stm/ldm, which does not
                     fare well with unaligned bits.  */
                  && (! TARGET_LDRD
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
                      || TARGET_THUMB2))))
        return false;

      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
         of which the (reg+high) gets turned into a reload add insn,
         we try to decompose the index into high/low values that can often
         also lead to better reload CSE.
         For example:
                 ldr r0, [r2, #4100]  // Offset too large
                 ldr r1, [r2, #4104]  // Offset too large

         is best reloaded as:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 add t2, r2, #4096
                 ldr r1, [t2, #8]

         which post-reload CSE can simplify in most cases to eliminate the
         second add instruction:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 ldr r1, [t1, #8]

         The idea here is that we want to split out the bits of the constant
         as a mask, rather than as subtracting the maximum offset that the
         respective type of load/store used can handle.

         When encountering negative offsets, we can still utilize the negative
         low part even if the overall offset is positive; sometimes this leads
         to an immediate that can be constructed with fewer instructions.
         For example:
                 ldr r0, [r2, #0x3FFFFC]

         This is best reloaded as:
                 add t1, r2, #0x400000
                 ldr r0, [t1, #-4]

         The trick for spotting this for a load insn with N bits of offset
         (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
         negative offset that is going to make bit N and all the bits below
         it become zero in the remainder part.

         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
         to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
         used in most cases of ARM load/store instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
      (((VAL) & ((1 << (N)) - 1))                                       \
       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))   \
       : 0)
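      /* Worked example of the macro (illustrative), matching the 0x3FFFFC
         case described above: with N = 12, val & 0xfff = 0xffc is nonzero
         and bit 12 is set, so low = (0x1ffc ^ 0x1000) - 0x1000 = -4 and
         high = val - low = 0x400000, giving "add t1, r2, #0x400000"
         followed by "ldr r0, [t1, #-4]".  */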
      if (coproc_p)
        {
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

          /* NEON quad-word load/stores are made of two double-word accesses,
             so the valid index range is reduced by 8.  Treat as 9-bit range if
             we go over it.  */
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
        }
      else if (GET_MODE_SIZE (mode) == 8)
        {
          if (TARGET_LDRD)
            low = (TARGET_THUMB2
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
          else
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
               to access doublewords.  The supported load/store offsets are
               -8, -4, and 4, which we try to produce here.  */
            low = ((val & 0xf) ^ 0x8) - 0x8;
        }
      else if (GET_MODE_SIZE (mode) < 8)
        {
          /* NEON element load/stores do not have an offset.  */
          if (TARGET_NEON_FP16 && mode == HFmode)
            return false;

          if (TARGET_THUMB2)
            {
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
                 Try the wider 12-bit range first, and re-try if the result
                 is out of range.  */
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
              if (low < -255)
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
            }
          else
            {
              if (mode == HImode || mode == HFmode)
                {
                  if (arm_arch4)
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
                  else
                    {
                      /* The storehi/movhi_bytes fallbacks can use only
                         [-4094,+4094] of the full ldrb/strb index range.  */
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
                      if (low == 4095 || low == -4095)
                        return false;
                    }
                }
              else
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            }
        }
      else
        return false;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
              - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero */
      if (low == 0 || high == 0 || (high + low != val))
        return false;

      /* Reload the high part into a base reg; leave the low part
         in the mem.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
                                       GEN_INT (high)),
                         GEN_INT (low));
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
rtx
thumb_legitimize_reload_address (rtx *x_p,
                                 enum machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return x;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return x;
    }

  return NULL;
}
/* Test for various thread-local symbols.  */

/* Return TRUE if X is a thread-local symbol.  */

static bool
arm_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Helper for arm_tls_referenced_p.  */

static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (GET_CODE (x) == CONST_INT
          || GET_CODE (x) == CONST_DOUBLE
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)                                            \
  (GET_CODE (X) == REG                                                  \
   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))

#define REG_OR_SUBREG_RTX(X)                    \
  (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);
  int total;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + GET_CODE (SET_DEST (x)) == MEM));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);
  *total = 0;

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
        *total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (GET_CODE (XEXP (x, 1)) == REG)
        *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
        {
          *total += COSTS_N_INSNS (4);
          return true;
        }

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (3);
          return true;
        }

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
         and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
          && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
        ++*total;

      return true;

    case MINUS:
      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
          if (GET_CODE (XEXP (x, 0)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 0))))
            {
              *total += rtx_cost (XEXP (x, 1), code, 1, speed);
              return true;
            }

          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 1))))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 0)))
                {
                  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
                  return true;
                }

              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }

      *total = COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
          && const_ok_for_arm (INTVAL (XEXP (x, 0))))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
              && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
                              0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      /* MLA: All arguments must be registers.  We filter out
         multiplication by a power of two, so that we fall down into
         the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        /* The cost comes from the cost of the multiply.  */
        return false;

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }

          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:

      /* Normally the frame registers will be split into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
          && GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = COSTS_N_INSNS (1);

      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (2);
          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == UMIN || subcode == UMAX
          || subcode == SMIN || subcode == SMAX)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
        {
          *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
          return true;
        }
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (2);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
      if (mode == SImode && code == NOT)
        {
          subcode = GET_CODE (XEXP (x, 0));
          if (subcode == ASHIFT || subcode == ASHIFTRT
              || subcode == LSHIFTRT
              || subcode == ROTATE || subcode == ROTATERT
              || (subcode == MULT
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
            {
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
              /* Register shifts cost an extra cycle.  */
              if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
                                                        subcode, 1, speed);
              return true;
            }
        }

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        {
          *total = COSTS_N_INSNS (4);
          return true;
        }

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
            && GET_CODE (XEXP (operand, 0)) == REG
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
        *total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
                 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case GE:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case LT:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case GTU:
    case LEU:
    case LTU:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
         performed, then they cost 2 instructions.  Otherwise they need
         an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        return true;

      /* Fall through */
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        {
          *total = 0;
          return true;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (GET_CODE (XEXP (x, 1)) != CONST_INT
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
        *total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
        *total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          rtx op = XEXP (x, 0);
          enum machine_mode opmode = GET_MODE (op);

          if (mode == DImode)
            *total += COSTS_N_INSNS (1);

          if (opmode != SImode)
            {
              if (MEM_P (op))
                {
                  /* If !arm_arch4, we use one of the extendhisi2_mem
                     or movhi_bytes patterns for HImode.  For a QImode
                     sign extension, we first zero-extend from memory
                     and then perform a shift sequence.  */
                  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
                    *total += COSTS_N_INSNS (2);
                }
              else if (arm_arch6)
                *total += COSTS_N_INSNS (1);

              /* We don't have the necessary insn, so we need to perform some
                 other operation.  */
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
                /* An and with constant 255.  */
                *total += COSTS_N_INSNS (1);
              else
                /* A shift sequence.  Increase costs slightly to avoid
                   combining two shifts into an extend operation.  */
                *total += COSTS_N_INSNS (2) + 1;
            }

          return false;
        }

      switch (GET_MODE (XEXP (x, 0)))
        {
        case V8QImode:
        case V4HImode:
        case V2SImode:
        case V4QImode:
        case V2HImode:
          *total = COSTS_N_INSNS (1);
          return false;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
          || const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
                                                  INTVAL (x), NULL_RTX,
                                                  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
         be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
        {
          *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
        }
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need
   finer-grained tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* The Thumb-1 mul instruction can't operate on a constant; we
             must load it into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + GET_CODE (SET_DEST (x)) == MEM));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case SImode:
          return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                    int *total)
{
  enum machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *total = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *total = COSTS_N_INSNS (2);
      else
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      else if (mode == SImode)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
          /* Slightly disparage register shifts, but not by much.  */
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      if (mode == SImode)
        {
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
              || subcode1 == ROTATE || subcode1 == ROTATERT
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
              || subcode1 == ASHIFTRT)
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }

          *total = COSTS_N_INSNS (1);
          return false;
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
          *total += rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));

          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
              || subcode == LSHIFTRT || subcode == ASHIFTRT
              || (code == AND && subcode == NOT))
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
        /* A multiplication by a constant requires another instruction
           to load the constant to a register.  */
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
                                ? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
        {
          if (outer_code == COMPARE || outer_code == PLUS
              || outer_code == MINUS)
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
        }
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
         cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Implement TARGET_RTX_COSTS.  Dispatch to the size-oriented costs when
   optimizing for size, and to the tuning-specific cost hook otherwise.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
                               (enum rtx_code) outer_code, total);
  else
    return current_tune->rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    total, speed);
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
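/* Worked example (illustrative, not from the original sources): for a
   multiply by 10 on a "slowmul" core, const_ok_for_arm (10) holds, so cost
   starts at 4; the 2-bit Booth loop then runs twice (10 -> 2 -> 0), giving
   COSTS_N_INSNS (6) plus the cost of operand 0.  */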
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS(2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS(cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return true;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
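/* Worked example (illustrative, not from the original sources): multiplying
   by 100000 (0x186a0) on XScale: bit 31 is clear, i & 0xffff8000 = 0x18000
   is nonzero (one extra cycle) but i & 0xf8000000 is zero, so the estimate
   is COSTS_N_INSNS (2): the general overhead plus one retirement cycle.  */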
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND(link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints ();
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                       s0 = s0 <op> s1
                       s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between FPA_REGS and GENERAL_REGS are two memory insns.
   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

static int
arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((from == FPA_REGS && to != FPA_REGS)
          || (from != FPA_REGS && to == FPA_REGS))
        return 20;
      else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
               || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
               || (from != CIRRUS_REGS && to == CIRRUS_REGS))
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

static int
arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX This is not strictly true for the FPA.  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && GET_CODE (insn) == CALL_INSN)
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && GET_CODE (SET_SRC (i_pat)) == MEM
      && (d_pat = single_set (dep)) != NULL
      && GET_CODE (SET_DEST (d_pat)) == MEM)
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static int fp_consts_inited = 0;

/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
static const char * const strings_fp[8] =
{
  "0",   "1",   "2",   "3",
  "4",   "5",   "0.5", "10"
};

static REAL_VALUE_TYPE values_fp[8];

static void
init_fp_table (void)
{
  int i;
  REAL_VALUE_TYPE r;

  if (TARGET_VFP)
    fp_consts_inited = 1;
  else
    fp_consts_inited = 8;

  for (i = 0; i < fp_consts_inited; i++)
    {
      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
      values_fp[i] = r;
    }
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < fp_consts_inited; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}

/* Return TRUE if rtx X is a valid immediate FPA constant.  */
int
neg_const_double_rtx_ok_for_fpa (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  r = real_value_negate (&r);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/
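/* Worked example (illustrative, not from the original sources):
   1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes as A = 0,
   BCD = 4 XOR 3 = 7, EFGH = 0, giving the 8-bit index 0x70; an
   "fconsts s0, #0x70" therefore loads 1.0.  */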
8739 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8740 fconst[sd] instruction, or -1 if X isn't suitable. */
8742 vfp3_const_double_index (rtx x)
8744 REAL_VALUE_TYPE r, m;
8746 unsigned HOST_WIDE_INT mantissa, mant_hi;
8747 unsigned HOST_WIDE_INT mask;
8748 HOST_WIDE_INT m1, m2;
8749 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8751 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8754 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8756 /* We can't represent these things, so detect them first. */
8757 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8760 /* Extract sign, exponent and mantissa. */
8761 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8762 r = real_value_abs (&r);
8763 exponent = REAL_EXP (&r);
8764 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8765 highest (sign) bit, with a fixed binary point at bit point_pos.
8766 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8767 bits for the mantissa, this may fail (low bits would be lost). */
8768 real_ldexp (&m, &r, point_pos - exponent);
8769 REAL_VALUE_TO_INT (&m1, &m2, m);
8773 /* If there are bits set in the low part of the mantissa, we can't
8774 represent this value. */
8778 /* Now make it so that mantissa contains the most-significant bits, and move
8779 the point_pos to indicate that the least-significant bits have been
8781 point_pos -= HOST_BITS_PER_WIDE_INT;
8784 /* We can permit four significant bits of mantissa only, plus a high bit
8785 which is always 1. */
8786 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8787 if ((mantissa & mask) != 0)
8790 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8791 mantissa >>= point_pos - 5;
8793 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8794 floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.) */
  if (mantissa == 0)
    return -1;
8799 gcc_assert (mantissa >= 16 && mantissa <= 31);
8801 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8802 normalized significands are in the range [1, 2). (Our mantissa is shifted
8803 left 4 places at this point relative to normalized IEEE754 values). GCC
8804 internally uses [0.5, 1) (see real.c), so the exponent returned from
8805 REAL_EXP must be altered. */
8806 exponent = 5 - exponent;
8808 if (exponent < 0 || exponent > 7)
8811 /* Sign, mantissa and exponent are now in the correct form to plug into the
8812 formula described in the comment above. */
8813 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8816 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8818 vfp3_const_double_rtx (rtx x)
  return vfp3_const_double_index (x) != -1;
}
8826 /* Recognize immediates which can be used in various Neon instructions. Legal
8827 immediates are described by the following table (for VMVN variants, the
8828 bitwise inverse of the constant shown is recognized. In either case, VMOV
8829 is output and the correct instruction to use for a given constant is chosen
8830 by the assembler). The constant shown is replicated across all elements of
8831 the destination vector.
8833 insn elems variant constant (binary)
8834 ---- ----- ------- -----------------
8835 vmov i32 0 00000000 00000000 00000000 abcdefgh
8836 vmov i32 1 00000000 00000000 abcdefgh 00000000
8837 vmov i32 2 00000000 abcdefgh 00000000 00000000
8838 vmov i32 3 abcdefgh 00000000 00000000 00000000
8839 vmov i16 4 00000000 abcdefgh
8840 vmov i16 5 abcdefgh 00000000
8841 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8842 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8843 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8844 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8845 vmvn i16 10 00000000 abcdefgh
8846 vmvn i16 11 abcdefgh 00000000
8847 vmov i32 12 00000000 00000000 abcdefgh 11111111
8848 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8849 vmov i32 14 00000000 abcdefgh 11111111 11111111
8850 vmvn i32 15 00000000 abcdefgh 11111111 11111111
vmov i8 16 abcdefgh
vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8853 eeeeeeee ffffffff gggggggg hhhhhhhh
8854 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8856 For case 18, B = !b. Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.
8860 Variants 0-5 (inclusive) may also be used as immediates for the second
8861 operand of VORR/VBIC instructions.
8863 The INVERSE argument causes the bitwise inverse of the given operand to be
8864 recognized instead (used for recognizing legal immediates for the VAND/VORN
8865 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8866 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8867 output, rather than the real insns vbic/vorr).
8869 INVERSE makes no difference to the recognition of float vectors.
8871 The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns. */
8875 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8876 rtx *modconst, int *elementwidth)
8878 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8880 for (i = 0; i < idx; i += (STRIDE)) \
8885 immtype = (CLASS); \
8886 elsize = (ELSIZE); \
8890 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8891 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8892 unsigned char bytes[16];
8893 int immtype = -1, matches;
8894 unsigned int invmask = inverse ? 0xff : 0;
8896 /* Vectors of float constants. */
8897 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8899 rtx el0 = CONST_VECTOR_ELT (op, 0);
      if (!vfp3_const_double_rtx (el0))
        return -1;
8905 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8907 for (i = 1; i < n_elts; i++)
8909 rtx elt = CONST_VECTOR_ELT (op, i);
8912 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }
      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      return 18;
    }
8927 /* Splat vector constant out into a byte vector. */
8928 for (i = 0; i < n_elts; i++)
8930 rtx el = CONST_VECTOR_ELT (op, i);
8931 unsigned HOST_WIDE_INT elpart;
8932 unsigned int part, parts;
8934 if (GET_CODE (el) == CONST_INT)
        {
          elpart = INTVAL (el);
          parts = 1;
        }
8939 else if (GET_CODE (el) == CONST_DOUBLE)
        {
          elpart = CONST_DOUBLE_LOW (el);
          parts = 2;
        }
      else
        gcc_unreachable ();
8947 for (part = 0; part < parts; part++)
8950 for (byte = 0; byte < innersize; byte++)
8952 bytes[idx++] = (elpart & 0xff) ^ invmask;
8953 elpart >>= BITS_PER_UNIT;
8955 if (GET_CODE (el) == CONST_DOUBLE)
8956 elpart = CONST_DOUBLE_HIGH (el);
8961 gcc_assert (idx == GET_MODE_SIZE (mode));
8965 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8966 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8968 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8969 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8971 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8972 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8974 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8975 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8977 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8979 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8981 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8982 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8984 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8985 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8987 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8988 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8990 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8991 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8993 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8995 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8997 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8998 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9000 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9001 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9003 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9004 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9006 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9007 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9009 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9011 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9012 && bytes[i] == bytes[(i + 8) % idx]);
  if (elementwidth)
    *elementwidth = elsize;
9024 unsigned HOST_WIDE_INT imm = 0;
9026 /* Un-invert bytes of recognized vector, if necessary. */
9028 for (i = 0; i < idx; i++)
9029 bytes[i] ^= invmask;
9033 /* FIXME: Broken on 32-bit H_W_I hosts. */
9034 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9036 for (i = 0; i < 8; i++)
9037 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9038 << (i * BITS_PER_UNIT);
9040 *modconst = GEN_INT (imm);
9044 unsigned HOST_WIDE_INT imm = 0;
9046 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9047 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9049 *modconst = GEN_INT (imm);
9057 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9058 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9059 float elements), and a modified constant (whatever should be output for a
9060 VMOV) in *MODCONST. */
9063 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9064 rtx *modconst, int *elementwidth)
9068 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9074 *modconst = tmpconst;
9077 *elementwidth = tmpwidth;
9082 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9083 the immediate is valid, write a constant suitable for using as an operand
9084 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9085 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9088 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9089 rtx *modconst, int *elementwidth)
9093 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9095 if (retval < 0 || retval > 5)
9099 *modconst = tmpconst;
9102 *elementwidth = tmpwidth;
9107 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9108 the immediate is valid, write a constant suitable for using as an operand
9109 to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
   since left and right shifts accept different immediate ranges. */
9114 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9115 rtx *modconst, int *elementwidth,
9118 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9119 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9120 unsigned HOST_WIDE_INT last_elt = 0;
9121 unsigned HOST_WIDE_INT maxshift;
9123 /* Split vector constant out into a byte vector. */
9124 for (i = 0; i < n_elts; i++)
9126 rtx el = CONST_VECTOR_ELT (op, i);
9127 unsigned HOST_WIDE_INT elpart;
9129 if (GET_CODE (el) == CONST_INT)
9130 elpart = INTVAL (el);
      else if (GET_CODE (el) == CONST_DOUBLE)
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }
9142 /* Shift less than element size. */
9143 maxshift = innersize * 8;
9147 /* Left shift immediate value can be from 0 to <size>-1. */
9148 if (last_elt >= maxshift)
9153 /* Right shift immediate value can be from 1 to <size>. */
9154 if (last_elt == 0 || last_elt > maxshift)
  if (elementwidth)
    *elementwidth = innersize * 8;
  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
9167 /* Return a string suitable for output of Neon immediate logic operation
9171 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9172 int inverse, int quad)
9174 int width, is_valid;
9175 static char templ[40];
9177 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9179 gcc_assert (is_valid != 0);
9182 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9184 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9189 /* Return a string suitable for output of Neon immediate shift operation
9190 (VSHR or VSHL) MNEM. */
9193 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9194 enum machine_mode mode, int quad,
9197 int width, is_valid;
9198 static char templ[40];
9200 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9201 gcc_assert (is_valid != 0);
9204 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9206 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9211 /* Output a sequence of pairwise operations to implement a reduction.
9212 NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go. Unfortunately it does not appear
   that we can exploit those extra calculations to perform the full
   operation in fewer steps.
9215 Although all vector elements of the result but the first are ignored, we
9216 actually calculate the same result in each of the elements. An alternative
9217 such as initially loading a vector with zero to use as each of the second
9218 operands would use up an additional register and take an extra instruction,
9219 for no particular gain. */
9222 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9223 rtx (*reduc) (rtx, rtx, rtx))
9225 enum machine_mode inner = GET_MODE_INNER (mode);
9226 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9229 for (i = parts / 2; i >= 1; i /= 2)
9231 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
9237 /* If VALS is a vector constant that can be loaded into a register
9238 using VDUP, generate instructions to do so and return an RTX to
9239 assign to the register. Otherwise return NULL_RTX. */
9242 neon_vdup_constant (rtx vals)
9244 enum machine_mode mode = GET_MODE (vals);
9245 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9246 int n_elts = GET_MODE_NUNITS (mode);
9247 bool all_same = true;
  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;
9254 for (i = 0; i < n_elts; ++i)
9256 x = XVECEXP (vals, 0, i);
9257 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9262 /* The elements are not all the same. We could handle repeating
9263 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9264 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9268 /* We can load this constant by using VDUP and a constant in a
9269 single ARM register. This will be cheaper than a vector
9272 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9273 return gen_rtx_VEC_DUPLICATE (mode, x);
9276 /* Generate code to load VALS, which is a PARALLEL containing only
9277 constants (for vec_init) or CONST_VECTOR, efficiently into a
9278 register. Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9282 neon_make_constant (rtx vals)
9284 enum machine_mode mode = GET_MODE (vals);
9286 rtx const_vec = NULL_RTX;
9287 int n_elts = GET_MODE_NUNITS (mode);
9291 if (GET_CODE (vals) == CONST_VECTOR)
9293 else if (GET_CODE (vals) == PARALLEL)
9295 /* A CONST_VECTOR must contain only CONST_INTs and
9296 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9297 Only store valid constants in a CONST_VECTOR. */
9298 for (i = 0; i < n_elts; ++i)
9300 rtx x = XVECEXP (vals, 0, i);
9301 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9304 if (n_const == n_elts)
9305 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9310 if (const_vec != NULL
9311 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9312 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9314 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9315 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9316 pipeline cycle; creating the constant takes one or two ARM
9319 else if (const_vec != NULL_RTX)
9320 /* Load from constant pool. On Cortex-A8 this takes two cycles
       (for either double or quad vectors). We cannot take advantage
9322 of single-cycle VLD1 because we need a PC-relative addressing
9326 /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer. */
9331 /* Initialize vector TARGET to VALS. */
9334 neon_expand_vector_init (rtx target, rtx vals)
9336 enum machine_mode mode = GET_MODE (target);
9337 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9338 int n_elts = GET_MODE_NUNITS (mode);
9339 int n_var = 0, one_var = -1;
9340 bool all_same = true;
9344 for (i = 0; i < n_elts; ++i)
9346 x = XVECEXP (vals, 0, i);
9347 if (!CONSTANT_P (x))
9348 ++n_var, one_var = i;
9350 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9356 rtx constant = neon_make_constant (vals);
9357 if (constant != NULL_RTX)
9359 emit_move_insn (target, constant);
9364 /* Splat a single non-constant element if we can. */
9365 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9367 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9368 emit_insn (gen_rtx_SET (VOIDmode, target,
9369 gen_rtx_VEC_DUPLICATE (mode, x)));
9373 /* One field is non-constant. Load constant then overwrite varying
9374 field. This is more efficient than using the stack. */
9377 rtx copy = copy_rtx (vals);
9378 rtx index = GEN_INT (one_var);
9380 /* Load constant part of vector, substitute neighboring value for
9382 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9383 neon_expand_vector_init (target, copy);
9385 /* Insert variable. */
9386 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9390 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9393 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9396 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9399 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9402 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9405 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9408 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9411 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9414 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9422 /* Construct the vector in memory one field at a time
9423 and load the whole vector. */
9424 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9425 for (i = 0; i < n_elts; i++)
9426 emit_move_insn (adjust_address_nv (mem, inner_mode,
9427 i * GET_MODE_SIZE (inner_mode)),
9428 XVECEXP (vals, 0, i));
9429 emit_move_insn (target, mem);
9432 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9433 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9434 reported source locations are bogus. */
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const char *err)
{
  HOST_WIDE_INT lane;
9442 gcc_assert (GET_CODE (operand) == CONST_INT);
9444 lane = INTVAL (operand);
9446 if (lane < low || lane >= high)
9450 /* Bounds-check lanes. */
9453 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9455 bounds_check (operand, low, high, "lane out of range");
9458 /* Bounds-check constants. */
9461 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9463 bounds_check (operand, low, high, "constant out of range");
9467 neon_element_bits (enum machine_mode mode)
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
9472 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9476 /* Predicates for `match_operand' and `match_operator'. */
9478 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9480 cirrus_memory_offset (rtx op)
9482 /* Reject eliminable registers. */
9483 if (! (reload_in_progress || reload_completed)
9484 && ( reg_mentioned_p (frame_pointer_rtx, op)
9485 || reg_mentioned_p (arg_pointer_rtx, op)
9486 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9487 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9488 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
         || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return 0;
9492 if (GET_CODE (op) == MEM)
9498 /* Match: (mem (reg)). */
9499 if (GET_CODE (ind) == REG)
9505 if (GET_CODE (ind) == PLUS
9506 && GET_CODE (XEXP (ind, 0)) == REG
9507 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9508 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9515 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9516 WB is true if full writeback address modes are allowed and is false
9517 if limited writeback address modes (POST_INC and PRE_DEC) are
9521 arm_coproc_mem_operand (rtx op, bool wb)
9525 /* Reject eliminable registers. */
9526 if (! (reload_in_progress || reload_completed)
9527 && ( reg_mentioned_p (frame_pointer_rtx, op)
9528 || reg_mentioned_p (arg_pointer_rtx, op)
9529 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9530 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9531 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
         || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;
9535 /* Constants are converted into offsets from labels. */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);
9541 if (reload_completed
9542 && (GET_CODE (ind) == LABEL_REF
9543 || (GET_CODE (ind) == CONST
9544 && GET_CODE (XEXP (ind, 0)) == PLUS
9545 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9546 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9549 /* Match: (mem (reg)). */
9550 if (GET_CODE (ind) == REG)
9551 return arm_address_register_rtx_p (ind, 0);
  /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9554 acceptable in any case (subject to verification by
9555 arm_address_register_rtx_p). We need WB to be true to accept
9556 PRE_INC and POST_DEC. */
9557 if (GET_CODE (ind) == POST_INC
9558 || GET_CODE (ind) == PRE_DEC
9560 && (GET_CODE (ind) == PRE_INC
9561 || GET_CODE (ind) == POST_DEC)))
9562 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9565 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9566 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9567 && GET_CODE (XEXP (ind, 1)) == PLUS
9568 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9569 ind = XEXP (ind, 1);
9574 if (GET_CODE (ind) == PLUS
9575 && GET_CODE (XEXP (ind, 0)) == REG
9576 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9577 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9578 && INTVAL (XEXP (ind, 1)) > -1024
9579 && INTVAL (XEXP (ind, 1)) < 1024
9580 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9586 /* Return TRUE if OP is a memory operand which we can load or store a vector
9587 to/from. TYPE is one of the following values:
   0 - Vector load/store (vldr)
9589 1 - Core registers (ldm)
   2 - Element/structure loads (vld1). */
9593 neon_vector_mem_operand (rtx op, int type)
9597 /* Reject eliminable registers. */
9598 if (! (reload_in_progress || reload_completed)
9599 && ( reg_mentioned_p (frame_pointer_rtx, op)
9600 || reg_mentioned_p (arg_pointer_rtx, op)
9601 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9602 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9603 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
         || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;
9607 /* Constants are converted into offsets from labels. */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);
9613 if (reload_completed
9614 && (GET_CODE (ind) == LABEL_REF
9615 || (GET_CODE (ind) == CONST
9616 && GET_CODE (XEXP (ind, 0)) == PLUS
9617 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9618 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9621 /* Match: (mem (reg)). */
9622 if (GET_CODE (ind) == REG)
9623 return arm_address_register_rtx_p (ind, 0);
9625 /* Allow post-increment with Neon registers. */
9626 if ((type != 1 && GET_CODE (ind) == POST_INC)
9627 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9628 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9630 /* FIXME: vld1 allows register post-modify. */
9636 && GET_CODE (ind) == PLUS
9637 && GET_CODE (XEXP (ind, 0)) == REG
9638 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9639 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9640 && INTVAL (XEXP (ind, 1)) > -1024
9641 && INTVAL (XEXP (ind, 1)) < 1016
9642 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9648 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9651 neon_struct_mem_operand (rtx op)
9655 /* Reject eliminable registers. */
9656 if (! (reload_in_progress || reload_completed)
9657 && ( reg_mentioned_p (frame_pointer_rtx, op)
9658 || reg_mentioned_p (arg_pointer_rtx, op)
9659 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9660 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9661 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
         || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;
9665 /* Constants are converted into offsets from labels. */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);
9671 if (reload_completed
9672 && (GET_CODE (ind) == LABEL_REF
9673 || (GET_CODE (ind) == CONST
9674 && GET_CODE (XEXP (ind, 0)) == PLUS
9675 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9676 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9679 /* Match: (mem (reg)). */
9680 if (GET_CODE (ind) == REG)
9681 return arm_address_register_rtx_p (ind, 0);
9683 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9684 if (GET_CODE (ind) == POST_INC
9685 || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
9691 /* Return true if X is a register that will be eliminated later on. */
9693 arm_eliminable_register (rtx x)
9695 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9696 || REGNO (x) == ARG_POINTER_REGNUM
9697 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9698 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9701 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9702 coprocessor registers. Otherwise return NO_REGS. */
9705 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9709 if (!TARGET_NEON_FP16)
9710 return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
        return NO_REGS;
9713 return GENERAL_REGS;
  /* The neon move patterns handle all legitimate vector and struct
     formats. */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9720 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9721 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;
  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;
9728 return GENERAL_REGS;
9731 /* Values which must be returned in the most-significant end of the return
9735 arm_return_in_msb (const_tree valtype)
9737 return (TARGET_AAPCS_BASED
9739 && (AGGREGATE_TYPE_P (valtype)
9740 || TREE_CODE (valtype) == COMPLEX_TYPE
9741 || FIXED_POINT_TYPE_P (valtype)));
/* Return TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code, which has to work around
   a hardware bug triggered by such instructions. */
9748 arm_memory_load_p (rtx insn)
  rtx body, lhs, rhs;
  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
    return false;
9755 body = PATTERN (insn);
  if (GET_CODE (body) != SET)
    return false;
9760 lhs = XEXP (body, 0);
9761 rhs = XEXP (body, 1);
9763 lhs = REG_OR_SUBREG_RTX (lhs);
9765 /* If the destination is not a general purpose
9766 register we do not have to worry. */
9767 if (GET_CODE (lhs) != REG
      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
    return false;
9771 /* As well as loads from memory we also have to react
9772 to loads of invalid constants which will be turned
9773 into loads from the minipool. */
9774 return (GET_CODE (rhs) == MEM
9775 || GET_CODE (rhs) == SYMBOL_REF
9776 || note_invalid_constants (insn, -1, false));
9779 /* Return TRUE if INSN is a Cirrus instruction. */
9781 arm_cirrus_insn_p (rtx insn)
9783 enum attr_cirrus attr;
9785 /* get_attr cannot accept USE or CLOBBER. */
  if (!insn
      || GET_CODE (insn) != INSN
9788 || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;
9792 attr = get_attr_cirrus (insn);
9794 return attr != CIRRUS_NOT;
9797 /* Cirrus reorg for invalid instruction combinations. */
9799 cirrus_reorg (rtx first)
9801 enum attr_cirrus attr;
9802 rtx body = PATTERN (first);
  /* Any branch must be followed by 2 non-Cirrus instructions. */
9807 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9810 t = next_nonnote_insn (first);
9812 if (arm_cirrus_insn_p (t))
9815 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9819 emit_insn_after (gen_nop (), first);
9824 /* (float (blah)) is in parallel with a clobber. */
9825 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9826 body = XVECEXP (body, 0, 0);
9828 if (GET_CODE (body) == SET)
9830 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9832 /* cfldrd, cfldr64, cfstrd, cfstr64 must
         be followed by a non-Cirrus insn. */
9834 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9836 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9837 emit_insn_after (gen_nop (), first);
9841 else if (arm_memory_load_p (first))
9843 unsigned int arm_regno;
9845 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9846 ldr/cfmv64hr combination where the Rd field is the same
         in both instructions must be split with a non-Cirrus insn. */
9854 /* Get Arm register number for ldr insn. */
9855 if (GET_CODE (lhs) == REG)
9856 arm_regno = REGNO (lhs);
9859 gcc_assert (GET_CODE (rhs) == REG);
9860 arm_regno = REGNO (rhs);
9864 first = next_nonnote_insn (first);
9866 if (! arm_cirrus_insn_p (first))
9869 body = PATTERN (first);
9871 /* (float (blah)) is in parallel with a clobber. */
9872 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9873 body = XVECEXP (body, 0, 0);
9875 if (GET_CODE (body) == FLOAT)
9876 body = XEXP (body, 0);
9878 if (get_attr_cirrus (first) == CIRRUS_MOVE
9879 && GET_CODE (XEXP (body, 1)) == REG
9880 && arm_regno == REGNO (XEXP (body, 1)))
9881 emit_insn_after (gen_nop (), first);
9887 /* get_attr cannot accept USE or CLOBBER. */
  if (!first
      || GET_CODE (first) != INSN
9890 || GET_CODE (PATTERN (first)) == USE
      || GET_CODE (PATTERN (first)) == CLOBBER)
    return;
9894 attr = get_attr_cirrus (first);
9896 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9897 must be followed by a non-coprocessor instruction. */
9898 if (attr == CIRRUS_COMPARE)
9902 t = next_nonnote_insn (first);
9904 if (arm_cirrus_insn_p (t))
9907 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9911 emit_insn_after (gen_nop (), first);
9917 /* Return TRUE if X references a SYMBOL_REF. */
9919 symbol_mentioned_p (rtx x)
9924 if (GET_CODE (x) == SYMBOL_REF)
9927 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9928 are constant offsets, not symbols. */
9929 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9932 fmt = GET_RTX_FORMAT (GET_CODE (x));
9934 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9940 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9941 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9944 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9951 /* Return TRUE if X references a LABEL_REF. */
9953 label_mentioned_p (rtx x)
9958 if (GET_CODE (x) == LABEL_REF)
9961 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9962 instruction, but they are constant offsets, not symbols. */
9963 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9966 fmt = GET_RTX_FORMAT (GET_CODE (x));
9967 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9973 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9974 if (label_mentioned_p (XVECEXP (x, i, j)))
9977 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9985 tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return true;

    default:
      return false;
    }
}
10001 /* Must not copy any rtx that uses a pc-relative address. */
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10006 if (GET_CODE (*x) == UNSPEC
10007 && (XINT (*x, 1) == UNSPEC_PIC_BASE
          || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
    return 1;

  return 0;
}
10014 arm_cannot_copy_insn_p (rtx insn)
  /* The tls call insn cannot be copied, as it is paired with a data
     word. */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;
10021 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10025 minmax_code (rtx x)
10027 enum rtx_code code = GET_CODE (x);
10040 gcc_unreachable ();
10044 /* Return 1 if memory locations are adjacent. */
10046 adjacent_mem_locations (rtx a, rtx b)
10048 /* We don't guarantee to preserve the order of these memory refs. */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;
10052 if ((GET_CODE (XEXP (a, 0)) == REG
10053 || (GET_CODE (XEXP (a, 0)) == PLUS
10054 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
10055 && (GET_CODE (XEXP (b, 0)) == REG
10056 || (GET_CODE (XEXP (b, 0)) == PLUS
10057 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
10059 HOST_WIDE_INT val0 = 0, val1 = 0;
10063 if (GET_CODE (XEXP (a, 0)) == PLUS)
10065 reg0 = XEXP (XEXP (a, 0), 0);
10066 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10069 reg0 = XEXP (a, 0);
10071 if (GET_CODE (XEXP (b, 0)) == PLUS)
10073 reg1 = XEXP (XEXP (b, 0), 0);
10074 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10077 reg1 = XEXP (b, 0);
10079 /* Don't accept any offset that will require multiple
10080 instructions to handle, since this would cause the
10081 arith_adjacentmem pattern to output an overlong sequence. */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;
10085 /* Don't allow an eliminable register: register elimination can make
10086 the offset too large. */
      if (arm_eliminable_register (reg0))
        return 0;
10090 val_diff = val1 - val0;
      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
10095 to using an ldm instruction unless the offset is zero and
10096 we are optimizing for size. */
10097 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10098 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10099 && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
10103 && (val_diff == 4 || val_diff == -4));
10109 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10110 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10111 instruction. ADD_OFFSET is nonzero if the base address register needs
10112 to be modified with an add instruction before we can use it. */
10115 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10116 int nops, HOST_WIDE_INT add_offset)
10118 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10119 if the offset isn't small enough. The reason 2 ldrs are faster
10120 is because these ARMs are able to do more than one cache access
10121 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10122 whilst the ARM8 has a double bandwidth cache. This means that
10123 these cores can do both an instruction fetch and a data fetch in
10124 a single cycle, so the trick of calculating the address into a
10125 scratch register (one of the result regs) and then doing a load
10126 multiple actually becomes slower (and no smaller in code size).
10127 That is the transformation
10129 ldr rd1, [rbase + offset]
10130 ldr rd2, [rbase + offset + 4]
10134 add rd1, rbase, offset
10135 ldmia rd1, {rd1, rd2}
10137 produces worse code -- '3 cycles + any stalls on rd2' instead of
10138 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10139 access per cycle, the first sequence could never complete in less
10140 than 6 cycles, whereas the ldm sequence would only take 5 and
10141 would make better use of sequential accesses if not hitting the
10144 We cheat here and test 'arm_ld_sched' which we currently know to
10145 only be true for the ARM8, ARM9 and StrongARM. If this ever
10146 changes, then the test below needs to be reworked. */
10147 if (nops == 2 && arm_ld_sched && add_offset != 0)
10150 /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.
10154 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10155 the pipeline until completion.
     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.
10172 Best case ldr will always win. However, the more ldr instructions
10173 we issue, the less likely we are to be able to schedule them well.
10174 Using ldr instructions also increases code size.
10176 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10177 for counts of 3 or 4 regs. */
10178 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10183 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10184 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10185 an array ORDER which describes the sequence to use when accessing the
10186 offsets that produces an ascending order. In this sequence, each
10187 offset must be larger by exactly 4 than the previous one. ORDER[0]
10188 must have been filled in with the lowest offset by the caller.
10189 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10190 we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not. */
10195 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10196 int *unsorted_regs)
10199 for (i = 1; i < nops; i++)
10203 order[i] = order[i - 1];
10204 for (j = 0; j < nops; j++)
10205 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10207 /* We must find exactly one offset that is higher than the
10208 previous one by 4. */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
      if (order[i] == order[i - 1])
        return false;
10215 /* The register numbers must be ascending. */
10216 if (unsorted_regs != NULL
10217 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10223 /* Used to determine in a peephole whether a sequence of load
10224 instructions can be changed into a load-multiple instruction.
10225 NOPS is the number of separate load instructions we are examining. The
10226 first NOPS entries in OPERANDS are the destination registers, the
10227 next NOPS entries are memory operands. If this function is
10228 successful, *BASE is set to the common base register of the memory
10229 accesses; *LOAD_OFFSET is set to the first memory location's offset
10230 from that base register.
10231 REGS is an array filled in with the destination register numbers.
10232 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10233 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10234 the sequence of registers in REGS matches the loads from ascending memory
10235 locations, and the function verifies that the register numbers are
10236 themselves ascending. If CHECK_REGS is false, the register numbers
10237 are stored in the order they are found in the operands. */
10239 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10240 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10242 int unsorted_regs[MAX_LDM_STM_OPS];
10243 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10244 int order[MAX_LDM_STM_OPS];
10245 rtx base_reg_rtx = NULL;
10249 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10250 easily extended if required. */
10251 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10253 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10255 /* Loop over the operands and check that the memory references are
10256 suitable (i.e. immediate offsets from the same base register). At
10257 the same time, extract the target register, and the memory
10259 for (i = 0; i < nops; i++)
10264 /* Convert a subreg of a mem into the mem itself. */
10265 if (GET_CODE (operands[nops + i]) == SUBREG)
10266 operands[nops + i] = alter_subreg (operands + (nops + i));
10268 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10270 /* Don't reorder volatile memory references; it doesn't seem worth
10271 looking for the case where the order is ok anyway. */
10272 if (MEM_VOLATILE_P (operands[nops + i]))
10275 offset = const0_rtx;
10277 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10278 || (GET_CODE (reg) == SUBREG
10279 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10280 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10281 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10283 || (GET_CODE (reg) == SUBREG
10284 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10285 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10290 base_reg = REGNO (reg);
10291 base_reg_rtx = reg;
10292 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10295 else if (base_reg != (int) REGNO (reg))
10296 /* Not addressed from the same base register. */
10299 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10300 ? REGNO (operands[i])
10301 : REGNO (SUBREG_REG (operands[i])));
10303 /* If it isn't an integer register, or if it overwrites the
10304 base register but isn't the last insn in the list, then
10305 we can't do this. */
10306 if (unsorted_regs[i] < 0
10307 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10308 || unsorted_regs[i] > 14
10309 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10312 unsorted_offsets[i] = INTVAL (offset);
10313 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address. */
        return 0;
    }
10321 /* All the useful information has now been extracted from the
10322 operands into unsorted_regs and unsorted_offsets; additionally,
10323 order[0] has been set to the lowest offset in the list. Sort
10324 the offsets into order, verifying that they are adjacent, and
10325 check that the register numbers are ascending. */
10326 if (!compute_offset_order (nops, unsorted_offsets, order,
10327 check_regs ? unsorted_regs : NULL))
10331 memcpy (saved_order, order, sizeof order);
10337 for (i = 0; i < nops; i++)
10338 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10340 *load_offset = unsorted_offsets[order[0]];
10344 && !peep2_reg_dead_p (nops, base_reg_rtx))
10347 if (unsorted_offsets[order[0]] == 0)
10348 ldm_case = 1; /* ldmia */
10349 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10350 ldm_case = 2; /* ldmib */
10351 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10352 ldm_case = 3; /* ldmda */
10353 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10354 ldm_case = 4; /* ldmdb */
10355 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10356 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10361 if (!multiple_operation_profitable_p (false, nops,
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
10369 /* Used to determine in a peephole whether a sequence of store instructions can
10370 be changed into a store-multiple instruction.
10371 NOPS is the number of separate store instructions we are examining.
10372 NOPS_TOTAL is the total number of instructions recognized by the peephole
10374 The first NOPS entries in OPERANDS are the source registers, the next
10375 NOPS entries are memory operands. If this function is successful, *BASE is
10376 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10377 to the first memory location's offset from that base register. REGS is an
10378 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10379 likewise filled with the corresponding rtx's.
10380 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10381 numbers to an ascending order of stores.
10382 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10383 from ascending memory locations, and the function verifies that the register
10384 numbers are themselves ascending. If CHECK_REGS is false, the register
10385 numbers are stored in the order they are found in the operands. */
10387 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10388 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10389 HOST_WIDE_INT *load_offset, bool check_regs)
10391 int unsorted_regs[MAX_LDM_STM_OPS];
10392 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10393 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10394 int order[MAX_LDM_STM_OPS];
10396 rtx base_reg_rtx = NULL;
10399 /* Write back of base register is currently only supported for Thumb 1. */
10400 int base_writeback = TARGET_THUMB1;
10402 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10403 easily extended if required. */
10404 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10406 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10408 /* Loop over the operands and check that the memory references are
10409 suitable (i.e. immediate offsets from the same base register). At
10410 the same time, extract the target register, and the memory
10412 for (i = 0; i < nops; i++)
10417 /* Convert a subreg of a mem into the mem itself. */
10418 if (GET_CODE (operands[nops + i]) == SUBREG)
10419 operands[nops + i] = alter_subreg (operands + (nops + i));
10421 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10423 /* Don't reorder volatile memory references; it doesn't seem worth
10424 looking for the case where the order is ok anyway. */
10425 if (MEM_VOLATILE_P (operands[nops + i]))
10428 offset = const0_rtx;
10430 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10431 || (GET_CODE (reg) == SUBREG
10432 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10433 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10434 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10436 || (GET_CODE (reg) == SUBREG
10437 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10438 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10441 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10442 ? operands[i] : SUBREG_REG (operands[i]));
10443 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10447 base_reg = REGNO (reg);
10448 base_reg_rtx = reg;
10449 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10452 else if (base_reg != (int) REGNO (reg))
10453 /* Not addressed from the same base register. */
10456 /* If it isn't an integer register, then we can't do this. */
10457 if (unsorted_regs[i] < 0
10458 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10459 /* The effects are unpredictable if the base register is
10460 both updated and stored. */
10461 || (base_writeback && unsorted_regs[i] == base_reg)
10462 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10463 || unsorted_regs[i] > 14)
10466 unsorted_offsets[i] = INTVAL (offset);
10467 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address. */
        return 0;
    }
10475 /* All the useful information has now been extracted from the
10476 operands into unsorted_regs and unsorted_offsets; additionally,
10477 order[0] has been set to the lowest offset in the list. Sort
10478 the offsets into order, verifying that they are adjacent, and
10479 check that the register numbers are ascending. */
10480 if (!compute_offset_order (nops, unsorted_offsets, order,
10481 check_regs ? unsorted_regs : NULL))
10485 memcpy (saved_order, order, sizeof order);
10491 for (i = 0; i < nops; i++)
10493 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10495 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10498 *load_offset = unsorted_offsets[order[0]];
10502 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10505 if (unsorted_offsets[order[0]] == 0)
10506 stm_case = 1; /* stmia */
10507 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10508 stm_case = 2; /* stmib */
10509 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10510 stm_case = 3; /* stmda */
10511 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10512 stm_case = 4; /* stmdb */
  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
10522 /* Routines for use in generating RTL. */
10524 /* Generate a load-multiple instruction. COUNT is the number of loads in
10525 the instruction; REGS and MEMS are arrays containing the operands.
10526 BASEREG is the base register to be used in addressing the memory operands.
10527 WBACK_OFFSET is nonzero if the instruction should update the base
10531 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10532 HOST_WIDE_INT wback_offset)
10537 if (!multiple_operation_profitable_p (false, count, 0))
10543 for (i = 0; i < count; i++)
10544 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10546 if (wback_offset != 0)
10547 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10549 seq = get_insns ();
10555 result = gen_rtx_PARALLEL (VOIDmode,
10556 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10557 if (wback_offset != 0)
10559 XVECEXP (result, 0, 0)
10560 = gen_rtx_SET (VOIDmode, basereg,
10561 plus_constant (basereg, wback_offset));
10566 for (j = 0; i < count; i++, j++)
10567 XVECEXP (result, 0, i)
10568 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10573 /* Generate a store-multiple instruction. COUNT is the number of stores in
10574 the instruction; REGS and MEMS are arrays containing the operands.
10575 BASEREG is the base register to be used in addressing the memory operands.
10576 WBACK_OFFSET is nonzero if the instruction should update the base
10580 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10581 HOST_WIDE_INT wback_offset)
10586 if (GET_CODE (basereg) == PLUS)
10587 basereg = XEXP (basereg, 0);
10589 if (!multiple_operation_profitable_p (false, count, 0))
10595 for (i = 0; i < count; i++)
10596 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10598 if (wback_offset != 0)
10599 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10601 seq = get_insns ();
10607 result = gen_rtx_PARALLEL (VOIDmode,
10608 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10609 if (wback_offset != 0)
10611 XVECEXP (result, 0, 0)
10612 = gen_rtx_SET (VOIDmode, basereg,
10613 plus_constant (basereg, wback_offset));
10618 for (j = 0; i < count; i++, j++)
10619 XVECEXP (result, 0, i)
10620 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10625 /* Generate either a load-multiple or a store-multiple instruction. This
10626 function can be used in situations where we can start with a single MEM
10627 rtx and adjust its address upwards.
10628 COUNT is the number of operations in the instruction, not counting a
10629 possible update of the base register. REGS is an array containing the
10631 BASEREG is the base register to be used in addressing the memory operands,
10632 which are constructed from BASEMEM.
10633 WRITE_BACK specifies whether the generated instruction should include an
10634 update of the base register.
10635 OFFSETP is used to pass an offset to and from this function; this offset
10636 is not used when constructing the address (instead BASEMEM should have an
10637 appropriate offset in its address), it is used only for setting
   MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10641 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10642 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10644 rtx mems[MAX_LDM_STM_OPS];
10645 HOST_WIDE_INT offset = *offsetp;
10648 gcc_assert (count <= MAX_LDM_STM_OPS);
10650 if (GET_CODE (basereg) == PLUS)
10651 basereg = XEXP (basereg, 0);
10653 for (i = 0; i < count; i++)
10655 rtx addr = plus_constant (basereg, i * 4);
10656 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10664 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10665 write_back ? 4 * count : 0);
10667 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10668 write_back ? 4 * count : 0);
10672 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10673 rtx basemem, HOST_WIDE_INT *offsetp)
10675 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10680 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10681 rtx basemem, HOST_WIDE_INT *offsetp)
10683 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10687 /* Called from a peephole2 expander to turn a sequence of loads into an
10688 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10689 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10690 is true if we can reorder the registers because they are used commutatively
10692 Returns true iff we could generate a new instruction. */
10695 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10697 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10698 rtx mems[MAX_LDM_STM_OPS];
10699 int i, j, base_reg;
10701 HOST_WIDE_INT offset;
10702 int write_back = FALSE;
10706 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10707 &base_reg, &offset, !sort_regs);
10713 for (i = 0; i < nops - 1; i++)
10714 for (j = i + 1; j < nops; j++)
10715 if (regs[i] > regs[j])
10721 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10725 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10726 gcc_assert (ldm_case == 1 || ldm_case == 5);
10732 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10733 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10735 if (!TARGET_THUMB1)
10737 base_reg = regs[0];
10738 base_reg_rtx = newbase;
10742 for (i = 0; i < nops; i++)
10744 addr = plus_constant (base_reg_rtx, offset + i * 4);
10745 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10748 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10749 write_back ? offset + i * 4 : 0));
10753 /* Called from a peephole2 expander to turn a sequence of stores into an
10754 STM instruction. OPERANDS are the operands found by the peephole matcher;
10755 NOPS indicates how many separate stores we are trying to combine.
10756 Returns true iff we could generate a new instruction. */
10759 gen_stm_seq (rtx *operands, int nops)
10762 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10763 rtx mems[MAX_LDM_STM_OPS];
10766 HOST_WIDE_INT offset;
10767 int write_back = FALSE;
10770 bool base_reg_dies;
10772 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10773 mem_order, &base_reg, &offset, true);
10778 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10780 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10783 gcc_assert (base_reg_dies);
10789 gcc_assert (base_reg_dies);
10790 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10794 addr = plus_constant (base_reg_rtx, offset);
10796 for (i = 0; i < nops; i++)
10798 addr = plus_constant (base_reg_rtx, offset + i * 4);
10799 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10802 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10803 write_back ? offset + i * 4 : 0));
10807 /* Called from a peephole2 expander to turn a sequence of stores that are
10808 preceded by constant loads into an STM instruction. OPERANDS are the
10809 operands found by the peephole matcher; NOPS indicates how many
10810 separate stores we are trying to combine; there are 2 * NOPS
10811 instructions in the peephole.
10812 Returns true iff we could generate a new instruction. */
10815 gen_const_stm_seq (rtx *operands, int nops)
10817 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10818 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10819 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10820 rtx mems[MAX_LDM_STM_OPS];
10823 HOST_WIDE_INT offset;
10824 int write_back = FALSE;
10827 bool base_reg_dies;
10829 HARD_REG_SET allocated;
10831 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10832 mem_order, &base_reg, &offset, false);
10837 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10839 /* If the same register is used more than once, try to find a free
10841 CLEAR_HARD_REG_SET (allocated);
10842 for (i = 0; i < nops; i++)
10844 for (j = i + 1; j < nops; j++)
10845 if (regs[i] == regs[j])
10847 rtx t = peep2_find_free_register (0, nops * 2,
10848 TARGET_THUMB1 ? "l" : "r",
10849 SImode, &allocated);
10853 regs[i] = REGNO (t);
10857 /* Compute an ordering that maps the register numbers to an ascending
10860 for (i = 0; i < nops; i++)
10861 if (regs[i] < regs[reg_order[0]])
10864 for (i = 1; i < nops; i++)
10866 int this_order = reg_order[i - 1];
10867 for (j = 0; j < nops; j++)
10868 if (regs[j] > regs[reg_order[i - 1]]
10869 && (this_order == reg_order[i - 1]
10870 || regs[j] < regs[this_order]))
10872 reg_order[i] = this_order;
10875 /* Ensure that registers that must be live after the instruction end
10876 up with the correct value. */
10877 for (i = 0; i < nops; i++)
10879 int this_order = reg_order[i];
10880 if ((this_order != mem_order[i]
10881 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10882 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10886 /* Load the constants. */
10887 for (i = 0; i < nops; i++)
10889 rtx op = operands[2 * nops + mem_order[i]];
10890 sorted_regs[i] = regs[reg_order[i]];
10891 emit_move_insn (reg_rtxs[reg_order[i]], op);
10894 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10896 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10899 gcc_assert (base_reg_dies);
10905 gcc_assert (base_reg_dies);
10906 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10910 addr = plus_constant (base_reg_rtx, offset);
10912 for (i = 0; i < nops; i++)
10914 addr = plus_constant (base_reg_rtx, offset + i * 4);
10915 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10918 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10919 write_back ? offset + i * 4 : 0));
10923 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10924 unaligned copies on processors which support unaligned semantics for those
10925 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10926 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10927 An interleave factor of 1 (the minimum) will perform no interleaving.
10928 Load/store multiple are used for aligned addresses where possible. */
10931 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10932 HOST_WIDE_INT length,
10933 unsigned int interleave_factor)
10935 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10936 int *regnos = XALLOCAVEC (int, interleave_factor);
10937 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10938 HOST_WIDE_INT i, j;
10939 HOST_WIDE_INT remaining = length, words;
10940 rtx halfword_tmp = NULL, byte_tmp = NULL;
10942 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10943 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10944 HOST_WIDE_INT srcoffset, dstoffset;
10945 HOST_WIDE_INT src_autoinc, dst_autoinc;
10948 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10950 /* Use hard registers if we have aligned source or destination so we can use
10951 load/store multiple with contiguous registers. */
10952 if (dst_aligned || src_aligned)
10953 for (i = 0; i < interleave_factor; i++)
10954 regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);
10959 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10960 src = copy_addr_to_reg (XEXP (srcbase, 0));
10962 srcoffset = dstoffset = 0;
10964 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10965 For copying the last bytes we want to subtract this offset again. */
10966 src_autoinc = dst_autoinc = 0;
  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;
10971 /* Copy BLOCK_SIZE_BYTES chunks. */
10973 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
10976 if (src_aligned && interleave_factor > 1)
10978 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
10979 TRUE, srcbase, &srcoffset));
10980 src_autoinc += UNITS_PER_WORD * interleave_factor;
10984 for (j = 0; j < interleave_factor; j++)
	  addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
				- src_autoinc);
10988 mem = adjust_automodify_address (srcbase, SImode, addr,
10989 srcoffset + j * UNITS_PER_WORD);
10990 emit_insn (gen_unaligned_loadsi (regs[j], mem));
10992 srcoffset += block_size_bytes;
10996 if (dst_aligned && interleave_factor > 1)
10998 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
10999 TRUE, dstbase, &dstoffset));
11000 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11004 for (j = 0; j < interleave_factor; j++)
	  addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
				- dst_autoinc);
11008 mem = adjust_automodify_address (dstbase, SImode, addr,
11009 dstoffset + j * UNITS_PER_WORD);
11010 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11012 dstoffset += block_size_bytes;
11015 remaining -= block_size_bytes;
11018 /* Copy any whole words left (note these aren't interleaved with any
11019 subsequent halfword/byte load/stores in the interests of simplicity). */
11021 words = remaining / UNITS_PER_WORD;
11023 gcc_assert (words < interleave_factor);
11025 if (src_aligned && words > 1)
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
11029 src_autoinc += UNITS_PER_WORD * words;
11033 for (j = 0; j < words; j++)
11035 addr = plus_constant (src,
11036 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11037 mem = adjust_automodify_address (srcbase, SImode, addr,
11038 srcoffset + j * UNITS_PER_WORD);
11039 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11041 srcoffset += words * UNITS_PER_WORD;
11044 if (dst_aligned && words > 1)
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
11048 dst_autoinc += words * UNITS_PER_WORD;
11052 for (j = 0; j < words; j++)
11054 addr = plus_constant (dst,
11055 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11056 mem = adjust_automodify_address (dstbase, SImode, addr,
11057 dstoffset + j * UNITS_PER_WORD);
11058 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11060 dstoffset += words * UNITS_PER_WORD;
11063 remaining -= words * UNITS_PER_WORD;
11065 gcc_assert (remaining < 4);
11067 /* Copy a halfword if necessary. */
11069 if (remaining >= 2)
11071 halfword_tmp = gen_reg_rtx (SImode);
11073 addr = plus_constant (src, srcoffset - src_autoinc);
11074 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11075 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11077 /* Either write out immediately, or delay until we've loaded the last
11078 byte, depending on interleave factor. */
11079 if (interleave_factor == 1)
11081 addr = plus_constant (dst, dstoffset - dst_autoinc);
11082 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11083 emit_insn (gen_unaligned_storehi (mem,
11084 gen_lowpart (HImode, halfword_tmp)));
11085 halfword_tmp = NULL;
11093 gcc_assert (remaining < 2);
11095 /* Copy last byte. */
11097 if ((remaining & 1) != 0)
11099 byte_tmp = gen_reg_rtx (SImode);
11101 addr = plus_constant (src, srcoffset - src_autoinc);
11102 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11103 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11105 if (interleave_factor == 1)
11107 addr = plus_constant (dst, dstoffset - dst_autoinc);
11108 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11109 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11118 /* Store last halfword if we haven't done so already. */
  if (halfword_tmp)
    {
      addr = plus_constant (dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
    }
11129 /* Likewise for last byte. */
  if (byte_tmp)
    {
      addr = plus_constant (dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
    }
11139 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11142 /* From mips_adjust_block_mem:
11144 Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.
11148 Create a new base register for use within the loop and point it to
11149 the start of MEM. Create a new memory reference that uses this
11150 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11153 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11156 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11158 /* Although the new mem does not refer to a known location,
11159 it does keep up to LENGTH bytes of alignment. */
11160 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11161 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
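  /* Worked example with hypothetical values: if MEM is known to be 8-byte
     (64-bit) aligned and LENGTH is 4, the loop reference gets
     MIN (64, 32) == 32 bits of alignment, since successive iterations may
     stop at any LENGTH-byte boundary.  */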
11164 /* From mips_block_move_loop:
11166 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11167 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11168 the memory regions do not overlap. */
11171 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11172 unsigned int interleave_factor,
11173 HOST_WIDE_INT bytes_per_iter)
11175 rtx label, src_reg, dest_reg, final_src, test;
11176 HOST_WIDE_INT leftover;
11178 leftover = length % bytes_per_iter;
11179 length -= leftover;
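  /* Worked example with hypothetical numbers: LENGTH == 37 and
     BYTES_PER_ITER == 8 give LEFTOVER == 5, so the loop below copies
     32 bytes in four iterations and the straight-line code at the end
     mops up the remaining 5 bytes.  */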
11181 /* Create registers and memory references for use within the loop. */
11182 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11183 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
11187 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11188 0, 0, OPTAB_WIDEN);
11190 /* Emit the start of the loop. */
11191 label = gen_label_rtx ();
11192 emit_label (label);
11194 /* Emit the loop body. */
11195 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11196 interleave_factor);
11198 /* Move on to the next block. */
11199 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11200 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11202 /* Emit the loop condition. */
11203 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11204 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11206 /* Mop up any left-over bytes. */
11208 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11211 /* Emit a block move when either the source or destination is unaligned (not
11212 aligned to a four-byte boundary). This may need further tuning depending on
11213 core type, optimize_size setting, etc. */
11216 arm_movmemqi_unaligned (rtx *operands)
11218 HOST_WIDE_INT length = INTVAL (operands[2]);
11222 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11223 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11224 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11225 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11226 or dst_aligned though: allow more interleaving in those cases since the
11227 resulting code can be smaller. */
11228 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11229 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
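  /* For instance (illustrative numbers only): with an aligned source and
     optimize_size set, a 64-byte copy becomes eight loop iterations, each
     an ldm/stm pair moving two words; a fully unaligned copy instead falls
     back to single-word transfers, four bytes per iteration.  */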
11232 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11233 interleave_factor, bytes_per_iter);
11235 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11236 interleave_factor);
11240 /* Note that the loop created by arm_block_move_unaligned_loop may be
     subject to loop unrolling, which makes tuning this condition a little
     awkward.  */
11244 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11246 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11253 arm_gen_movmemqi (rtx *operands)
11255 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11256 HOST_WIDE_INT srcoffset, dstoffset;
11258 rtx src, dst, srcbase, dstbase;
11259 rtx part_bytes_reg = NULL;
11262 if (GET_CODE (operands[2]) != CONST_INT
11263 || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64)
    return 0;
11267 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11268 return arm_movmemqi_unaligned (operands);
  if (INTVAL (operands[3]) & 3)
    return 0;
11273 dstbase = operands[0];
11274 srcbase = operands[1];
11276 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11277 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11279 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11280 out_words_to_go = INTVAL (operands[2]) / 4;
11281 last_bytes = INTVAL (operands[2]) & 3;
11282 dstoffset = srcoffset = 0;
11284 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11285 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11287 for (i = 0; in_words_to_go >= 2; i+=4)
11289 if (in_words_to_go > 4)
11290 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11291 TRUE, srcbase, &srcoffset));
11293 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11294 src, FALSE, srcbase,
11297 if (out_words_to_go)
11299 if (out_words_to_go > 4)
11300 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11301 TRUE, dstbase, &dstoffset));
11302 else if (out_words_to_go != 1)
11303 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11304 out_words_to_go, dst,
11307 dstbase, &dstoffset));
11310 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11311 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11312 if (last_bytes != 0)
11314 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11320 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11321 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11324 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11325 if (out_words_to_go)
11329 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11330 sreg = copy_to_reg (mem);
11332 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11333 emit_move_insn (mem, sreg);
11336 gcc_assert (!in_words_to_go); /* Sanity check */
11339 if (in_words_to_go)
11341 gcc_assert (in_words_to_go > 0);
11343 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11344 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11347 gcc_assert (!last_bytes || part_bytes_reg);
11349 if (BYTES_BIG_ENDIAN && last_bytes)
11351 rtx tmp = gen_reg_rtx (SImode);
11353 /* The bytes we want are in the top end of the word. */
11354 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11355 GEN_INT (8 * (4 - last_bytes))));
11356 part_bytes_reg = tmp;
11360 mem = adjust_automodify_address (dstbase, QImode,
11361 plus_constant (dst, last_bytes - 1),
11362 dstoffset + last_bytes - 1);
11363 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11367 tmp = gen_reg_rtx (SImode);
11368 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11369 part_bytes_reg = tmp;
11376 if (last_bytes > 1)
11378 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11379 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11383 rtx tmp = gen_reg_rtx (SImode);
11384 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11385 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11386 part_bytes_reg = tmp;
11393 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11394 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11401 /* Select a dominance comparison mode if possible for a test of the general
11402 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11403 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11404 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11405 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11406 In all cases OP will be either EQ or NE, but we don't need to know which
11407 here. If we are unable to support a dominance comparison we return
11408 CC mode. This will then fail to match for the RTL expressions that
11409 generate this call. */
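/* For example, testing (x < y) || (x == y) is equivalent to testing
   x <= y, because EQ dominates (implies) LE; the combined test can thus
   be answered by a single comparison, and a dominance mode such as
   CC_DLEmode records which combined test that comparison must answer.  */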
static enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11413 enum rtx_code cond1, cond2;
11416 /* Currently we will probably get the wrong result if the individual
11417 comparisons are not simple. This also ensures that it is safe to
11418 reverse a comparison if necessary. */
11419 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11421 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11425 /* The if_then_else variant of this tests the second condition if the
11426 first passes, but is true if the first fails. Reverse the first
11427 condition to get a true "inclusive-or" expression. */
11428 if (cond_or == DOM_CC_NX_OR_Y)
11429 cond1 = reverse_condition (cond1);
11431 /* If the comparisons are not equal, and one doesn't dominate the other,
11432 then we can't do this. */
11434 && !comparison_dominates_p (cond1, cond2)
11435 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11440 enum rtx_code temp = cond1;
11448 if (cond_or == DOM_CC_X_AND_Y)
11453 case EQ: return CC_DEQmode;
11454 case LE: return CC_DLEmode;
11455 case LEU: return CC_DLEUmode;
11456 case GE: return CC_DGEmode;
11457 case GEU: return CC_DGEUmode;
11458 default: gcc_unreachable ();
11462 if (cond_or == DOM_CC_X_AND_Y)
11474 gcc_unreachable ();
11478 if (cond_or == DOM_CC_X_AND_Y)
11490 gcc_unreachable ();
11494 if (cond_or == DOM_CC_X_AND_Y)
11495 return CC_DLTUmode;
11500 return CC_DLTUmode;
11502 return CC_DLEUmode;
11506 gcc_unreachable ();
11510 if (cond_or == DOM_CC_X_AND_Y)
11511 return CC_DGTUmode;
11516 return CC_DGTUmode;
11518 return CC_DGEUmode;
11522 gcc_unreachable ();
    /* The remaining cases only occur when both comparisons are the
       same.  */
11528 gcc_assert (cond1 == cond2);
11532 gcc_assert (cond1 == cond2);
11536 gcc_assert (cond1 == cond2);
11540 gcc_assert (cond1 == cond2);
11541 return CC_DLEUmode;
11544 gcc_assert (cond1 == cond2);
11545 return CC_DGEUmode;
11548 gcc_unreachable ();
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11555 /* All floating point compares return CCFP if it is an equality
11556 comparison, and CCFPE otherwise. */
11557 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11577 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11582 gcc_unreachable ();
11586 /* A compare with a shifted operand. Because of canonicalization, the
11587 comparison will have to be swapped when we emit the assembler. */
11588 if (GET_MODE (y) == SImode
11589 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11590 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11591 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11592 || GET_CODE (x) == ROTATERT))
11595 /* This operation is performed swapped, but since we only rely on the Z
11596 flag we don't need an additional mode. */
11597 if (GET_MODE (y) == SImode
11598 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11599 && GET_CODE (x) == NEG
11600 && (op == EQ || op == NE))
11603 /* This is a special case that is used by combine to allow a
11604 comparison of a shifted byte load to be split into a zero-extend
11605 followed by a comparison of the shifted integer (only valid for
11606 equalities and unsigned inequalities). */
11607 if (GET_MODE (x) == SImode
11608 && GET_CODE (x) == ASHIFT
11609 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11610 && GET_CODE (XEXP (x, 0)) == SUBREG
11611 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11612 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11613 && (op == EQ || op == NE
11614 || op == GEU || op == GTU || op == LTU || op == LEU)
11615 && GET_CODE (y) == CONST_INT)
11618 /* A construct for a conditional compare, if the false arm contains
11619 0, then both conditions must be true, otherwise either condition
11620 must be true. Not all conditions are possible, so CCmode is
11621 returned if it can't be done. */
11622 if (GET_CODE (x) == IF_THEN_ELSE
11623 && (XEXP (x, 2) == const0_rtx
11624 || XEXP (x, 2) == const1_rtx)
11625 && COMPARISON_P (XEXP (x, 0))
11626 && COMPARISON_P (XEXP (x, 1)))
11627 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11628 INTVAL (XEXP (x, 2)));
11630 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11631 if (GET_CODE (x) == AND
11632 && (op == EQ || op == NE)
11633 && COMPARISON_P (XEXP (x, 0))
11634 && COMPARISON_P (XEXP (x, 1)))
11635 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11638 if (GET_CODE (x) == IOR
11639 && (op == EQ || op == NE)
11640 && COMPARISON_P (XEXP (x, 0))
11641 && COMPARISON_P (XEXP (x, 1)))
11642 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11645 /* An operation (on Thumb) where we want to test for a single bit.
11646 This is done by shifting that bit up into the top bit of a
11647 scratch register; we can then branch on the sign bit. */
11649 && GET_MODE (x) == SImode
11650 && (op == EQ || op == NE)
11651 && GET_CODE (x) == ZERO_EXTRACT
11652 && XEXP (x, 1) == const1_rtx)
11655 /* An operation that sets the condition codes as a side-effect, the
11656 V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
11659 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11660 if (GET_MODE (x) == SImode
11662 && (op == EQ || op == NE || op == LT || op == GE)
11663 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11664 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11665 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11666 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11667 || GET_CODE (x) == LSHIFTRT
11668 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11669 || GET_CODE (x) == ROTATERT
11670 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11671 return CC_NOOVmode;
  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;
11676 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11677 && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;
11681 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
11685 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11692 /* A DImode comparison against zero can be implemented by
11693 or'ing the two halves together. */
11694 if (y == const0_rtx)
11697 /* We can do an equality test in three Thumb instructions. */
11707 /* DImode unsigned comparisons can be implemented by cmp +
	 cmpeq without a scratch register.  Not worth doing in
	 Thumb-2.  */
11719 /* DImode signed and unsigned comparisons can be implemented
11720 by cmp + sbcs with a scratch register, but that does not
11721 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11722 gcc_assert (op != EQ && op != NE);
11726 gcc_unreachable ();
11733 /* X and Y are two things to compare using CODE. Emit the compare insn and
11734 return the rtx for register 0 in the proper mode. FP means this is a
11735 floating point compare: I don't think that it is needed on the arm. */
11737 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11739 enum machine_mode mode;
11741 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11743 /* We might have X as a constant, Y as a register because of the predicates
11744 used for cmpdi. If so, force X to a register here. */
11745 if (dimode_comparison && !REG_P (x))
11746 x = force_reg (DImode, x);
11748 mode = SELECT_CC_MODE (code, x, y);
11749 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11751 if (dimode_comparison
11752 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11753 && mode != CC_CZmode)
11757 /* To compare two non-zero values for equality, XOR them and
11758 then compare against zero. Not used for ARM mode; there
11759 CC_CZmode is cheaper. */
11760 if (mode == CC_Zmode && y != const0_rtx)
11762 gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
11767 /* A scratch register is required. */
11768 if (reload_completed)
11769 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);
11773 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11774 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11775 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11778 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11783 /* Generate a sequence of insns that will generate the correct return
11784 address mask depending on the physical architecture that the program
11787 arm_gen_return_addr_mask (void)
11789 rtx reg = gen_reg_rtx (Pmode);
11791 emit_insn (gen_return_addr_mask (reg));
11796 arm_reload_in_hi (rtx *operands)
11798 rtx ref = operands[1];
11800 HOST_WIDE_INT offset = 0;
11802 if (GET_CODE (ref) == SUBREG)
11804 offset = SUBREG_BYTE (ref);
11805 ref = SUBREG_REG (ref);
11808 if (GET_CODE (ref) == REG)
11810 /* We have a pseudo which has been spilt onto the stack; there
11811 are two cases here: the first where there is a simple
11812 stack-slot replacement and a second where the stack-slot is
11813 out of range, or is used as a subreg. */
11814 if (reg_equiv_mem (REGNO (ref)))
11816 ref = reg_equiv_mem (REGNO (ref));
11817 base = find_replacement (&XEXP (ref, 0));
11820 /* The slot is out of range, or was dressed up in a SUBREG. */
11821 base = reg_equiv_address (REGNO (ref));
11824 base = find_replacement (&XEXP (ref, 0));
11826 /* Handle the case where the address is too complex to be offset by 1. */
11827 if (GET_CODE (base) == MINUS
11828 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11830 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
      emit_set_insn (base_plus, base);
      base = base_plus;
11835 else if (GET_CODE (base) == PLUS)
11837 /* The addend must be CONST_INT, or we would have dealt with it above. */
11838 HOST_WIDE_INT hi, lo;
11840 offset += INTVAL (XEXP (base, 1));
11841 base = XEXP (base, 0);
11843 /* Rework the address into a legal sequence of insns. */
11844 /* Valid range for lo is -4095 -> 4095 */
11847 : -((-offset) & 0xfff));
11849 /* Corner case, if lo is the max offset then we would be out of range
11850 once we have added the additional 1 below, so bump the msb into the
11851 pre-loading insn(s). */
11855 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11856 ^ (HOST_WIDE_INT) 0x80000000)
11857 - (HOST_WIDE_INT) 0x80000000);
11859 gcc_assert (hi + lo == offset);
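      /* Worked example with a hypothetical offset: offset == 0x12345 gives
	 lo == 0x345 (within the +/-4095 byte-load range) and
	 hi == ((0x12000 ^ 0x80000000) - 0x80000000) == 0x12000; the addsi3
	 below (taken since hi != 0) materializes base + 0x12000 and the
	 byte loads use the small remainder, with hi + lo == offset as
	 asserted.  */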
11863 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11865 /* Get the base address; addsi3 knows how to handle constants
11866 that require more than one insn. */
      emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
      base = base_plus;
      offset = lo;
11873 /* Operands[2] may overlap operands[0] (though it won't overlap
11874 operands[1]), that's why we asked for a DImode reg -- so we can
11875 use the bit that does not overlap. */
11876 if (REGNO (operands[2]) == REGNO (operands[0]))
11877 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11881 emit_insn (gen_zero_extendqisi2 (scratch,
11882 gen_rtx_MEM (QImode,
11883 plus_constant (base,
11885 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11886 gen_rtx_MEM (QImode,
11887 plus_constant (base,
11889 if (!BYTES_BIG_ENDIAN)
11890 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11891 gen_rtx_IOR (SImode,
11894 gen_rtx_SUBREG (SImode, operands[0], 0),
11898 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11899 gen_rtx_IOR (SImode,
11900 gen_rtx_ASHIFT (SImode, scratch,
11902 gen_rtx_SUBREG (SImode, operands[0], 0)));
11905 /* Handle storing a half-word to memory during reload by synthesizing as two
11906 byte stores. Take care not to clobber the input values until after we
11907 have moved them somewhere safe. This code assumes that if the DImode
11908 scratch in operands[2] overlaps either the input value or output address
11909 in some way, then that value must die in this insn (we absolutely need
11910 two scratch registers for some corner cases). */
11912 arm_reload_out_hi (rtx *operands)
11914 rtx ref = operands[0];
11915 rtx outval = operands[1];
11917 HOST_WIDE_INT offset = 0;
11919 if (GET_CODE (ref) == SUBREG)
11921 offset = SUBREG_BYTE (ref);
11922 ref = SUBREG_REG (ref);
11925 if (GET_CODE (ref) == REG)
11927 /* We have a pseudo which has been spilt onto the stack; there
11928 are two cases here: the first where there is a simple
11929 stack-slot replacement and a second where the stack-slot is
11930 out of range, or is used as a subreg. */
11931 if (reg_equiv_mem (REGNO (ref)))
11933 ref = reg_equiv_mem (REGNO (ref));
11934 base = find_replacement (&XEXP (ref, 0));
11937 /* The slot is out of range, or was dressed up in a SUBREG. */
11938 base = reg_equiv_address (REGNO (ref));
11941 base = find_replacement (&XEXP (ref, 0));
11943 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11945 /* Handle the case where the address is too complex to be offset by 1. */
11946 if (GET_CODE (base) == MINUS
11947 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11949 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11951 /* Be careful not to destroy OUTVAL. */
11952 if (reg_overlap_mentioned_p (base_plus, outval))
11954 /* Updating base_plus might destroy outval, see if we can
11955 swap the scratch and base_plus. */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    {
	      rtx tmp = scratch;
	      scratch = base_plus;
	      base_plus = tmp;
	    }
11964 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11966 /* Be conservative and copy OUTVAL into the scratch now,
11967 this should only be necessary if outval is a subreg
11968 of something larger than a word. */
11969 /* XXX Might this clobber base? I can't see how it can,
11970 since scratch is known to overlap with OUTVAL, and
11971 must be wider than a word. */
11972 emit_insn (gen_movhi (scratch_hi, outval));
11973 outval = scratch_hi;
      emit_set_insn (base_plus, base);
      base = base_plus;
11980 else if (GET_CODE (base) == PLUS)
11982 /* The addend must be CONST_INT, or we would have dealt with it above. */
11983 HOST_WIDE_INT hi, lo;
11985 offset += INTVAL (XEXP (base, 1));
11986 base = XEXP (base, 0);
11988 /* Rework the address into a legal sequence of insns. */
11989 /* Valid range for lo is -4095 -> 4095 */
11992 : -((-offset) & 0xfff));
11994 /* Corner case, if lo is the max offset then we would be out of range
11995 once we have added the additional 1 below, so bump the msb into the
11996 pre-loading insn(s). */
12000 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12001 ^ (HOST_WIDE_INT) 0x80000000)
12002 - (HOST_WIDE_INT) 0x80000000);
12004 gcc_assert (hi + lo == offset);
12008 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12010 /* Be careful not to destroy OUTVAL. */
12011 if (reg_overlap_mentioned_p (base_plus, outval))
12013 /* Updating base_plus might destroy outval, see if we
12014 can swap the scratch and base_plus. */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    {
	      rtx tmp = scratch;
	      scratch = base_plus;
	      base_plus = tmp;
	    }
12023 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12025 /* Be conservative and copy outval into scratch now,
12026 this should only be necessary if outval is a
12027 subreg of something larger than a word. */
12028 /* XXX Might this clobber base? I can't see how it
	     can, since scratch is known to overlap with
	     OUTVAL.  */
12031 emit_insn (gen_movhi (scratch_hi, outval));
12032 outval = scratch_hi;
12036 /* Get the base address; addsi3 knows how to handle constants
12037 that require more than one insn. */
      emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
      base = base_plus;
      offset = lo;
12044 if (BYTES_BIG_ENDIAN)
12046 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12047 plus_constant (base, offset + 1)),
12048 gen_lowpart (QImode, outval)));
12049 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
12052 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12053 gen_lowpart (QImode, scratch)));
12057 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12058 gen_lowpart (QImode, outval)));
12059 emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
12062 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12063 plus_constant (base, offset + 1)),
12064 gen_lowpart (QImode, scratch)));
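/* In outline, the little-endian branch above emits (register names are
   illustrative only):

	strb	outval, [base, #offset]
	mov	scratch, outval, lsr #8
	strb	scratch, [base, #offset + 1]

   i.e. the low byte is stored first and the high byte is extracted with a
   logical shift right before the second store; the big-endian branch swaps
   which address receives which byte.  */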
12068 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12069 (padded to the size of a word) should be passed in a register. */
12072 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12074 if (TARGET_AAPCS_BASED)
12075 return must_pass_in_stack_var_size (mode, type);
12077 return must_pass_in_stack_var_size_or_pad (mode, type);
12081 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12082 Return true if an argument passed on the stack should be padded upwards,
12083 i.e. if the least-significant byte has useful data.
12084 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12085 aggregate types are placed in the lowest memory address. */
12088 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12090 if (!TARGET_AAPCS_BASED)
12091 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12093 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12100 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12101 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12102 register has useful data, and return the opposite if the most
12103 significant byte does. */
12106 arm_pad_reg_upward (enum machine_mode mode,
12107 tree type, int first ATTRIBUTE_UNUSED)
12109 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12111 /* For AAPCS, small aggregates, small fixed-point types,
12112 and small complex types are always padded upwards. */
12115 if ((AGGREGATE_TYPE_P (type)
12116 || TREE_CODE (type) == COMPLEX_TYPE
12117 || FIXED_POINT_TYPE_P (type))
12118 && int_size_in_bytes (type) <= 4)
12123 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12124 && GET_MODE_SIZE (mode) <= 4)
12129 /* Otherwise, use default padding. */
12130 return !BYTES_BIG_ENDIAN;
12134 /* Print a symbolic form of X to the debug file, F. */
12136 arm_print_value (FILE *f, rtx x)
12138 switch (GET_CODE (x))
12141 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12145 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12153 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12155 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12156 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12164 fprintf (f, "\"%s\"", XSTR (x, 0));
12168 fprintf (f, "`%s'", XSTR (x, 0));
12172 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12176 arm_print_value (f, XEXP (x, 0));
12180 arm_print_value (f, XEXP (x, 0));
12182 arm_print_value (f, XEXP (x, 1));
12190 fprintf (f, "????");
12195 /* Routines for manipulation of the constant pool. */
12197 /* Arm instructions cannot load a large constant directly into a
12198 register; they have to come from a pc relative load. The constant
12199 must therefore be placed in the addressable range of the pc
12200 relative load. Depending on the precise pc relative load
12201 instruction the range is somewhere between 256 bytes and 4k. This
12202 means that we often have to dump a constant inside a function, and
12203 generate code to branch around it.
12205 It is important to minimize this, since the branches will slow
12206 things down and make the code larger.
12208 Normally we can hide the table after an existing unconditional
12209 branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...
12228 We fix this by performing a scan after scheduling, which notices
12229 which instructions need to have their operands fetched from the
12230 constant table and builds the table.
12232 The algorithm starts by building a table of all the constants that
12233 need fixing up and all the natural barriers in the function (places
12234 where a constant table can be dropped without breaking the flow).
12235 For each fixup we note how far the pc-relative replacement will be
12236 able to reach and the offset of the instruction into the function.
12238 Having built the table we then group the fixes together to form
12239 tables that are as large as possible (subject to addressing
12240 constraints) and emit each table of constants after the last
12241 barrier that is within range of all the instructions in the group.
12242 If a group does not contain a barrier, then we forcibly create one
12243 by inserting a jump instruction into the flow. Once the table has
12244 been inserted, the insns are then modified to reference the
12245 relevant entry in the pool.
12247 Possible enhancements to the algorithm (not implemented) are:
12249 1) For some processors and object formats, there may be benefit in
12250 aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
12254 /* These typedefs are located at the start of this file, so that
12255 they can be used in the prototypes there. This comment is to
12256 remind readers of that fact so that the following structures
12257 can be understood more easily.
12259 typedef struct minipool_node Mnode;
12260 typedef struct minipool_fixup Mfix; */
12262 struct minipool_node
12264 /* Doubly linked chain of entries. */
12267 /* The maximum offset into the code that this entry can be placed. While
12268 pushing fixes for forward references, all entries are sorted in order
12269 of increasing max_address. */
12270 HOST_WIDE_INT max_address;
12271 /* Similarly for an entry inserted for a backwards ref. */
12272 HOST_WIDE_INT min_address;
12273 /* The number of fixes referencing this entry. This can become zero
12274 if we "unpush" an entry. In this case we ignore the entry when we
12275 come to emit the code. */
12277 /* The offset from the start of the minipool. */
12278 HOST_WIDE_INT offset;
  /* The value in the table.  */
12281 /* The mode of value. */
12282 enum machine_mode mode;
12283 /* The size of the value. With iWMMXt enabled
     sizes > 4 also imply an alignment of 8 bytes.  */
12288 struct minipool_fixup
12292 HOST_WIDE_INT address;
12294 enum machine_mode mode;
12298 HOST_WIDE_INT forwards;
12299 HOST_WIDE_INT backwards;
12302 /* Fixes less than a word need padding out to a word boundary. */
12303 #define MINIPOOL_FIX_SIZE(mode) \
12304 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
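/* For example, an HImode fix (GET_MODE_SIZE == 2) still consumes 4 bytes
   of pool space, while a DImode or DFmode entry takes its natural 8.  */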
12306 static Mnode * minipool_vector_head;
12307 static Mnode * minipool_vector_tail;
12308 static rtx minipool_vector_label;
12309 static int minipool_pad;
12311 /* The linked list of all minipool fixes required for this function. */
12312 Mfix * minipool_fix_head;
12313 Mfix * minipool_fix_tail;
12314 /* The fix entry for the current minipool, once it has been placed. */
12315 Mfix * minipool_barrier;
12317 /* Determines if INSN is the start of a jump table. Returns the end
12318 of the TABLE or NULL_RTX. */
12320 is_jump_table (rtx insn)
12324 if (jump_to_label_p (insn)
12325 && ((table = next_real_insn (JUMP_LABEL (insn)))
12326 == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
12329 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12330 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12336 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12337 #define JUMP_TABLES_IN_TEXT_SECTION 0
12340 static HOST_WIDE_INT
12341 get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
12345 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12347 rtx body = PATTERN (insn);
12348 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12349 HOST_WIDE_INT size;
12350 HOST_WIDE_INT modesize;
12352 modesize = GET_MODE_SIZE (GET_MODE (body));
12353 size = modesize * XVECLEN (body, elt);
12357 /* Round up size of TBB table to a halfword boundary. */
12358 size = (size + 1) & ~(HOST_WIDE_INT)1;
12361 /* No padding necessary for TBH. */
12364 /* Add two bytes for alignment on Thumb. */
12369 gcc_unreachable ();
/* Return the maximum amount of padding that will be inserted before
   LABEL.  */
12380 static HOST_WIDE_INT
12381 get_label_padding (rtx label)
12383 HOST_WIDE_INT align, min_insn_size;
12385 align = 1 << label_to_alignment (label);
12386 min_insn_size = TARGET_THUMB ? 2 : 4;
12387 return align > min_insn_size ? align - min_insn_size : 0;
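/* E.g. a label aligned to 1 << 3 == 8 bytes in Thumb code (minimum insn
   size 2) can be preceded by at most 8 - 2 == 6 bytes of padding, while a
   label with no extra alignment contributes none.  */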
12390 /* Move a minipool fix MP from its current location to before MAX_MP.
12391 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12392 constraints may need updating. */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12395 HOST_WIDE_INT max_address)
12397 /* The code below assumes these are different. */
12398 gcc_assert (mp != max_mp);
12400 if (max_mp == NULL)
12402 if (max_address < mp->max_address)
12403 mp->max_address = max_address;
12407 if (max_address > max_mp->max_address - mp->fix_size)
12408 mp->max_address = max_mp->max_address - mp->fix_size;
12410 mp->max_address = max_address;
12412 /* Unlink MP from its current position. Since max_mp is non-null,
12413 mp->prev must be non-null. */
12414 mp->prev->next = mp->next;
12415 if (mp->next != NULL)
12416 mp->next->prev = mp->prev;
  else
    minipool_vector_tail = mp->prev;
12420 /* Re-insert it before MAX_MP. */
  mp->next = max_mp;
  mp->prev = max_mp->prev;
  max_mp->prev = mp;

  if (mp->prev != NULL)
    mp->prev->next = mp;
  else
    minipool_vector_head = mp;
12431 /* Save the new entry. */
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
12436 while (mp->prev != NULL
12437 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12439 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12446 /* Add a constant to the minipool for a forward reference. Returns the
12447 node added or NULL if the constant will not fit in this pool. */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
12451 /* If set, max_mp is the first pool_entry that has a lower
12452 constraint than the one we are trying to add. */
12453 Mnode * max_mp = NULL;
12454 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12457 /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
12461 if (minipool_vector_head &&
12462 (fix->address + get_attr_length (fix->insn)
12463 >= minipool_vector_head->max_address - fix->fix_size))
12466 /* Scan the pool to see if a constant with the same value has
12467 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
12470 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12472 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12473 && fix->mode == mp->mode
12474 && (GET_CODE (fix->value) != CODE_LABEL
12475 || (CODE_LABEL_NUMBER (fix->value)
12476 == CODE_LABEL_NUMBER (mp->value)))
12477 && rtx_equal_p (fix->value, mp->value))
12479 /* More than one fix references this entry. */
12481 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12484 /* Note the insertion point if necessary. */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;
      /* If we are inserting an 8-byte aligned quantity and
12490 we have not already found an insertion point, then
12491 make sure that all such 8-byte aligned quantities are
12492 placed at the start of the pool. */
12493 if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
12496 && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
12503 /* The value is not currently in the minipool, so we need to create
12504 a new entry for it. If MAX_MP is NULL, the entry will be put on
12505 the end of the list since the placement is less constrained than
12506 any existing entry. Otherwise, we insert the new fix before
   MAX_MP and, if necessary, adjust the constraints on the other
   entries.  */
12510 mp->fix_size = fix->fix_size;
12511 mp->mode = fix->mode;
12512 mp->value = fix->value;
12514 /* Not yet required for a backwards ref. */
12515 mp->min_address = -65536;
12517 if (max_mp == NULL)
12519 mp->max_address = max_address;
12521 mp->prev = minipool_vector_tail;
      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
12535 if (max_address > max_mp->max_address - mp->fix_size)
12536 mp->max_address = max_mp->max_address - mp->fix_size;
12538 mp->max_address = max_address;
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
12549 /* Save the new entry. */
  /* Scan over the preceding entries and adjust their addresses as
     required.  */
12554 while (mp->prev != NULL
12555 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12557 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12566 HOST_WIDE_INT min_address)
12568 HOST_WIDE_INT offset;
12570 /* The code below assumes these are different. */
12571 gcc_assert (mp != min_mp);
12573 if (min_mp == NULL)
12575 if (min_address > mp->min_address)
12576 mp->min_address = min_address;
12580 /* We will adjust this below if it is too loose. */
12581 mp->min_address = min_address;
12583 /* Unlink MP from its current position. Since min_mp is non-null,
12584 mp->next must be non-null. */
12585 mp->next->prev = mp->prev;
12586 if (mp->prev != NULL)
12587 mp->prev->next = mp->next;
  else
    minipool_vector_head = mp->next;
12591 /* Reinsert it after MIN_MP. */
  mp->prev = min_mp;
  mp->next = min_mp->next;
  min_mp->next = mp;

  if (mp->next != NULL)
    mp->next->prev = mp;
  else
    minipool_vector_tail = mp;
12604 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12606 mp->offset = offset;
12607 if (mp->refcount > 0)
12608 offset += mp->fix_size;
12610 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12611 mp->next->min_address = mp->min_address + mp->fix_size;
12617 /* Add a constant to the minipool for a backward reference. Returns the
12618 node added or NULL if the constant will not fit in this pool.
12620 Note that the code for insertion for a backwards reference can be
12621 somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
12627 /* If set, min_mp is the last pool_entry that has a lower constraint
12628 than the one we are trying to add. */
12629 Mnode *min_mp = NULL;
12630 /* This can be negative, since it is only a constraint. */
12631 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12634 /* If we can't reach the current pool from this insn, or if we can't
12635 insert this entry at the end of the pool without pushing other
12636 fixes out of range, then we don't try. This ensures that we
12637 can't fail later on. */
12638 if (min_address >= minipool_barrier->address
12639 || (minipool_vector_tail->min_address + fix->fix_size
12640 >= minipool_barrier->address))
12643 /* Scan the pool to see if a constant with the same value has
12644 already been added. While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
12647 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12649 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12650 && fix->mode == mp->mode
12651 && (GET_CODE (fix->value) != CODE_LABEL
12652 || (CODE_LABEL_NUMBER (fix->value)
12653 == CODE_LABEL_NUMBER (mp->value)))
12654 && rtx_equal_p (fix->value, mp->value)
12655 /* Check that there is enough slack to move this entry to the
12656 end of the table (this is conservative). */
12657 && (mp->max_address
12658 > (minipool_barrier->address
12659 + minipool_vector_tail->offset
12660 + minipool_vector_tail->fix_size)))
12663 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12666 if (min_mp != NULL)
12667 mp->min_address += fix->fix_size;
12670 /* Note the insertion point if necessary. */
12671 if (mp->min_address < min_address)
12673 /* For now, we do not allow the insertion of 8-byte alignment
12674 requiring nodes anywhere but at the start of the pool. */
12675 if (ARM_DOUBLEWORD_ALIGN
12676 && fix->fix_size >= 8 && mp->fix_size < 8)
12681 else if (mp->max_address
12682 < minipool_barrier->address + mp->offset + fix->fix_size)
12684 /* Inserting before this entry would push the fix beyond
12685 its maximum address (which can happen if we have
	     re-located a forwards fix); force the new fix to come
	     after it.  */
12688 if (ARM_DOUBLEWORD_ALIGN
12689 && fix->fix_size >= 8 && mp->fix_size < 8)
12694 min_address = mp->min_address + fix->fix_size;
12697 /* Do not insert a non-8-byte aligned quantity before 8-byte
12698 aligned quantities. */
12699 else if (ARM_DOUBLEWORD_ALIGN
12700 && fix->fix_size < 8
12701 && mp->fix_size >= 8)
12704 min_address = mp->min_address + fix->fix_size;
12709 /* We need to create a new entry. */
12711 mp->fix_size = fix->fix_size;
12712 mp->mode = fix->mode;
12713 mp->value = fix->value;
12715 mp->max_address = minipool_barrier->address + 65536;
12717 mp->min_address = min_address;
12719 if (min_mp == NULL)
12722 mp->next = minipool_vector_head;
      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
12746 /* Save the new entry. */
12754 /* Scan over the following entries and adjust their offsets. */
12755 while (mp->next != NULL)
12757 if (mp->next->min_address < mp->min_address + mp->fix_size)
12758 mp->next->min_address = mp->min_address + mp->fix_size;
      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;
12772 assign_minipool_offsets (Mfix *barrier)
12774 HOST_WIDE_INT offset = 0;
12777 minipool_barrier = barrier;
12779 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12781 mp->offset = offset;
12783 if (mp->refcount > 0)
12784 offset += mp->fix_size;
/* Output the literal table.  */
12790 dump_minipool (rtx scan)
12796 if (ARM_DOUBLEWORD_ALIGN)
12797 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12798 if (mp->refcount > 0 && mp->fix_size >= 8)
12805 fprintf (dump_file,
12806 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12807 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12809 scan = emit_label_after (gen_label_rtx (), scan);
12810 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12811 scan = emit_label_after (minipool_vector_label, scan);
12813 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12815 if (mp->refcount > 0)
12819 fprintf (dump_file,
12820 ";; Offset %u, min %ld, max %ld ",
12821 (unsigned) mp->offset, (unsigned long) mp->min_address,
12822 (unsigned long) mp->max_address);
12823 arm_print_value (dump_file, mp->value);
12824 fputc ('\n', dump_file);
12827 switch (mp->fix_size)
12829 #ifdef HAVE_consttable_1
12831 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12835 #ifdef HAVE_consttable_2
12837 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12841 #ifdef HAVE_consttable_4
12843 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12847 #ifdef HAVE_consttable_8
12849 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12853 #ifdef HAVE_consttable_16
12855 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12860 gcc_unreachable ();
12868 minipool_vector_head = minipool_vector_tail = NULL;
12869 scan = emit_insn_after (gen_consttable_end (), scan);
12870 scan = emit_barrier_after (scan);
12873 /* Return the cost of forcibly inserting a barrier after INSN. */
12875 arm_barrier_cost (rtx insn)
12877 /* Basing the location of the pool on the loop depth is preferable,
12878 but at the moment, the basic block information seems to be
12879 corrupt by this stage of the compilation. */
12880 int base_cost = 50;
12881 rtx next = next_nonnote_insn (insn);
12883 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12886 switch (GET_CODE (insn))
      /* It will always be better to place the table before the label, rather
	 than after it.  */
12898 return base_cost - 10;
12901 return base_cost + 10;
12905 /* Find the best place in the insn stream in the range
12906 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
12910 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12912 HOST_WIDE_INT count = 0;
12914 rtx from = fix->insn;
12915 /* The instruction after which we will insert the jump. */
12916 rtx selected = NULL;
12918 /* The address at which the jump instruction will be placed. */
12919 HOST_WIDE_INT selected_address;
12921 HOST_WIDE_INT max_count = max_address - fix->address;
12922 rtx label = gen_label_rtx ();
12924 selected_cost = arm_barrier_cost (from);
12925 selected_address = fix->address;
12927 while (from && count < max_count)
      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
12934 gcc_assert (GET_CODE (from) != BARRIER);
12936 /* Count the length of this insn. This must stay in sync with the
12937 code that pushes minipool fixes. */
12938 if (LABEL_P (from))
12939 count += get_label_padding (from);
12941 count += get_attr_length (from);
12943 /* If there is a jump table, add its length. */
12944 tmp = is_jump_table (from);
12947 count += get_jump_table_size (tmp);
12949 /* Jump tables aren't in a basic block, so base the cost on
12950 the dispatch insn. If we select this location, we will
12951 still put the pool after the table. */
12952 new_cost = arm_barrier_cost (from);
12954 if (count < max_count
12955 && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }
12962 /* Continue after the dispatch table. */
12963 from = NEXT_INSN (tmp);
12967 new_cost = arm_barrier_cost (from);
12969 if (count < max_count
12970 && (!selected || new_cost <= selected_cost))
	    {
	      selected = from;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }
12977 from = NEXT_INSN (from);
12980 /* Make sure that we found a place to insert the jump. */
12981 gcc_assert (selected);
12983 /* Make sure we do not split a call and its corresponding
12984 CALL_ARG_LOCATION note. */
12985 if (CALL_P (selected))
12987 rtx next = NEXT_INSN (selected);
12988 if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
12993 /* Create a new JUMP_INSN that branches around a barrier. */
12994 from = emit_jump_insn_after (gen_jump (label), selected);
12995 JUMP_LABEL (from) = label;
12996 barrier = emit_barrier_after (from);
12997 emit_label_after (label, barrier);
12999 /* Create a minipool barrier entry for the new barrier. */
13000 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13001 new_fix->insn = barrier;
13002 new_fix->address = selected_address;
13003 new_fix->next = fix->next;
13004 fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
13012 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13014 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;
  fix->next = NULL;
13020 if (minipool_fix_head != NULL)
13021 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
13025 minipool_fix_tail = fix;
13028 /* Record INSN, which will need fixing up to load a value from the
13029 minipool. ADDRESS is the offset of the insn since the start of the
13030 function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
13034 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13035 enum machine_mode mode, rtx value)
13037 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
13043 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13044 fix->value = value;
13045 fix->forwards = get_attr_pool_range (insn);
13046 fix->backwards = get_attr_neg_pool_range (insn);
13047 fix->minipool = NULL;
13049 /* If an insn doesn't have a range defined for it, then it isn't
13050 expecting to be reworked by this code. Better to stop now than
13051 to generate duff assembly code. */
13052 gcc_assert (fix->forwards || fix->backwards);
13054 /* If an entry requires 8-byte alignment then assume all constant pools
13055 require 4 bytes of padding. Trying to do this later on a per-pool
13056 basis is awkward because existing pool entries have to be modified. */
13057 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13062 fprintf (dump_file,
13063 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13064 GET_MODE_NAME (mode),
13065 INSN_UID (insn), (unsigned long) address,
13066 -1 * (long)fix->backwards, (long)fix->forwards);
13067 arm_print_value (dump_file, fix->value);
13068 fprintf (dump_file, "\n");
13071 /* Add it to the chain of fixes. */
13074 if (minipool_fix_head != NULL)
13075 minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;
13079 minipool_fix_tail = fix;
13082 /* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
13086 arm_const_double_inline_cost (rtx val)
13088 rtx lowpart, highpart;
13089 enum machine_mode mode;
13091 mode = GET_MODE (val);
13093 if (mode == VOIDmode)
13096 gcc_assert (GET_MODE_SIZE (mode) == 8);
13098 lowpart = gen_lowpart (SImode, val);
13099 highpart = gen_highpart_mode (SImode, mode, val);
13101 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13102 gcc_assert (GET_CODE (highpart) == CONST_INT);
13104 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13105 NULL_RTX, NULL_RTX, 0, 0)
13106 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13107 NULL_RTX, NULL_RTX, 0, 0));
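/* As a quick sanity example: a value such as 0x0000000100000001, whose two
   halves are each a valid immediate, costs 2 (one insn per half), whereas
   halves that need mov/orr chains push the total correspondingly higher.  */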
13110 /* Return true if it is worthwhile to split a 64-bit constant into two
13111 32-bit operations. This is the case if optimizing for size, or
13112 if we have load delay slots, or if one 32-bit part can be done with
13113 a single data operation. */
13115 arm_const_double_by_parts (rtx val)
13117 enum machine_mode mode = GET_MODE (val);
13120 if (optimize_size || arm_ld_sched)
13123 if (mode == VOIDmode)
13126 part = gen_highpart_mode (SImode, mode, val);
13128 gcc_assert (GET_CODE (part) == CONST_INT);
13130 if (const_ok_for_arm (INTVAL (part))
13131 || const_ok_for_arm (~INTVAL (part)))
13134 part = gen_lowpart (SImode, val);
13136 gcc_assert (GET_CODE (part) == CONST_INT);
13138 if (const_ok_for_arm (INTVAL (part))
13139 || const_ok_for_arm (~INTVAL (part)))
13145 /* Return true if it is possible to inline both the high and low parts
13146 of a 64-bit constant into 32-bit data processing instructions. */
13148 arm_const_double_by_immediates (rtx val)
13150 enum machine_mode mode = GET_MODE (val);
13153 if (mode == VOIDmode)
13156 part = gen_highpart_mode (SImode, mode, val);
13158 gcc_assert (GET_CODE (part) == CONST_INT);
13160 if (!const_ok_for_arm (INTVAL (part)))
13163 part = gen_lowpart (SImode, val);
13165 gcc_assert (GET_CODE (part) == CONST_INT);
13167 if (!const_ok_for_arm (INTVAL (part)))
13173 /* Scan INSN and note any of its operands that need fixing.
13174 If DO_PUSHES is false we do not actually push any of the fixups
13175 needed. The function returns TRUE if any fixups were needed/pushed.
13176 This is used by arm_memory_load_p() which needs to know about loads
13177 of constants that will be converted into minipool loads. */
13179 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13181 bool result = false;
13184 extract_insn (insn);
13186 if (!constrain_operands (1))
13187 fatal_insn_not_found (insn);
13189 if (recog_data.n_alternatives == 0)
  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
13194 preprocess_constraints ();
13196 for (opno = 0; opno < recog_data.n_operands; opno++)
13198 /* Things we need to fix can only occur in inputs. */
13199 if (recog_data.operand_type[opno] != OP_IN)
13202 /* If this alternative is a memory reference, then any mention
13203 of constants in this alternative is really to fool reload
13204 into allowing us to accept one there. We need to fix them up
13205 now so that we output the right code. */
13206 if (recog_op_alt[opno][which_alternative].memory_ok)
13208 rtx op = recog_data.operand[opno];
13210 if (CONSTANT_P (op))
13213 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13214 recog_data.operand_mode[opno], op);
13217 else if (GET_CODE (op) == MEM
13218 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13219 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13223 rtx cop = avoid_constant_pool_reference (op);
13225 /* Casting the address of something to a mode narrower
13226 than a word can cause avoid_constant_pool_reference()
13227 to return the pool reference itself. That's no good to
13228 us here. Let's just hope that we can use the
13229 constant pool value directly. */
13231 cop = get_pool_constant (XEXP (op, 0));
13233 push_minipool_fix (insn, address,
13234 recog_data.operand_loc[opno],
13235 recog_data.operand_mode[opno], cop);
13246 /* Convert instructions to their cc-clobbering variant if possible, since
13247 that allows us to use smaller encodings. */
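/* An illustrative example (not from the original sources): in Thumb-2
   the plain

       add     r0, r0, r1      @ 32-bit encoding, flags preserved

   can be replaced, whenever the condition codes are dead at this
   point, by

       adds    r0, r0, r1      @ 16-bit encoding, flags clobbered

   which is what adding the CC_REGNUM clobber to the pattern below
   permits.  */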
13250 thumb2_reorg (void)
13255 INIT_REG_SET (&live);
13257 /* We are freeing block_for_insn in the toplev to keep compatibility
13258 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13259 compute_bb_for_insn ();
13266 COPY_REG_SET (&live, DF_LR_OUT (bb));
13267 df_simulate_initialize_backwards (bb, &live);
13268 FOR_BB_INSNS_REVERSE (bb, insn)
13270 if (NONJUMP_INSN_P (insn)
13271 && !REGNO_REG_SET_P (&live, CC_REGNUM))
13273 rtx pat = PATTERN (insn);
13274 if (GET_CODE (pat) == SET
13275 && low_register_operand (XEXP (pat, 0), SImode)
13276 && thumb_16bit_operator (XEXP (pat, 1), SImode)
13277 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
13278 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
13280 rtx dst = XEXP (pat, 0);
13281 rtx src = XEXP (pat, 1);
13282 rtx op0 = XEXP (src, 0);
13283 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
13284 ? XEXP (src, 1) : NULL);
13286 if (rtx_equal_p (dst, op0)
13287 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
13289 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13290 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13291 rtvec vec = gen_rtvec (2, pat, clobber);
13293 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13294 INSN_CODE (insn) = -1;
13296 /* We can also handle a commutative operation where the
13297 second operand matches the destination. */
13298 else if (op1 && rtx_equal_p (dst, op1))
13300 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13301 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13304 src = copy_rtx (src);
13305 XEXP (src, 0) = op1;
13306 XEXP (src, 1) = op0;
13307 pat = gen_rtx_SET (VOIDmode, dst, src);
13308 vec = gen_rtvec (2, pat, clobber);
13309 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13310 INSN_CODE (insn) = -1;
13315 if (NONDEBUG_INSN_P (insn))
13316 df_simulate_one_insn_backwards (bb, insn, &live);
13320 CLEAR_REG_SET (&live);
13323 /* GCC puts the pool in the wrong place for ARM, since we can only
13324 load addresses a limited distance around the pc. We do some
13325 special munging to move the constant pool values to the correct
13326 point in the code. */
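/* A sketch of the transformation (illustrative; the labels and the
   constant are hypothetical): a value that cannot be encoded as an
   immediate becomes a pc-relative load from a minipool dumped after a
   real or synthesized barrier,

       ldr     r0, .LPOOL      @ load 0x12345678 from the pool
       ...
       b       .LSKIP          @ jump around the dumped pool
   .LPOOL:
       .word   0x12345678
   .LSKIP:

   and each pool must stay within the addressing range of the loads
   that reference it (roughly +/-4KB for an ARM-state LDR literal,
   less in Thumb state).  */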
13331 HOST_WIDE_INT address = 0;
13337 minipool_fix_head = minipool_fix_tail = NULL;
13339 /* The first insn must always be a note, or the code below won't
13340 scan it properly. */
13341 insn = get_insns ();
13342 gcc_assert (GET_CODE (insn) == NOTE);
13345 /* Scan all the insns and record the operands that will need fixing. */
13346 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13348 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13349 && (arm_cirrus_insn_p (insn)
13350 || GET_CODE (insn) == JUMP_INSN
13351 || arm_memory_load_p (insn)))
13352 cirrus_reorg (insn);
13354 if (GET_CODE (insn) == BARRIER)
13355 push_minipool_barrier (insn, address);
13356 else if (INSN_P (insn))
13360 note_invalid_constants (insn, address, true);
13361 address += get_attr_length (insn);
13363 /* If the insn is a vector jump, add the size of the table
13364 and skip the table. */
13365 if ((table = is_jump_table (insn)) != NULL)
13367 address += get_jump_table_size (table);
13371 else if (LABEL_P (insn))
13372 /* Add the worst-case padding due to alignment. We don't add
13373 the _current_ padding because the minipool insertions
13374 themselves might change it. */
13375 address += get_label_padding (insn);
13378 fix = minipool_fix_head;
13380 /* Now scan the fixups and perform the required changes. */
13385 Mfix * last_added_fix;
13386 Mfix * last_barrier = NULL;
13389 /* Skip any further barriers before the next fix. */
13390 while (fix && GET_CODE (fix->insn) == BARRIER)
13393 /* No more fixes. */
13397 last_added_fix = NULL;
13399 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13401 if (GET_CODE (ftmp->insn) == BARRIER)
13403 if (ftmp->address >= minipool_vector_head->max_address)
13406 last_barrier = ftmp;
13408 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13411 last_added_fix = ftmp; /* Keep track of the last fix added. */
13414 /* If we found a barrier, drop back to that; any fixes that we
13415 could have reached but come after the barrier will now go in
13416 the next mini-pool. */
13417 if (last_barrier != NULL)
13419 /* Reduce the refcount for those fixes that won't go into this
13420 pool after all. */
13421 for (fdel = last_barrier->next;
13422 fdel && fdel != ftmp;
13425 fdel->minipool->refcount--;
13426 fdel->minipool = NULL;
13429 ftmp = last_barrier;
13433 /* ftmp is the first fix that we can't fit into this pool and
13434 there are no natural barriers that we could use. Insert a
13435 new barrier in the code somewhere between the previous
13436 fix and this one, and arrange to jump around it. */
13437 HOST_WIDE_INT max_address;
13439 /* The last item on the list of fixes must be a barrier, so
13440 we can never run off the end of the list of fixes without
13441 last_barrier being set. */
13444 max_address = minipool_vector_head->max_address;
13445 /* Check that there isn't another fix that is in range that
13446 we couldn't fit into this pool because the pool was
13447 already too large: we need to put the pool before such an
13448 instruction. The pool itself may come just after the
13449 fix because create_fix_barrier also allows space for a
13450 jump instruction. */
13451 if (ftmp->address < max_address)
13452 max_address = ftmp->address + 1;
13454 last_barrier = create_fix_barrier (last_added_fix, max_address);
13457 assign_minipool_offsets (last_barrier);
13461 if (GET_CODE (ftmp->insn) != BARRIER
13462 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13469 /* Scan over the fixes we have identified for this pool, fixing them
13470 up and adding the constants to the pool itself. */
13471 for (this_fix = fix; this_fix && ftmp != this_fix;
13472 this_fix = this_fix->next)
13473 if (GET_CODE (this_fix->insn) != BARRIER)
13476 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13477 minipool_vector_label),
13478 this_fix->minipool->offset);
13479 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13482 dump_minipool (last_barrier->insn);
13486 /* From now on we must synthesize any constants that we can't handle
13487 directly. This can happen if the RTL gets split during final
13488 instruction generation. */
13489 after_arm_reorg = 1;
13491 /* Free the minipool memory. */
13492 obstack_free (&minipool_obstack, minipool_startobj);
13495 /* Routines to output assembly language. */
13497 /* If the rtx is the correct value, then return the string of the number.
13498 In this way we can ensure that valid double constants are generated even
13499 when cross-compiling. */
13501 fp_immediate_constant (rtx x)
13506 if (!fp_consts_inited)
13509 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13510 for (i = 0; i < 8; i++)
13511 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13512 return strings_fp[i];
13514 gcc_unreachable ();
13517 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13518 static const char *
13519 fp_const_from_val (REAL_VALUE_TYPE *r)
13523 if (!fp_consts_inited)
13526 for (i = 0; i < 8; i++)
13527 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13528 return strings_fp[i];
13530 gcc_unreachable ();
13533 /* Output the operands of a LDM/STM instruction to STREAM.
13534 MASK is the ARM register set mask of which only bits 0-15 are important.
13535 REG is the base register, either the frame pointer or the stack pointer,
13536 INSTR is the possibly suffixed load or store instruction.
13537 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13540 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13541 unsigned long mask, int rfe)
13544 bool not_first = FALSE;
13546 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13547 fputc ('\t', stream);
13548 asm_fprintf (stream, instr, reg);
13549 fputc ('{', stream);
13551 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13552 if (mask & (1 << i))
13555 fprintf (stream, ", ");
13557 asm_fprintf (stream, "%r", i);
13562 fprintf (stream, "}^\n");
13564 fprintf (stream, "}\n");
13568 /* Output a FLDMD instruction to STREAM.
13569 BASE is the register containing the address.
13570 REG and COUNT specify the register range.
13571 Extra registers may be added to avoid hardware bugs.
13573 We output FLDMD even for ARMv5 VFP implementations. Although
13574 FLDMD is technically not supported until ARMv6, it is believed
13575 that all VFP implementations support its use in this context. */
13578 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13582 /* Workaround ARM10 VFPr1 bug. */
13583 if (count == 2 && !arm_arch6)
13590 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13591 load into multiple parts if we have to handle more than 16 registers. */
13594 vfp_output_fldmd (stream, base, reg, 16);
13595 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13599 fputc ('\t', stream);
13600 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13602 for (i = reg; i < reg + count; i++)
13605 fputs (", ", stream);
13606 asm_fprintf (stream, "d%d", i);
13608 fputs ("}\n", stream);
13613 /* Output the assembly for a store multiple. */
13616 vfp_output_fstmd (rtx * operands)
13623 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13624 p = strlen (pattern);
13626 gcc_assert (GET_CODE (operands[1]) == REG);
13628 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13629 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13631 p += sprintf (&pattern[p], ", d%d", base + i);
13633 strcpy (&pattern[p], "}");
13635 output_asm_insn (pattern, operands);
13640 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13641 number of bytes pushed. */
13644 vfp_emit_fstmd (int base_reg, int count)
13651 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13652 register pairs are stored by a store multiple insn. We avoid this
13653 by pushing an extra pair. */
13654 if (count == 2 && !arm_arch6)
13656 if (base_reg == LAST_VFP_REGNUM - 3)
13661 /* FSTMD may not store more than 16 doubleword registers at once. Split
13662 larger stores into multiple parts (up to a maximum of two, in
13663 practice). */
13667 /* NOTE: base_reg is an internal register number, so each D register
13668 counts as 2. */
13669 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13670 saved += vfp_emit_fstmd (base_reg, 16);
13674 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13675 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13677 reg = gen_rtx_REG (DFmode, base_reg);
13680 XVECEXP (par, 0, 0)
13681 = gen_rtx_SET (VOIDmode,
13684 gen_rtx_PRE_MODIFY (Pmode,
13687 (stack_pointer_rtx,
13690 gen_rtx_UNSPEC (BLKmode,
13691 gen_rtvec (1, reg),
13692 UNSPEC_PUSH_MULT));
13694 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13695 plus_constant (stack_pointer_rtx, -(count * 8)));
13696 RTX_FRAME_RELATED_P (tmp) = 1;
13697 XVECEXP (dwarf, 0, 0) = tmp;
13699 tmp = gen_rtx_SET (VOIDmode,
13700 gen_frame_mem (DFmode, stack_pointer_rtx),
13702 RTX_FRAME_RELATED_P (tmp) = 1;
13703 XVECEXP (dwarf, 0, 1) = tmp;
13705 for (i = 1; i < count; i++)
13707 reg = gen_rtx_REG (DFmode, base_reg);
13709 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13711 tmp = gen_rtx_SET (VOIDmode,
13712 gen_frame_mem (DFmode,
13713 plus_constant (stack_pointer_rtx,
13716 RTX_FRAME_RELATED_P (tmp) = 1;
13717 XVECEXP (dwarf, 0, i + 1) = tmp;
13720 par = emit_insn (par);
13721 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13722 RTX_FRAME_RELATED_P (par) = 1;
13727 /* Emit a call instruction with pattern PAT. ADDR is the address of
13728 the call target. */
13731 arm_emit_call_insn (rtx pat, rtx addr)
13735 insn = emit_call_insn (pat);
13737 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13738 If the call might use such an entry, add a use of the PIC register
13739 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13740 if (TARGET_VXWORKS_RTP
13742 && GET_CODE (addr) == SYMBOL_REF
13743 && (SYMBOL_REF_DECL (addr)
13744 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13745 : !SYMBOL_REF_LOCAL_P (addr)))
13747 require_pic_register ();
13748 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13752 /* Output a 'call' insn. */
13754 output_call (rtx *operands)
13756 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13758 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13759 if (REGNO (operands[0]) == LR_REGNUM)
13761 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13762 output_asm_insn ("mov%?\t%0, %|lr", operands);
13765 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13767 if (TARGET_INTERWORK || arm_arch4t)
13768 output_asm_insn ("bx%?\t%0", operands);
13770 output_asm_insn ("mov%?\t%|pc, %0", operands);
13775 /* Output a 'call' insn that is a reference in memory. This is
13776 disabled for ARMv5 and we prefer a blx instead because otherwise
13777 there's a significant performance overhead. */
13779 output_call_mem (rtx *operands)
13781 gcc_assert (!arm_arch5);
13782 if (TARGET_INTERWORK)
13784 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13785 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13786 output_asm_insn ("bx%?\t%|ip", operands);
13788 else if (regno_use_in (LR_REGNUM, operands[0]))
13790 /* LR is used in the memory address. We load the address in the
13791 first instruction. It's safe to use IP as the target of the
13792 load since the call will kill it anyway. */
13793 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13794 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13796 output_asm_insn ("bx%?\t%|ip", operands);
13798 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13802 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13803 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13810 /* Output a move from arm registers to an fpa register.
13811 OPERANDS[0] is an fpa register.
13812 OPERANDS[1] is the first register of an arm register pair. */
13814 output_mov_long_double_fpa_from_arm (rtx *operands)
13816 int arm_reg0 = REGNO (operands[1]);
13819 gcc_assert (arm_reg0 != IP_REGNUM);
13821 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13822 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13823 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13825 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13826 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13831 /* Output a move from an fpa register to arm registers.
13832 OPERANDS[0] is the first register of an arm register pair.
13833 OPERANDS[1] is an fpa register. */
13835 output_mov_long_double_arm_from_fpa (rtx *operands)
13837 int arm_reg0 = REGNO (operands[0]);
13840 gcc_assert (arm_reg0 != IP_REGNUM);
13842 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13843 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13844 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13846 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13847 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13851 /* Output a move from arm registers to arm registers of a long double.
13852 OPERANDS[0] is the destination.
13853 OPERANDS[1] is the source. */
13855 output_mov_long_double_arm_from_arm (rtx *operands)
13857 /* We have to be careful here because the two might overlap. */
13858 int dest_start = REGNO (operands[0]);
13859 int src_start = REGNO (operands[1]);
13863 if (dest_start < src_start)
13865 for (i = 0; i < 3; i++)
13867 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13868 ops[1] = gen_rtx_REG (SImode, src_start + i);
13869 output_asm_insn ("mov%?\t%0, %1", ops);
13874 for (i = 2; i >= 0; i--)
13876 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13877 ops[1] = gen_rtx_REG (SImode, src_start + i);
13878 output_asm_insn ("mov%?\t%0, %1", ops);
13886 arm_emit_movpair (rtx dest, rtx src)
13888 /* If the src is an immediate, simplify it. */
13889 if (CONST_INT_P (src))
13891 HOST_WIDE_INT val = INTVAL (src);
13892 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13893 if ((val >> 16) & 0x0000ffff)
13894 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13896 GEN_INT ((val >> 16) & 0x0000ffff));
13899 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13900 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
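/* A sketch of the result (illustrative; rD is a hypothetical
   destination): for the immediate 0x12345678 the pair expands to

       movw    rD, #0x5678     @ SET of the low halfword
       movt    rD, #0x1234     @ the ZERO_EXTRACT fills the high half

   and a constant whose high halfword is zero needs only the first
   insn.  */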
13903 /* Output a move from arm registers to an fpa register.
13904 OPERANDS[0] is an fpa register.
13905 OPERANDS[1] is the first register of an arm register pair. */
13907 output_mov_double_fpa_from_arm (rtx *operands)
13909 int arm_reg0 = REGNO (operands[1]);
13912 gcc_assert (arm_reg0 != IP_REGNUM);
13914 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13915 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13916 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13917 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13921 /* Output a move from an fpa register to arm registers.
13922 OPERANDS[0] is the first register of an arm register pair.
13923 OPERANDS[1] is an fpa register. */
13925 output_mov_double_arm_from_fpa (rtx *operands)
13927 int arm_reg0 = REGNO (operands[0]);
13930 gcc_assert (arm_reg0 != IP_REGNUM);
13932 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13933 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13934 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13935 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13939 /* Output a move between double words. It must be REG<-MEM
13940 or MEM<-REG. */
13942 output_move_double (rtx *operands, bool emit, int *count)
13944 enum rtx_code code0 = GET_CODE (operands[0]);
13945 enum rtx_code code1 = GET_CODE (operands[1]);
13950 /* The only case when this might happen is when
13951 you are looking at the length of a DImode instruction
13952 that has an invalid constant in it. */
13953 if (code0 == REG && code1 != MEM)
13955 gcc_assert (!emit);
13962 unsigned int reg0 = REGNO (operands[0]);
13964 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13966 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13968 switch (GET_CODE (XEXP (operands[1], 0)))
13975 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13976 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13978 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13983 gcc_assert (TARGET_LDRD);
13985 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13992 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13994 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14002 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14004 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14009 gcc_assert (TARGET_LDRD);
14011 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14016 /* Autoincrement addressing modes should never have overlapping
14017 base and destination registers, and overlapping index registers
14018 are already prohibited, so this doesn't need to worry about
14019 fix_cm3_ldrd. */
14020 otherops[0] = operands[0];
14021 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14022 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14024 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14026 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14028 /* Registers overlap so split out the increment. */
14031 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14032 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14039 /* Use a single insn if we can.
14040 FIXME: IWMMXT allows offsets larger than ldrd can
14041 handle, fix these up with a pair of ldr. */
14043 || GET_CODE (otherops[2]) != CONST_INT
14044 || (INTVAL (otherops[2]) > -256
14045 && INTVAL (otherops[2]) < 256))
14048 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14054 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14055 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14065 /* Use a single insn if we can.
14066 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14067 fix these up with a pair of ldr. */
14069 || GET_CODE (otherops[2]) != CONST_INT
14070 || (INTVAL (otherops[2]) > -256
14071 && INTVAL (otherops[2]) < 256))
14074 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14080 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14081 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14091 /* We might be able to use ldrd %0, %1 here. However the range is
14092 different to ldr/adr, and it is broken on some ARMv7-M
14093 implementations. */
14094 /* Use the second register of the pair to avoid problematic
14095 conditionals. */
14096 otherops[1] = operands[1];
14098 output_asm_insn ("adr%?\t%0, %1", otherops);
14099 operands[1] = otherops[0];
14103 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14105 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14112 /* ??? This needs checking for thumb2. */
14114 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14115 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14117 otherops[0] = operands[0];
14118 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14119 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14121 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14123 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14125 switch ((int) INTVAL (otherops[2]))
14129 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14135 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14141 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14145 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14146 operands[1] = otherops[0];
14148 && (GET_CODE (otherops[2]) == REG
14150 || (GET_CODE (otherops[2]) == CONST_INT
14151 && INTVAL (otherops[2]) > -256
14152 && INTVAL (otherops[2]) < 256)))
14154 if (reg_overlap_mentioned_p (operands[0],
14158 /* Swap base and index registers over to
14159 avoid a conflict. */
14161 otherops[1] = otherops[2];
14164 /* If both registers conflict, it will usually
14165 have been fixed by a splitter. */
14166 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14167 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14171 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14172 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14179 otherops[0] = operands[0];
14181 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14186 if (GET_CODE (otherops[2]) == CONST_INT)
14190 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14191 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14193 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14199 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14205 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14212 return "ldr%(d%)\t%0, [%1]";
14214 return "ldm%(ia%)\t%1, %M0";
14218 otherops[1] = adjust_address (operands[1], SImode, 4);
14219 /* Take care of overlapping base/data reg. */
14220 if (reg_mentioned_p (operands[0], operands[1]))
14224 output_asm_insn ("ldr%?\t%0, %1", otherops);
14225 output_asm_insn ("ldr%?\t%0, %1", operands);
14235 output_asm_insn ("ldr%?\t%0, %1", operands);
14236 output_asm_insn ("ldr%?\t%0, %1", otherops);
14246 /* Constraints should ensure this. */
14247 gcc_assert (code0 == MEM && code1 == REG);
14248 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14250 switch (GET_CODE (XEXP (operands[0], 0)))
14256 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14258 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14263 gcc_assert (TARGET_LDRD);
14265 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14272 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14274 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14282 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14284 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14289 gcc_assert (TARGET_LDRD);
14291 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14296 otherops[0] = operands[1];
14297 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14298 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14300 /* IWMMXT allows offsets larger than ldrd can handle,
14301 fix these up with a pair of ldr. */
14303 && GET_CODE (otherops[2]) == CONST_INT
14304 && (INTVAL(otherops[2]) <= -256
14305 || INTVAL(otherops[2]) >= 256))
14307 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14311 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14312 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14321 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14322 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14328 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14331 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14336 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14341 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14342 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14344 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14348 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14355 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14362 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14367 && (GET_CODE (otherops[2]) == REG
14369 || (GET_CODE (otherops[2]) == CONST_INT
14370 && INTVAL (otherops[2]) > -256
14371 && INTVAL (otherops[2]) < 256)))
14373 otherops[0] = operands[1];
14374 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14376 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14382 otherops[0] = adjust_address (operands[0], SImode, 4);
14383 otherops[1] = operands[1];
14386 output_asm_insn ("str%?\t%1, %0", operands);
14387 output_asm_insn ("str%?\t%H1, %0", otherops);
14397 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14398 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14401 output_move_quad (rtx *operands)
14403 if (REG_P (operands[0]))
14405 /* Load, or reg->reg move. */
14407 if (MEM_P (operands[1]))
14409 switch (GET_CODE (XEXP (operands[1], 0)))
14412 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14417 output_asm_insn ("adr%?\t%0, %1", operands);
14418 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14422 gcc_unreachable ();
14430 gcc_assert (REG_P (operands[1]));
14432 dest = REGNO (operands[0]);
14433 src = REGNO (operands[1]);
14435 /* This seems pretty dumb, but hopefully GCC won't try to do it
14436 very often. */
14438 for (i = 0; i < 4; i++)
14440 ops[0] = gen_rtx_REG (SImode, dest + i);
14441 ops[1] = gen_rtx_REG (SImode, src + i);
14442 output_asm_insn ("mov%?\t%0, %1", ops);
14445 for (i = 3; i >= 0; i--)
14447 ops[0] = gen_rtx_REG (SImode, dest + i);
14448 ops[1] = gen_rtx_REG (SImode, src + i);
14449 output_asm_insn ("mov%?\t%0, %1", ops);
14455 gcc_assert (MEM_P (operands[0]));
14456 gcc_assert (REG_P (operands[1]));
14457 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14459 switch (GET_CODE (XEXP (operands[0], 0)))
14462 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14466 gcc_unreachable ();
14473 /* Output a VFP load or store instruction. */
14476 output_move_vfp (rtx *operands)
14478 rtx reg, mem, addr, ops[2];
14479 int load = REG_P (operands[0]);
14480 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14481 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14484 enum machine_mode mode;
14486 reg = operands[!load];
14487 mem = operands[load];
14489 mode = GET_MODE (reg);
14491 gcc_assert (REG_P (reg));
14492 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14493 gcc_assert (mode == SFmode
14497 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14498 gcc_assert (MEM_P (mem));
14500 addr = XEXP (mem, 0);
14502 switch (GET_CODE (addr))
14505 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14506 ops[0] = XEXP (addr, 0);
14511 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14512 ops[0] = XEXP (addr, 0);
14517 templ = "f%s%c%%?\t%%%s0, %%1%s";
14523 sprintf (buff, templ,
14524 load ? "ld" : "st",
14527 integer_p ? "\t%@ int" : "");
14528 output_asm_insn (buff, ops);
14533 /* Output a Neon quad-word load or store, or a load or store for
14534 larger structure modes.
14536 WARNING: The ordering of elements is weird in big-endian mode,
14537 because we use VSTM, as required by the EABI. GCC RTL defines
14538 element ordering based on in-memory order. This can differ
14539 from the architectural ordering of elements within a NEON register.
14540 The intrinsics defined in arm_neon.h use the NEON register element
14541 ordering, not the GCC RTL element ordering.
14543 For example, the in-memory ordering of a big-endian quadword
14544 vector with 16-bit elements when stored from register pair {d0,d1}
14545 will be (lowest address first, d0[N] is NEON register element N):
14547 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14549 When necessary, quadword registers (dN, dN+1) are moved to ARM
14550 registers from rN in the order:
14552 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14554 So that STM/LDM can be used on vectors in ARM registers, and the
14555 same memory layout will result as if VSTM/VLDM were used. */
14558 output_move_neon (rtx *operands)
14560 rtx reg, mem, addr, ops[2];
14561 int regno, load = REG_P (operands[0]);
14564 enum machine_mode mode;
14566 reg = operands[!load];
14567 mem = operands[load];
14569 mode = GET_MODE (reg);
14571 gcc_assert (REG_P (reg));
14572 regno = REGNO (reg);
14573 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14574 || NEON_REGNO_OK_FOR_QUAD (regno));
14575 gcc_assert (VALID_NEON_DREG_MODE (mode)
14576 || VALID_NEON_QREG_MODE (mode)
14577 || VALID_NEON_STRUCT_MODE (mode));
14578 gcc_assert (MEM_P (mem));
14580 addr = XEXP (mem, 0);
14582 /* Strip off const from addresses like (const (plus (...))). */
14583 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14584 addr = XEXP (addr, 0);
14586 switch (GET_CODE (addr))
14589 templ = "v%smia%%?\t%%0!, %%h1";
14590 ops[0] = XEXP (addr, 0);
14595 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14596 templ = "v%smdb%%?\t%%0!, %%h1";
14597 ops[0] = XEXP (addr, 0);
14602 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14603 gcc_unreachable ();
14608 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14611 for (i = 0; i < nregs; i++)
14613 /* We're only using DImode here because it's a convenient size. */
14614 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14615 ops[1] = adjust_address (mem, DImode, 8 * i);
14616 if (reg_overlap_mentioned_p (ops[0], mem))
14618 gcc_assert (overlap == -1);
14623 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14624 output_asm_insn (buff, ops);
14629 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14630 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14631 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14632 output_asm_insn (buff, ops);
14639 templ = "v%smia%%?\t%%m0, %%h1";
14644 sprintf (buff, templ, load ? "ld" : "st");
14645 output_asm_insn (buff, ops);
14650 /* Compute and return the length of neon_mov<mode>, where <mode> is
14651 one of VSTRUCT modes: EI, OI, CI or XI. */
14653 arm_attr_length_move_neon (rtx insn)
14655 rtx reg, mem, addr;
14657 enum machine_mode mode;
14659 extract_insn_cached (insn);
14661 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14663 mode = GET_MODE (recog_data.operand[0]);
14674 gcc_unreachable ();
14678 load = REG_P (recog_data.operand[0]);
14679 reg = recog_data.operand[!load];
14680 mem = recog_data.operand[load];
14682 gcc_assert (MEM_P (mem));
14684 mode = GET_MODE (reg);
14685 addr = XEXP (mem, 0);
14687 /* Strip off const from addresses like (const (plus (...))). */
14688 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14689 addr = XEXP (addr, 0);
14691 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14693 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14700 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14701 return zero. */
14704 arm_address_offset_is_imm (rtx insn)
14708 extract_insn_cached (insn);
14710 if (REG_P (recog_data.operand[0]))
14713 mem = recog_data.operand[0];
14715 gcc_assert (MEM_P (mem));
14717 addr = XEXP (mem, 0);
14719 if (GET_CODE (addr) == REG
14720 || (GET_CODE (addr) == PLUS
14721 && GET_CODE (XEXP (addr, 0)) == REG
14722 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14728 /* Output an ADD r, s, #n where n may be too big for one instruction.
14729 If adding zero to one register, output nothing. */
14731 output_add_immediate (rtx *operands)
14733 HOST_WIDE_INT n = INTVAL (operands[2]);
14735 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14738 output_multi_immediate (operands,
14739 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14742 output_multi_immediate (operands,
14743 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14750 /* Output a multiple immediate operation.
14751 OPERANDS is the vector of operands referred to in the output patterns.
14752 INSTR1 is the output pattern to use for the first constant.
14753 INSTR2 is the output pattern to use for subsequent constants.
14754 IMMED_OP is the index of the constant slot in OPERANDS.
14755 N is the constant value. */
14756 static const char *
14757 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14758 int immed_op, HOST_WIDE_INT n)
14760 #if HOST_BITS_PER_WIDE_INT > 32
14766 /* Quick and easy output. */
14767 operands[immed_op] = const0_rtx;
14768 output_asm_insn (instr1, operands);
14773 const char * instr = instr1;
14775 /* Note that n is never zero here (which would give no output). */
14776 for (i = 0; i < 32; i += 2)
14780 operands[immed_op] = GEN_INT (n & (255 << i));
14781 output_asm_insn (instr, operands);
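/* For example (illustrative; registers hypothetical): adding the
   constant 0xff00ff is split into byte-sized rotated immediates,

       add     r0, r1, #0x0000ff       @ first chunk, from INSTR1
       add     r0, r0, #0xff0000       @ later chunk, from INSTR2

   each operand being an 8-bit value rotated by an even amount.  */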
14791 /* Return the name of a shifter operation. */
14792 static const char *
14793 arm_shift_nmem (enum rtx_code code)
14798 return ARM_LSL_NAME;
14814 /* Return the appropriate ARM instruction for the operation code.
14815 The returned result should not be overwritten. OP is the rtx of the
14816 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14817 was shifted. */
14819 arithmetic_instr (rtx op, int shift_first_arg)
14821 switch (GET_CODE (op))
14827 return shift_first_arg ? "rsb" : "sub";
14842 return arm_shift_nmem (GET_CODE (op));
14845 gcc_unreachable ();
14849 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14850 for the operation code. The returned result should not be overwritten.
14851 OP is the rtx code of the shift.
14852 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise
14853 it will be the constant shift amount. */
14854 static const char *
14855 shift_op (rtx op, HOST_WIDE_INT *amountp)
14858 enum rtx_code code = GET_CODE (op);
14860 switch (GET_CODE (XEXP (op, 1)))
14868 *amountp = INTVAL (XEXP (op, 1));
14872 gcc_unreachable ();
14878 gcc_assert (*amountp != -1);
14879 *amountp = 32 - *amountp;
14882 /* Fall through. */
14888 mnem = arm_shift_nmem (code);
14892 /* We never have to worry about the amount being other than a
14893 power of 2, since this case can never be reloaded from a reg. */
14894 gcc_assert (*amountp != -1);
14895 *amountp = int_log2 (*amountp);
14896 return ARM_LSL_NAME;
14899 gcc_unreachable ();
14902 if (*amountp != -1)
14904 /* This is not 100% correct, but follows from the desire to merge
14905 multiplication by a power of 2 with the recognizer for a
14906 shift. >=32 is not a valid shift for "lsl", so we must try and
14907 output a shift that produces the correct arithmetical result.
14908 Using lsr #32 is identical except for the fact that the carry bit
14909 is not set correctly if we set the flags; but we never use the
14910 carry bit from such an operation, so we can ignore that. */
14911 if (code == ROTATERT)
14912 /* Rotate is just modulo 32. */
14914 else if (*amountp != (*amountp & 31))
14916 if (code == ASHIFT)
14921 /* Shifts of 0 are no-ops. */
14929 /* Obtain the shift count from the power of two POWER. */
14931 static HOST_WIDE_INT
14932 int_log2 (HOST_WIDE_INT power)
14934 HOST_WIDE_INT shift = 0;
14936 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14938 gcc_assert (shift <= 31);
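/* Example (illustrative): a multiply by 8 that was matched as a shift
   reaches shift_op() with an amount of 8; int_log2 (8) returns 3 and
   the operand is printed as "lsl #3".  */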
14945 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14946 because /bin/as is horribly restrictive. The judgement about
14947 whether or not each character is 'printable' (and can be output as
14948 is) or not (and must be printed with an octal escape) must be made
14949 with reference to the *host* character set -- the situation is
14950 similar to that discussed in the comments above pp_c_char in
14951 c-pretty-print.c. */
14953 #define MAX_ASCII_LEN 51
14956 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14959 int len_so_far = 0;
14961 fputs ("\t.ascii\t\"", stream);
14963 for (i = 0; i < len; i++)
14967 if (len_so_far >= MAX_ASCII_LEN)
14969 fputs ("\"\n\t.ascii\t\"", stream);
14975 if (c == '\\' || c == '\"')
14977 putc ('\\', stream);
14985 fprintf (stream, "\\%03o", c);
14990 fputs ("\"\n", stream);
14993 /* Compute the register save mask for registers 0 through 12
14994 inclusive. This code is used by arm_compute_save_reg_mask. */
14996 static unsigned long
14997 arm_compute_save_reg0_reg12_mask (void)
14999 unsigned long func_type = arm_current_func_type ();
15000 unsigned long save_reg_mask = 0;
15003 if (IS_INTERRUPT (func_type))
15005 unsigned int max_reg;
15006 /* Interrupt functions must not corrupt any registers,
15007 even call clobbered ones. If this is a leaf function
15008 we can just examine the registers used by the RTL, but
15009 otherwise we have to assume that whatever function is
15010 called might clobber anything, and so we have to save
15011 all the call-clobbered registers as well. */
15012 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15013 /* FIQ handlers have registers r8 - r12 banked, so
15014 we only need to check r0 - r7. Normal ISRs only
15015 bank r14 and r15, so we must check up to r12.
15016 r13 is the stack pointer, which is always preserved,
15017 so we do not need to consider it here. */
15022 for (reg = 0; reg <= max_reg; reg++)
15023 if (df_regs_ever_live_p (reg)
15024 || (! current_function_is_leaf && call_used_regs[reg]))
15025 save_reg_mask |= (1 << reg);
15027 /* Also save the pic base register if necessary. */
15029 && !TARGET_SINGLE_PIC_BASE
15030 && arm_pic_register != INVALID_REGNUM
15031 && crtl->uses_pic_offset_table)
15032 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15034 else if (IS_VOLATILE (func_type))
15036 /* For noreturn functions we historically omitted register saves
15037 altogether. However this really messes up debugging. As a
15038 compromise save just the frame pointers. Combined with the link
15039 register saved elsewhere, this should be sufficient to get
15040 a backtrace. */
15041 if (frame_pointer_needed)
15042 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15043 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15044 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15045 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15046 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15050 /* In the normal case we only need to save those registers
15051 which are call saved and which are used by this function. */
15052 for (reg = 0; reg <= 11; reg++)
15053 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15054 save_reg_mask |= (1 << reg);
15056 /* Handle the frame pointer as a special case. */
15057 if (frame_pointer_needed)
15058 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15060 /* If we aren't loading the PIC register,
15061 don't stack it even though it may be live. */
15063 && !TARGET_SINGLE_PIC_BASE
15064 && arm_pic_register != INVALID_REGNUM
15065 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15066 || crtl->uses_pic_offset_table))
15067 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15069 /* The prologue will copy SP into R0, so save it. */
15070 if (IS_STACKALIGN (func_type))
15071 save_reg_mask |= 1;
15074 /* Save registers so the exception handler can modify them. */
15075 if (crtl->calls_eh_return)
15081 reg = EH_RETURN_DATA_REGNO (i);
15082 if (reg == INVALID_REGNUM)
15084 save_reg_mask |= 1 << reg;
15088 return save_reg_mask;
15092 /* Compute the number of bytes used to store the static chain register on the
15093 stack, above the stack frame. We need to know this accurately to get the
15094 alignment of the rest of the stack frame correct. */
15096 static int arm_compute_static_chain_stack_bytes (void)
15098 unsigned long func_type = arm_current_func_type ();
15099 int static_chain_stack_bytes = 0;
15101 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15102 && IS_NESTED (func_type)
15103 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15104 static_chain_stack_bytes = 4;
15106 return static_chain_stack_bytes;
15110 /* Compute a bit mask of which registers need to be
15111 saved on the stack for the current function.
15112 This is used by arm_get_frame_offsets, which may add extra registers. */
15114 static unsigned long
15115 arm_compute_save_reg_mask (void)
15117 unsigned int save_reg_mask = 0;
15118 unsigned long func_type = arm_current_func_type ();
15121 if (IS_NAKED (func_type))
15122 /* This should never really happen. */
15125 /* If we are creating a stack frame, then we must save the frame pointer,
15126 IP (which will hold the old stack pointer), LR and the PC. */
15127 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15129 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15132 | (1 << PC_REGNUM);
15134 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15136 /* Decide if we need to save the link register.
15137 Interrupt routines have their own banked link register,
15138 so they never need to save it.
15139 Otherwise if we do not use the link register we do not need to save
15140 it. If we are pushing other registers onto the stack however, we
15141 can save an instruction in the epilogue by pushing the link register
15142 now and then popping it back into the PC. This incurs extra memory
15143 accesses though, so we only do it when optimizing for size, and only
15144 if we know that we will not need a fancy return sequence. */
15145 if (df_regs_ever_live_p (LR_REGNUM)
15148 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15149 && !crtl->calls_eh_return))
15150 save_reg_mask |= 1 << LR_REGNUM;
15152 if (cfun->machine->lr_save_eliminated)
15153 save_reg_mask &= ~ (1 << LR_REGNUM);
15155 if (TARGET_REALLY_IWMMXT
15156 && ((bit_count (save_reg_mask)
15157 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15158 arm_compute_static_chain_stack_bytes ())
15161 /* The total number of registers that are going to be pushed
15162 onto the stack is odd. We need to ensure that the stack
15163 is 64-bit aligned before we start to save iWMMXt registers,
15164 and also before we start to create locals. (A local variable
15165 might be a double or long long which we will load/store using
15166 an iWMMXt instruction). Therefore we need to push another
15167 ARM register, so that the stack will be 64-bit aligned. We
15168 try to avoid using the arg registers (r0 - r3) as they might be
15169 used to pass values in a tail call. */
15170 for (reg = 4; reg <= 12; reg++)
15171 if ((save_reg_mask & (1 << reg)) == 0)
15175 save_reg_mask |= (1 << reg);
15178 cfun->machine->sibcall_blocked = 1;
15179 save_reg_mask |= (1 << 3);
15183 /* We may need to push an additional register for use initializing the
15184 PIC base register. */
15185 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15186 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15188 reg = thumb_find_work_register (1 << 4);
15189 if (!call_used_regs[reg])
15190 save_reg_mask |= (1 << reg);
15193 return save_reg_mask;
15197 /* Compute a bit mask of which registers need to be
15198 saved on the stack for the current function. */
15199 static unsigned long
15200 thumb1_compute_save_reg_mask (void)
15202 unsigned long mask;
15206 for (reg = 0; reg < 12; reg ++)
15207 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15211 && !TARGET_SINGLE_PIC_BASE
15212 && arm_pic_register != INVALID_REGNUM
15213 && crtl->uses_pic_offset_table)
15214 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15216 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15217 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15218 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15220 /* LR will also be pushed if any lo regs are pushed. */
15221 if (mask & 0xff || thumb_force_lr_save ())
15222 mask |= (1 << LR_REGNUM);
15224 /* Make sure we have a low work register if we need one.
15225 We will need one if we are going to push a high register,
15226 but we are not currently intending to push a low register. */
15227 if ((mask & 0xff) == 0
15228 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15230 /* Use thumb_find_work_register to choose which register
15231 we will use. If the register is live then we will
15232 have to push it. Use LAST_LO_REGNUM as our fallback
15233 choice for the register to select. */
15234 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15235 /* Make sure the register returned by thumb_find_work_register is
15236 not part of the return value. */
15237 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15238 reg = LAST_LO_REGNUM;
15240 if (! call_used_regs[reg])
15244 /* The 504 below is 8 bytes less than 512 because there are two possible
15245 alignment words. We can't tell here if they will be present or not so we
15246 have to play it safe and assume that they are. */
15247 if ((CALLER_INTERWORKING_SLOT_SIZE +
15248 ROUND_UP_WORD (get_frame_size ()) +
15249 crtl->outgoing_args_size) >= 504)
15251 /* This is the same as the code in thumb1_expand_prologue() which
15252 determines which register to use for stack decrement. */
15253 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15254 if (mask & (1 << reg))
15257 if (reg > LAST_LO_REGNUM)
15259 /* Make sure we have a register available for stack decrement. */
15260 mask |= 1 << LAST_LO_REGNUM;
15268 /* Return the number of bytes required to save VFP registers. */
15270 arm_get_vfp_saved_size (void)
15272 unsigned int regno;
15277 /* Space for saved VFP registers. */
15278 if (TARGET_HARD_FLOAT && TARGET_VFP)
15281 for (regno = FIRST_VFP_REGNUM;
15282 regno < LAST_VFP_REGNUM;
15285 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15286 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15290 /* Workaround ARM10 VFPr1 bug. */
15291 if (count == 2 && !arm_arch6)
15293 saved += count * 8;
15302 if (count == 2 && !arm_arch6)
15304 saved += count * 8;
15311 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15312 everything bar the final return instruction. */
15314 output_return_instruction (rtx operand, int really_return, int reverse)
15316 char conditional[10];
15319 unsigned long live_regs_mask;
15320 unsigned long func_type;
15321 arm_stack_offsets *offsets;
15323 func_type = arm_current_func_type ();
15325 if (IS_NAKED (func_type))
15328 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15330 /* If this function was declared non-returning, and we have
15331 found a tail call, then we have to trust that the called
15332 function won't return. */
15337 /* Otherwise, trap an attempted return by aborting. */
15339 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15341 assemble_external_libcall (ops[1]);
15342 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15348 gcc_assert (!cfun->calls_alloca || really_return);
15350 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15352 cfun->machine->return_used_this_function = 1;
15354 offsets = arm_get_frame_offsets ();
15355 live_regs_mask = offsets->saved_regs_mask;
15357 if (live_regs_mask)
15359 const char * return_reg;
15361 /* If we do not have any special requirements for function exit
15362 (e.g. interworking) then we can load the return address
15363 directly into the PC. Otherwise we must load it into LR. */
15365 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15366 return_reg = reg_names[PC_REGNUM];
15368 return_reg = reg_names[LR_REGNUM];
15370 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15372 /* There are three possible reasons for the IP register
15373 being saved. 1) a stack frame was created, in which case
15374 IP contains the old stack pointer, or 2) an ISR routine
15375 corrupted it, or 3) it was saved to align the stack on
15376 iWMMXt. In case 1, restore IP into SP, otherwise just
15377 pop it. */
15378 if (frame_pointer_needed)
15380 live_regs_mask &= ~ (1 << IP_REGNUM);
15381 live_regs_mask |= (1 << SP_REGNUM);
15384 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15387 /* On some ARM architectures it is faster to use LDR rather than
15388 LDM to load a single register. On other architectures, the
15389 cost is the same. In 26 bit mode, or for exception handlers,
15390 we have to use LDM to load the PC so that the CPSR is also
15391 restored. */
15392 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15393 if (live_regs_mask == (1U << reg))
15396 if (reg <= LAST_ARM_REGNUM
15397 && (reg != LR_REGNUM
15399 || ! IS_INTERRUPT (func_type)))
15401 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15402 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15409 /* Generate the load multiple instruction to restore the
15410 registers. Note we can get here, even if
15411 frame_pointer_needed is true, but only if sp already
15412 points to the base of the saved core registers. */
15413 if (live_regs_mask & (1 << SP_REGNUM))
15415 unsigned HOST_WIDE_INT stack_adjust;
15417 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15418 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15420 if (stack_adjust && arm_arch5 && TARGET_ARM)
15421 if (TARGET_UNIFIED_ASM)
15422 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15424 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15427 /* If we can't use ldmib (SA110 bug),
15428 then try to pop r3 instead. */
15430 live_regs_mask |= 1 << 3;
15432 if (TARGET_UNIFIED_ASM)
15433 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15435 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15439 if (TARGET_UNIFIED_ASM)
15440 sprintf (instr, "pop%s\t{", conditional);
15442 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15444 p = instr + strlen (instr);
15446 for (reg = 0; reg <= SP_REGNUM; reg++)
15447 if (live_regs_mask & (1 << reg))
15449 int l = strlen (reg_names[reg]);
15455 memcpy (p, ", ", 2);
15459 memcpy (p, "%|", 2);
15460 memcpy (p + 2, reg_names[reg], l);
15464 if (live_regs_mask & (1 << LR_REGNUM))
15466 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15467 /* If returning from an interrupt, restore the CPSR. */
15468 if (IS_INTERRUPT (func_type))
15475 output_asm_insn (instr, & operand);
15477 /* See if we need to generate an extra instruction to
15478 perform the actual function return. */
15480 && func_type != ARM_FT_INTERWORKED
15481 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15483 /* The return has already been handled
15484 by loading the LR into the PC. */
15491 switch ((int) ARM_FUNC_TYPE (func_type))
15495 /* ??? This is wrong for unified assembly syntax. */
15496 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15499 case ARM_FT_INTERWORKED:
15500 sprintf (instr, "bx%s\t%%|lr", conditional);
15503 case ARM_FT_EXCEPTION:
15504 /* ??? This is wrong for unified assembly syntax. */
15505 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15509 /* Use bx if it's available. */
15510 if (arm_arch5 || arm_arch4t)
15511 sprintf (instr, "bx%s\t%%|lr", conditional);
15513 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15517 output_asm_insn (instr, & operand);
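/* Typical results (illustrative, not from the original sources): a
   function that pushed {r4, lr} and needs no special exit sequence
   can return with a single "pop {r4, pc}", while a leaf that saved
   nothing returns with "bx lr" (or "mov pc, lr" before ARMv4t).  */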
15523 /* Write the function name into the code section, directly preceding
15524 the function prologue.
15526 Code will be output similar to this:
15528 .ascii "arm_poke_function_name", 0
15531 .word 0xff000000 + (t1 - t0)
15532 arm_poke_function_name
15534 stmfd sp!, {fp, ip, lr, pc}
15537 When performing a stack backtrace, code can inspect the value
15538 of 'pc' stored at 'fp' + 0. If the trace function then looks
15539 at location pc - 12 and the top 8 bits are set, then we know
15540 that there is a function name embedded immediately preceding this
15541 location, and that its length is ((pc[-3]) & 0x00ffffff).
15543 We assume that pc is declared as a pointer to an unsigned long.
15545 It is of no benefit to output the function name if we are assembling
15546 a leaf function. These function types will not contain a stack
15547 backtrace structure, therefore it is not possible to determine the
15548 function name. */
15550 arm_poke_function_name (FILE *stream, const char *name)
15552 unsigned long alignlength;
15553 unsigned long length;
15556 length = strlen (name) + 1;
15557 alignlength = ROUND_UP_WORD (length);
15559 ASM_OUTPUT_ASCII (stream, name, length);
15560 ASM_OUTPUT_ALIGN (stream, 2);
15561 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15562 assemble_aligned_integer (UNITS_PER_WORD, x);
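/* A reader-side sketch (illustrative, not part of GCC): given the
   'pc' word saved in a frame, a backtrace tool could recover the
   embedded name under the layout described above:

     unsigned long marker = ((unsigned long *) pc)[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & 0x00ffffff;
         const char *name = (const char *) pc - 12 - len;
         use (name);
       }

   where len includes the NUL terminator and any word-alignment
   padding, and use() stands for whatever the tool does with the
   recovered string.  */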
15565 /* Place some comments into the assembler stream
15566 describing the current function. */
15568 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15570 unsigned long func_type;
15572 /* ??? Do we want to print some of the below anyway? */
15576 /* Sanity check. */
15577 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15579 func_type = arm_current_func_type ();
15581 switch ((int) ARM_FUNC_TYPE (func_type))
15584 case ARM_FT_NORMAL:
15586 case ARM_FT_INTERWORKED:
15587 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15590 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15593 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15595 case ARM_FT_EXCEPTION:
15596 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15600 if (IS_NAKED (func_type))
15601 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15603 if (IS_VOLATILE (func_type))
15604 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15606 if (IS_NESTED (func_type))
15607 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15608 if (IS_STACKALIGN (func_type))
15609 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15611 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15613 crtl->args.pretend_args_size, frame_size);
15615 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15616 frame_pointer_needed,
15617 cfun->machine->uses_anonymous_args);
15619 if (cfun->machine->lr_save_eliminated)
15620 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15622 if (crtl->calls_eh_return)
15623 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15628 arm_output_epilogue (rtx sibling)
15631 unsigned long saved_regs_mask;
15632 unsigned long func_type;
15633 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15634 frame that is $fp + 4 for a non-variadic function. */
15635 int floats_offset = 0;
15637 FILE * f = asm_out_file;
15638 unsigned int lrm_count = 0;
15639 int really_return = (sibling == NULL);
15641 arm_stack_offsets *offsets;
15643 /* If we have already generated the return instruction
15644 then it is futile to generate anything else. */
15645 if (use_return_insn (FALSE, sibling)
15646 && (cfun->machine->return_used_this_function != 0))
15649 func_type = arm_current_func_type ();
15651 if (IS_NAKED (func_type))
15652 /* Naked functions don't have epilogues. */
15655 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15659 /* A volatile function should never return. Call abort. */
15660 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15661 assemble_external_libcall (op);
15662 output_asm_insn ("bl\t%a0", &op);
15667 /* If we are throwing an exception, then we really must be doing a
15668 return, so we can't tail-call. */
15669 gcc_assert (!crtl->calls_eh_return || really_return);
15671 offsets = arm_get_frame_offsets ();
15672 saved_regs_mask = offsets->saved_regs_mask;
15675 lrm_count = bit_count (saved_regs_mask);
15677 floats_offset = offsets->saved_args;
15678 /* Compute how far away the floats will be. */
15679 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15680 if (saved_regs_mask & (1 << reg))
15681 floats_offset += 4;
15683 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15685 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15686 int vfp_offset = offsets->frame;
15688 if (TARGET_FPA_EMU2)
15690 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15691 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15693 floats_offset += 12;
15694 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15695 reg, FP_REGNUM, floats_offset - vfp_offset);
15700 start_reg = LAST_FPA_REGNUM;
15702 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15704 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15706 floats_offset += 12;
15708 /* We can't unstack more than four registers at once. */
15709 if (start_reg - reg == 3)
15711 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15712 reg, FP_REGNUM, floats_offset - vfp_offset);
15713 start_reg = reg - 1;
15718 if (reg != start_reg)
15719 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15720 reg + 1, start_reg - reg,
15721 FP_REGNUM, floats_offset - vfp_offset);
15722 start_reg = reg - 1;
15726 /* Just in case the last register checked also needs unstacking. */
15727 if (reg != start_reg)
15728 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15729 reg + 1, start_reg - reg,
15730 FP_REGNUM, floats_offset - vfp_offset);
15733 if (TARGET_HARD_FLOAT && TARGET_VFP)
15737 /* The fldmd insns do not have base+offset addressing
15738 modes, so we use IP to hold the address. */
15739 saved_size = arm_get_vfp_saved_size ();
15741 if (saved_size > 0)
15743 floats_offset += saved_size;
15744 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15745 FP_REGNUM, floats_offset - vfp_offset);
15747 start_reg = FIRST_VFP_REGNUM;
15748 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15750 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15751 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15753 if (start_reg != reg)
15754 vfp_output_fldmd (f, IP_REGNUM,
15755 (start_reg - FIRST_VFP_REGNUM) / 2,
15756 (reg - start_reg) / 2);
15757 start_reg = reg + 2;
15760 if (start_reg != reg)
15761 vfp_output_fldmd (f, IP_REGNUM,
15762 (start_reg - FIRST_VFP_REGNUM) / 2,
15763 (reg - start_reg) / 2);
15768 /* The frame pointer is guaranteed to be non-double-word aligned.
15769 This is because it is set to (old_stack_pointer - 4) and the
15770 old_stack_pointer was double word aligned. Thus the offset to
15771 the iWMMXt registers to be loaded must also be non-double-word
15772 sized, so that the resultant address *is* double-word aligned.
15773 We can ignore floats_offset since that was already included in
15774 the live_regs_mask. */
15775 lrm_count += (lrm_count % 2 ? 2 : 1);
15777 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15778 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15780 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15781 reg, FP_REGNUM, lrm_count * 4);
15786 /* saved_regs_mask should contain the IP, which at the time of stack
15787 frame generation actually contains the old stack pointer. So a
15788 quick way to unwind the stack is just pop the IP register directly
15789 into the stack pointer. */
15790 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15791 saved_regs_mask &= ~ (1 << IP_REGNUM);
15792 saved_regs_mask |= (1 << SP_REGNUM);
15794 /* There are two registers left in saved_regs_mask - LR and PC. We
15795 only need to restore the LR register (the return address), but to
15796 save time we can load it directly into the PC, unless we need a
15797 special function exit sequence, or we are not really returning. */
15799 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15800 && !crtl->calls_eh_return)
15801 /* Delete the LR from the register mask, so that the LR on
15802 the stack is loaded into the PC in the register mask. */
15803 saved_regs_mask &= ~ (1 << LR_REGNUM);
15805 saved_regs_mask &= ~ (1 << PC_REGNUM);
15807 /* We must use SP as the base register, because SP is one of the
15808 registers being restored. If an interrupt or page fault
15809 happens in the ldm instruction, the SP might or might not
15810 have been restored. That would be bad, as then SP will no
15811 longer indicate the safe area of stack, and we can get stack
15812 corruption. Using SP as the base register means that it will
15813 be reset correctly to the original value, should an interrupt
15814 occur. If the stack pointer already points at the right
15815 place, then omit the subtraction. */
15816 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15817 || cfun->calls_alloca)
15818 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15819 4 * bit_count (saved_regs_mask));
15820 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
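/* Illustrative output for a saved_regs_mask of {r4, fp, sp, pc}:

	sub	sp, fp, #16
	ldmfd	sp, {r4, fp, sp, pc}

   No writeback is used because sp itself appears in the register list;
   sp is instead reloaded from the slot holding the saved IP, i.e. the
   old stack pointer.  */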
15822 if (IS_INTERRUPT (func_type))
15823 /* Interrupt handlers will have pushed the
15824 IP onto the stack, so restore it now. */
15825 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15829 /* This branch is executed for ARM mode (non-apcs frames) and
15830 Thumb-2 mode. Frame layout is essentially the same for those
15831 cases, except that in ARM mode the frame pointer points to the
15832 first saved register, while in Thumb-2 mode it points
15833 to the last saved register.
15835 It is possible to make the frame pointer point to the last saved
15836 register in both cases, and remove some of the conditionals below.
15837 That means that the fp setup in the prologue would be just "mov fp, sp"
15838 and the sp restore in the epilogue would be just "mov sp, fp", whereas
15839 now we have to use add/sub in those cases. However, the value
15840 of that would be marginal, as both mov and add/sub are 32-bit
15841 in ARM mode, and it would require extra conditionals
15842 in arm_expand_prologue to distinguish the ARM-apcs-frame case
15843 (where the frame pointer is required to point at the first register)
15844 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
15845 until a real need arises. */
15846 unsigned HOST_WIDE_INT amount;
15848 /* Restore stack pointer if necessary. */
15849 if (TARGET_ARM && frame_pointer_needed)
15851 operands[0] = stack_pointer_rtx;
15852 operands[1] = hard_frame_pointer_rtx;
15854 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15855 output_add_immediate (operands);
15859 if (frame_pointer_needed)
15861 /* For Thumb-2 restore sp from the frame pointer.
15862 Operand restrictions mean we have to increment FP, then copy to SP. */
15864 amount = offsets->locals_base - offsets->saved_regs;
15865 operands[0] = hard_frame_pointer_rtx;
15869 unsigned long count;
15870 operands[0] = stack_pointer_rtx;
15871 amount = offsets->outgoing_args - offsets->saved_regs;
15872 /* Pop call-clobbered registers if doing so avoids a
15873 separate stack adjustment. */
15874 count = offsets->saved_regs - offsets->saved_args;
15877 && !crtl->calls_eh_return
15878 && bit_count(saved_regs_mask) * 4 == count
15879 && !IS_INTERRUPT (func_type)
15880 && !IS_STACKALIGN (func_type)
15881 && !crtl->tail_call_emit)
15883 unsigned long mask;
15884 /* Preserve return values, of any size. */
15885 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15886 mask ^= 0xf;
15887 mask &= ~saved_regs_mask;
15889 while (bit_count (mask) * 4 > amount)
15891 while ((mask & (1 << reg)) == 0)
15893 mask &= ~(1 << reg);
15895 if (bit_count (mask) * 4 == amount) {
15897 saved_regs_mask |= mask;
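/* Worked example (illustrative): a function returning one SImode value
   starts with mask = {r0}, which the XOR turns into {r1, r2, r3}; with
   amount == 8 the trimming loop leaves {r2, r3}, so the separate

	add	sp, sp, #8
	pop	{r4, pc}

   collapses into the single

	pop	{r2, r3, r4, pc}

   with the two dead argument registers soaking up the 8 bytes.  */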
15904 operands[1] = operands[0];
15905 operands[2] = GEN_INT (amount);
15906 output_add_immediate (operands);
15908 if (frame_pointer_needed)
15909 asm_fprintf (f, "\tmov\t%r, %r\n",
15910 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15913 if (TARGET_FPA_EMU2)
15915 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15916 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15917 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15922 start_reg = FIRST_FPA_REGNUM;
15924 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15926 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15928 if (reg - start_reg == 3)
15930 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15931 start_reg, SP_REGNUM);
15932 start_reg = reg + 1;
15937 if (reg != start_reg)
15938 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15939 start_reg, reg - start_reg,
15942 start_reg = reg + 1;
15946 /* Just in case the last register checked also needs unstacking. */
15947 if (reg != start_reg)
15948 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15949 start_reg, reg - start_reg, SP_REGNUM);
15952 if (TARGET_HARD_FLOAT && TARGET_VFP)
15954 int end_reg = LAST_VFP_REGNUM + 1;
15956 /* Scan the registers in reverse order. We need to match
15957 any groupings made in the prologue and generate matching
15959 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15961 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15962 && (!df_regs_ever_live_p (reg + 1)
15963 || call_used_regs[reg + 1]))
15965 if (end_reg > reg + 2)
15966 vfp_output_fldmd (f, SP_REGNUM,
15967 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15968 (end_reg - (reg + 2)) / 2);
15972 if (end_reg > reg + 2)
15973 vfp_output_fldmd (f, SP_REGNUM, 0,
15974 (end_reg - (reg + 2)) / 2);
15978 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15979 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15980 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15982 /* If we can, restore the LR into the PC. */
15983 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15984 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15985 && !IS_STACKALIGN (func_type)
15987 && crtl->args.pretend_args_size == 0
15988 && saved_regs_mask & (1 << LR_REGNUM)
15989 && !crtl->calls_eh_return)
15991 saved_regs_mask &= ~ (1 << LR_REGNUM);
15992 saved_regs_mask |= (1 << PC_REGNUM);
15993 rfe = IS_INTERRUPT (func_type);
15998 /* Load the registers off the stack. If we only have one register
15999 to load use the LDR instruction - it is faster. For Thumb-2
16000 always use pop and the assembler will pick the best instruction. */
16001 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
16002 && !IS_INTERRUPT(func_type))
16004 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
16006 else if (saved_regs_mask)
16008 if (saved_regs_mask & (1 << SP_REGNUM))
16009 /* Note - write back to the stack register is not enabled
16010 (i.e. "ldmfd sp!..."). We know that the stack pointer is
16011 in the list of registers and if we add writeback the
16012 instruction becomes UNPREDICTABLE. */
16013 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
16015 else if (TARGET_ARM)
16016 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
16019 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
16022 if (crtl->args.pretend_args_size)
16024 /* Unwind the pre-pushed regs. */
16025 operands[0] = operands[1] = stack_pointer_rtx;
16026 operands[2] = GEN_INT (crtl->args.pretend_args_size);
16027 output_add_immediate (operands);
16031 /* We may have already restored PC directly from the stack. */
16032 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
16035 /* Stack adjustment for exception handler. */
16036 if (crtl->calls_eh_return)
16037 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16038 ARM_EH_STACKADJ_REGNUM);
16040 /* Generate the return instruction. */
16041 switch ((int) ARM_FUNC_TYPE (func_type))
16045 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
16048 case ARM_FT_EXCEPTION:
16049 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16052 case ARM_FT_INTERWORKED:
16053 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16057 if (IS_STACKALIGN (func_type))
16059 /* See comment in arm_expand_prologue. */
16060 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
16062 if (arm_arch5 || arm_arch4t)
16063 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16065 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16073 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16074 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16076 arm_stack_offsets *offsets;
16082 /* Emit any call-via-reg trampolines that are needed for v4t support
16083 of call_reg and call_value_reg type insns. */
16084 for (regno = 0; regno < LR_REGNUM; regno++)
16086 rtx label = cfun->machine->call_via[regno];
16090 switch_to_section (function_section (current_function_decl));
16091 targetm.asm_out.internal_label (asm_out_file, "L",
16092 CODE_LABEL_NUMBER (label));
16093 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16097 /* ??? Probably not safe to set this here, since it assumes that a
16098 function will be emitted as assembly immediately after we generate
16099 RTL for it. This does not happen for inline functions. */
16100 cfun->machine->return_used_this_function = 0;
16102 else /* TARGET_32BIT */
16104 /* We need to take into account any stack-frame rounding. */
16105 offsets = arm_get_frame_offsets ();
16107 gcc_assert (!use_return_insn (FALSE, NULL)
16108 || (cfun->machine->return_used_this_function != 0)
16109 || offsets->saved_regs == offsets->outgoing_args
16110 || frame_pointer_needed);
16112 /* Reset the ARM-specific per-function variables. */
16113 after_arm_reorg = 0;
16117 /* Generate and emit an insn that we will recognize as a push_multi.
16118 Unfortunately, since this insn does not reflect very well the actual
16119 semantics of the operation, we need to annotate the insn for the benefit
16120 of DWARF2 frame unwind information. */
16122 emit_multi_reg_push (unsigned long mask)
16125 int num_dwarf_regs;
16129 int dwarf_par_index;
16132 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16133 if (mask & (1 << i))
16136 gcc_assert (num_regs && num_regs <= 16);
16138 /* We don't record the PC in the dwarf frame information. */
16139 num_dwarf_regs = num_regs;
16140 if (mask & (1 << PC_REGNUM))
16141 num_dwarf_regs--;
16143 /* For the body of the insn we are going to generate an UNSPEC in
16144 parallel with several USEs. This allows the insn to be recognized
16145 by the push_multi pattern in the arm.md file.
16147 The body of the insn looks something like this:
16150 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16151 (const_int:SI <num>)))
16152 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16158 For the frame note however, we try to be more explicit and actually
16159 show each register being stored into the stack frame, plus a (single)
16160 decrement of the stack pointer. We do it this way in order to be
16161 friendly to the stack unwinding code, which only wants to see a single
16162 stack decrement per instruction. The RTL we generate for the note looks
16163 something like this:
16166 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16167 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16168 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16169 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16173 FIXME: In an ideal world the PRE_MODIFY would not exist and
16174 instead we'd have a parallel expression detailing all
16175 the stores to the various memory addresses so that debug
16176 information is more up-to-date. Remember however while writing
16177 this to take care of the constraints with the push instruction.
16179 Note also that this has to be taken care of for the VFP registers.
16181 For more see PR43399. */
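/* Usage sketch (illustrative): emit_multi_reg_push ((1 << 4) | (1 << LR_REGNUM))
   emits one push_multi insn for {r4, lr} whose REG_FRAME_RELATED_EXPR
   note has the shape shown above: sp = sp - 8 followed by one store per
   register.  */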
16183 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16184 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16185 dwarf_par_index = 1;
16187 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16189 if (mask & (1 << i))
16191 reg = gen_rtx_REG (SImode, i);
16193 XVECEXP (par, 0, 0)
16194 = gen_rtx_SET (VOIDmode,
16197 gen_rtx_PRE_MODIFY (Pmode,
16200 (stack_pointer_rtx,
16203 gen_rtx_UNSPEC (BLKmode,
16204 gen_rtvec (1, reg),
16205 UNSPEC_PUSH_MULT));
16207 if (i != PC_REGNUM)
16209 tmp = gen_rtx_SET (VOIDmode,
16210 gen_frame_mem (SImode, stack_pointer_rtx),
16212 RTX_FRAME_RELATED_P (tmp) = 1;
16213 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16221 for (j = 1, i++; j < num_regs; i++)
16223 if (mask & (1 << i))
16225 reg = gen_rtx_REG (SImode, i);
16227 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16229 if (i != PC_REGNUM)
16232 = gen_rtx_SET (VOIDmode,
16235 plus_constant (stack_pointer_rtx,
16238 RTX_FRAME_RELATED_P (tmp) = 1;
16239 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16246 par = emit_insn (par);
16248 tmp = gen_rtx_SET (VOIDmode,
16250 plus_constant (stack_pointer_rtx, -4 * num_regs));
16251 RTX_FRAME_RELATED_P (tmp) = 1;
16252 XVECEXP (dwarf, 0, 0) = tmp;
16254 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16259 /* Calculate the size of the return value that is passed in registers. */
16261 arm_size_return_regs (void)
16263 enum machine_mode mode;
16265 if (crtl->return_rtx != 0)
16266 mode = GET_MODE (crtl->return_rtx);
16268 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16270 return GET_MODE_SIZE (mode);
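/* Example (illustrative): a function returning a DImode value yields
   GET_MODE_SIZE (DImode) == 8, so callers such as the epilogue
   pop-folding code treat both r0 and r1 as holding the return value.  */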
16274 emit_sfm (int base_reg, int count)
16281 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16282 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16284 reg = gen_rtx_REG (XFmode, base_reg++);
16286 XVECEXP (par, 0, 0)
16287 = gen_rtx_SET (VOIDmode,
16290 gen_rtx_PRE_MODIFY (Pmode,
16293 (stack_pointer_rtx,
16296 gen_rtx_UNSPEC (BLKmode,
16297 gen_rtvec (1, reg),
16298 UNSPEC_PUSH_MULT));
16299 tmp = gen_rtx_SET (VOIDmode,
16300 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16301 RTX_FRAME_RELATED_P (tmp) = 1;
16302 XVECEXP (dwarf, 0, 1) = tmp;
16304 for (i = 1; i < count; i++)
16306 reg = gen_rtx_REG (XFmode, base_reg++);
16307 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16309 tmp = gen_rtx_SET (VOIDmode,
16310 gen_frame_mem (XFmode,
16311 plus_constant (stack_pointer_rtx,
16314 RTX_FRAME_RELATED_P (tmp) = 1;
16315 XVECEXP (dwarf, 0, i + 1) = tmp;
16318 tmp = gen_rtx_SET (VOIDmode,
16320 plus_constant (stack_pointer_rtx, -12 * count));
16322 RTX_FRAME_RELATED_P (tmp) = 1;
16323 XVECEXP (dwarf, 0, 0) = tmp;
16325 par = emit_insn (par);
16326 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16332 /* Return true if the current function needs to save/restore LR. */
16335 thumb_force_lr_save (void)
16337 return !cfun->machine->lr_save_eliminated
16338 && (!leaf_function_p ()
16339 || thumb_far_jump_used_p ()
16340 || df_regs_ever_live_p (LR_REGNUM));
16344 /* Return true if r3 is used by any of the tail call insns in the
16345 current function. */
16348 any_sibcall_uses_r3 (void)
16353 if (!crtl->tail_call_emit)
16355 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16356 if (e->flags & EDGE_SIBCALL)
16358 rtx call = BB_END (e->src);
16359 if (!CALL_P (call))
16360 call = prev_nonnote_nondebug_insn (call);
16361 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16362 if (find_regno_fusage (call, USE, 3))
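/* Illustrative consequence: a sibcall passing four arguments uses r3
   for the fourth, so find_regno_fusage reports r3 as used and
   arm_get_frame_offsets below will then refuse to pick r3 as a scratch
   register for stack alignment.  */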
16369 /* Compute the distance from register FROM to register TO.
16370 These can be the arg pointer (26), the soft frame pointer (25),
16371 the stack pointer (13) or the hard frame pointer (11).
16372 In thumb mode r7 is used as the soft frame pointer, if needed.
16373 Typical stack layout looks like this:
16375 old stack pointer -> (above) saved arguments for vararg functions
16382 hard FP & arg pointer -> (top of) stack frame
16390 soft frame pointer -> (bottom of) call-saved registers
16395 locals base pointer -> (bottom of) local variables
16400 current stack pointer -> (bottom of) outgoing arguments
16403 For a given function some or all of these stack components
16404 may not be needed, giving rise to the possibility of
16405 eliminating some of the registers.
16407 The values returned by this function must reflect the behavior
16408 of arm_expand_prologue() and arm_compute_save_reg_mask().
16410 The sign of the number returned reflects the direction of stack
16411 growth, so the values are positive for all eliminations except
16412 from the soft frame pointer to the hard frame pointer.
16414 SFP may point just inside the local variables block to ensure correct alignment. */
16418 /* Calculate stack offsets. These are used to calculate register elimination
16419 offsets and in prologue/epilogue code. Also calculates which registers
16420 should be saved. */
16422 static arm_stack_offsets *
16423 arm_get_frame_offsets (void)
16425 struct arm_stack_offsets *offsets;
16426 unsigned long func_type;
16430 HOST_WIDE_INT frame_size;
16433 offsets = &cfun->machine->stack_offsets;
16435 /* We need to know if we are a leaf function. Unfortunately, it
16436 is possible to be called after start_sequence has been called,
16437 which causes get_insns to return the insns for the sequence,
16438 not the function, which will cause leaf_function_p to return
16439 the incorrect result.
16441 We only want to know about leaf functions once reload has completed, and the
16442 frame size cannot be changed after that time, so we can safely
16443 use the cached value. */
16445 if (reload_completed)
16448 /* Initially this is the size of the local variables. It will be translated
16449 into an offset once we have determined the size of preceding data. */
16450 frame_size = ROUND_UP_WORD (get_frame_size ());
16452 leaf = leaf_function_p ();
16454 /* Space for variadic functions. */
16455 offsets->saved_args = crtl->args.pretend_args_size;
16457 /* In Thumb mode this is incorrect, but never used. */
16458 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16459 arm_compute_static_chain_stack_bytes();
16463 unsigned int regno;
16465 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16466 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16467 saved = core_saved;
16469 /* We know that SP will be doubleword aligned on entry, and we must
16470 preserve that condition at any subroutine call. We also require the
16471 soft frame pointer to be doubleword aligned. */
16473 if (TARGET_REALLY_IWMMXT)
16475 /* Check for the call-saved iWMMXt registers. */
16476 for (regno = FIRST_IWMMXT_REGNUM;
16477 regno <= LAST_IWMMXT_REGNUM;
16479 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16483 func_type = arm_current_func_type ();
16484 if (! IS_VOLATILE (func_type))
16486 /* Space for saved FPA registers. */
16487 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16488 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16491 /* Space for saved VFP registers. */
16492 if (TARGET_HARD_FLOAT && TARGET_VFP)
16493 saved += arm_get_vfp_saved_size ();
16496 else /* TARGET_THUMB1 */
16498 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16499 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16500 saved = core_saved;
16501 if (TARGET_BACKTRACE)
16505 /* Saved registers include the stack frame. */
16506 offsets->saved_regs = offsets->saved_args + saved +
16507 arm_compute_static_chain_stack_bytes();
16508 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16509 /* A leaf function does not need any stack alignment if it has nothing on the stack. */
16511 if (leaf && frame_size == 0
16512 /* However if it calls alloca(), we have a dynamically allocated
16513 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16514 && ! cfun->calls_alloca)
16516 offsets->outgoing_args = offsets->soft_frame;
16517 offsets->locals_base = offsets->soft_frame;
16521 /* Ensure SFP has the correct alignment. */
16522 if (ARM_DOUBLEWORD_ALIGN
16523 && (offsets->soft_frame & 7))
16525 offsets->soft_frame += 4;
16526 /* Try to align stack by pushing an extra reg. Don't bother doing this
16527 when there is a stack frame as the alignment will be rolled into
16528 the normal stack adjustment. */
16529 if (frame_size + crtl->outgoing_args_size == 0)
16533 /* If it is safe to use r3, then do so. This sometimes
16534 generates better code on Thumb-2 by avoiding the need to
16535 use 32-bit push/pop instructions. */
16536 if (! any_sibcall_uses_r3 ()
16537 && arm_size_return_regs () <= 12
16538 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16543 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16545 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16554 offsets->saved_regs += 4;
16555 offsets->saved_regs_mask |= (1 << reg);
16560 offsets->locals_base = offsets->soft_frame + frame_size;
16561 offsets->outgoing_args = (offsets->locals_base
16562 + crtl->outgoing_args_size);
16564 if (ARM_DOUBLEWORD_ALIGN)
16566 /* Ensure SP remains doubleword aligned. */
16567 if (offsets->outgoing_args & 7)
16568 offsets->outgoing_args += 4;
16569 gcc_assert (!(offsets->outgoing_args & 7));
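/* Worked example (illustrative, assuming ARM_DOUBLEWORD_ALIGN and a
   zero-sized caller interworking slot): a function that saves
   {r4, r5, fp, lr}, has 12 bytes of locals and no outgoing arguments
   gets saved_args = 0, saved_regs = 16, soft_frame = 16,
   locals_base = 28, and outgoing_args rounded from 28 up to 32 so that
   SP stays doubleword aligned.  */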
16576 /* Calculate the relative offsets for the different stack pointers. Positive
16577 offsets are in the direction of stack growth. */
16580 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16582 arm_stack_offsets *offsets;
16584 offsets = arm_get_frame_offsets ();
16586 /* OK, now we have enough information to compute the distances.
16587 There must be an entry in these switch tables for each pair
16588 of registers in ELIMINABLE_REGS, even if some of the entries
16589 seem to be redundant or useless. */
16592 case ARG_POINTER_REGNUM:
16595 case THUMB_HARD_FRAME_POINTER_REGNUM:
16596 return 0;
16598 case FRAME_POINTER_REGNUM:
16599 /* This is the reverse of the soft frame pointer
16600 to hard frame pointer elimination below. */
16601 return offsets->soft_frame - offsets->saved_args;
16603 case ARM_HARD_FRAME_POINTER_REGNUM:
16604 /* This is only non-zero in the case where the static chain register
16605 is stored above the frame. */
16606 return offsets->frame - offsets->saved_args - 4;
16608 case STACK_POINTER_REGNUM:
16609 /* If nothing has been pushed on the stack at all
16610 then this will return -4. This *is* correct! */
16611 return offsets->outgoing_args - (offsets->saved_args + 4);
16614 gcc_unreachable ();
16616 gcc_unreachable ();
16618 case FRAME_POINTER_REGNUM:
16621 case THUMB_HARD_FRAME_POINTER_REGNUM:
16622 return 0;
16624 case ARM_HARD_FRAME_POINTER_REGNUM:
16625 /* The hard frame pointer points to the top entry in the
16626 stack frame. The soft frame pointer to the bottom entry
16627 in the stack frame. If there is no stack frame at all,
16628 then they are identical. */
16630 return offsets->frame - offsets->soft_frame;
16632 case STACK_POINTER_REGNUM:
16633 return offsets->outgoing_args - offsets->soft_frame;
16636 gcc_unreachable ();
16638 gcc_unreachable ();
16641 /* You cannot eliminate from the stack pointer.
16642 In theory you could eliminate from the hard frame
16643 pointer to the stack pointer, but this will never
16644 happen, since if a stack frame is not needed the
16645 hard frame pointer will never be used. */
16646 gcc_unreachable ();
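/* Continuing the worked example from arm_get_frame_offsets above:
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM would return
   outgoing_args - (saved_args + 4) = 32 - 4 = 28.  */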
16650 /* Given FROM and TO register numbers, say whether this elimination is
16651 allowed. Frame pointer elimination is automatically handled.
16653 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16654 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16655 pointer, we must eliminate FRAME_POINTER_REGNUM into
16656 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16657 ARG_POINTER_REGNUM. */
16660 arm_can_eliminate (const int from, const int to)
16662 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16663 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16664 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16665 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16669 /* Emit RTL to save coprocessor registers on function entry. Returns the
16670 number of bytes pushed. */
16673 arm_save_coproc_regs(void)
16675 int saved_size = 0;
16677 unsigned start_reg;
16680 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16681 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16683 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16684 insn = gen_rtx_MEM (V2SImode, insn);
16685 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16686 RTX_FRAME_RELATED_P (insn) = 1;
16690 /* Save any floating point call-saved registers used by this
16692 if (TARGET_FPA_EMU2)
16694 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16695 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16697 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16698 insn = gen_rtx_MEM (XFmode, insn);
16699 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16700 RTX_FRAME_RELATED_P (insn) = 1;
16706 start_reg = LAST_FPA_REGNUM;
16708 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16710 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16712 if (start_reg - reg == 3)
16714 insn = emit_sfm (reg, 4);
16715 RTX_FRAME_RELATED_P (insn) = 1;
16717 start_reg = reg - 1;
16722 if (start_reg != reg)
16724 insn = emit_sfm (reg + 1, start_reg - reg);
16725 RTX_FRAME_RELATED_P (insn) = 1;
16726 saved_size += (start_reg - reg) * 12;
16728 start_reg = reg - 1;
16732 if (start_reg != reg)
16734 insn = emit_sfm (reg + 1, start_reg - reg);
16735 saved_size += (start_reg - reg) * 12;
16736 RTX_FRAME_RELATED_P (insn) = 1;
16739 if (TARGET_HARD_FLOAT && TARGET_VFP)
16741 start_reg = FIRST_VFP_REGNUM;
16743 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16745 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16746 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16748 if (start_reg != reg)
16749 saved_size += vfp_emit_fstmd (start_reg,
16750 (reg - start_reg) / 2);
16751 start_reg = reg + 2;
16754 if (start_reg != reg)
16755 saved_size += vfp_emit_fstmd (start_reg,
16756 (reg - start_reg) / 2);
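/* Illustrative behaviour of the VFP loop above: if d8-d11 are all live
   the scan sees one contiguous run and vfp_emit_fstmd stores all four
   with a single store-multiple; a dead D register in the middle splits
   the run into two groups, which the restore scan in
   arm_output_epilogue mirrors exactly.  */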
16762 /* Set the Thumb frame pointer from the stack pointer. */
16765 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16767 HOST_WIDE_INT amount;
16770 amount = offsets->outgoing_args - offsets->locals_base;
16772 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16773 stack_pointer_rtx, GEN_INT (amount)));
16776 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16777 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16778 expects the first two operands to be the same. */
16781 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16783 hard_frame_pointer_rtx));
16787 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16788 hard_frame_pointer_rtx,
16789 stack_pointer_rtx));
16791 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16792 plus_constant (stack_pointer_rtx, amount));
16793 RTX_FRAME_RELATED_P (dwarf) = 1;
16794 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16797 RTX_FRAME_RELATED_P (insn) = 1;
16800 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16803 arm_expand_prologue (void)
16808 unsigned long live_regs_mask;
16809 unsigned long func_type;
16811 int saved_pretend_args = 0;
16812 int saved_regs = 0;
16813 unsigned HOST_WIDE_INT args_to_push;
16814 arm_stack_offsets *offsets;
16816 func_type = arm_current_func_type ();
16818 /* Naked functions don't have prologues. */
16819 if (IS_NAKED (func_type))
16822 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16823 args_to_push = crtl->args.pretend_args_size;
16825 /* Compute which register we will have to save onto the stack. */
16826 offsets = arm_get_frame_offsets ();
16827 live_regs_mask = offsets->saved_regs_mask;
16829 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16831 if (IS_STACKALIGN (func_type))
16835 /* Handle a word-aligned stack pointer. We generate the following:
16836 mov r0, sp
16837 bic r1, r0, #7
16838 mov sp, r1
16840 <save and restore r0 in normal prologue/epilogue>
16841 mov sp, r0
16842 bx lr
16844 The unwinder doesn't need to know about the stack realignment.
16845 Just tell it we saved SP in r0. */
16846 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16848 r0 = gen_rtx_REG (SImode, 0);
16849 r1 = gen_rtx_REG (SImode, 1);
16851 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16852 RTX_FRAME_RELATED_P (insn) = 1;
16853 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16855 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16857 /* ??? The CFA changes here, which may cause GDB to conclude that it
16858 has entered a different function. That said, the unwind info is
16859 correct, individually, before and after this instruction because
16860 we've described the save of SP, which will override the default
16861 handling of SP as restoring from the CFA. */
16862 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16865 /* For APCS frames, if the IP register is clobbered
16866 when creating the frame, save that register in a special buffer. */
16868 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16870 if (IS_INTERRUPT (func_type))
16872 /* Interrupt functions must not corrupt any registers.
16873 Creating a frame pointer however, corrupts the IP
16874 register, so we must push it first. */
16875 emit_multi_reg_push (1 << IP_REGNUM);
16877 /* Do not set RTX_FRAME_RELATED_P on this insn.
16878 The dwarf stack unwinding code only wants to see one
16879 stack decrement per function, and this is not it. If
16880 this instruction is labeled as being part of the frame
16881 creation sequence then dwarf2out_frame_debug_expr will
16882 die when it encounters the assignment of IP to FP
16883 later on, since the use of SP here establishes SP as
16884 the CFA register and not IP.
16886 Anyway this instruction is not really part of the stack
16887 frame creation although it is part of the prologue. */
16889 else if (IS_NESTED (func_type))
16891 /* The static chain register is the same as the IP register
16892 used as a scratch register during stack frame creation.
16893 To get around this we need to find somewhere to store IP
16894 whilst the frame is being created. We try the following places in order:
16897 1. The last argument register.
16898 2. A slot on the stack above the frame. (This only
16899 works if the function is not a varargs function).
16900 3. Register r3, after pushing the argument registers onto the stack.
16903 Note - we only need to tell the dwarf2 backend about the SP
16904 adjustment in the second variant; the static chain register
16905 doesn't need to be unwound, as it doesn't contain a value
16906 inherited from the caller. */
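/* Illustrative outcomes of the three choices above: (1) "mov r3, ip";
   (2) "str ip, [sp, #-4]!", telling the unwinder only that sp moved;
   (3) push the argument registers first, then reuse r3 as in (1).  */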
16908 if (df_regs_ever_live_p (3) == false)
16909 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16910 else if (args_to_push == 0)
16914 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16917 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16918 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16921 /* Just tell the dwarf backend that we adjusted SP. */
16922 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16923 plus_constant (stack_pointer_rtx,
16925 RTX_FRAME_RELATED_P (insn) = 1;
16926 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16930 /* Store the args on the stack. */
16931 if (cfun->machine->uses_anonymous_args)
16932 insn = emit_multi_reg_push
16933 ((0xf0 >> (args_to_push / 4)) & 0xf);
16936 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16937 GEN_INT (- args_to_push)));
16939 RTX_FRAME_RELATED_P (insn) = 1;
16941 saved_pretend_args = 1;
16942 fp_offset = args_to_push;
16945 /* Now reuse r3 to preserve IP. */
16946 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16950 insn = emit_set_insn (ip_rtx,
16951 plus_constant (stack_pointer_rtx, fp_offset));
16952 RTX_FRAME_RELATED_P (insn) = 1;
16957 /* Push the argument registers, or reserve space for them. */
16958 if (cfun->machine->uses_anonymous_args)
16959 insn = emit_multi_reg_push
16960 ((0xf0 >> (args_to_push / 4)) & 0xf);
16963 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16964 GEN_INT (- args_to_push)));
16965 RTX_FRAME_RELATED_P (insn) = 1;
16968 /* If this is an interrupt service routine, and the link register
16969 is going to be pushed, and we're not generating an extra
16970 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
16971 subtracting four from LR now will mean that the function return
16972 can be done with a single instruction. */
16973 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16974 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16975 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16978 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16980 emit_set_insn (lr, plus_constant (lr, -4));
16983 if (live_regs_mask)
16985 saved_regs += bit_count (live_regs_mask) * 4;
16986 if (optimize_size && !frame_pointer_needed
16987 && saved_regs == offsets->saved_regs - offsets->saved_args)
16989 /* If no coprocessor registers are being pushed and we don't have
16990 to worry about a frame pointer then push extra registers to
16991 create the stack frame. This is done in a way that does not
16992 alter the frame layout, so is independent of the epilogue. */
16996 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16998 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16999 if (frame && n * 4 >= frame)
17001 n = frame / 4;
17002 live_regs_mask |= (1 << n) - 1;
17003 saved_regs += frame;
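/* Worked example (illustrative): with {r4, lr} live and an 8-byte
   frame, n becomes 8 / 4 = 2 and r0/r1 join the mask, so the pair
   "push {r4, lr}; sub sp, sp, #8" shrinks to the single
   "push {r0, r1, r4, lr}".  */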
17006 insn = emit_multi_reg_push (live_regs_mask);
17007 RTX_FRAME_RELATED_P (insn) = 1;
17010 if (! IS_VOLATILE (func_type))
17011 saved_regs += arm_save_coproc_regs ();
17013 if (frame_pointer_needed && TARGET_ARM)
17015 /* Create the new frame pointer. */
17016 if (TARGET_APCS_FRAME)
17018 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17019 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17020 RTX_FRAME_RELATED_P (insn) = 1;
17022 if (IS_NESTED (func_type))
17024 /* Recover the static chain register. */
17025 if (!df_regs_ever_live_p (3)
17026 || saved_pretend_args)
17027 insn = gen_rtx_REG (SImode, 3);
17028 else /* if (crtl->args.pretend_args_size == 0) */
17030 insn = plus_constant (hard_frame_pointer_rtx, 4);
17031 insn = gen_frame_mem (SImode, insn);
17033 emit_set_insn (ip_rtx, insn);
17034 /* Add a USE to stop propagate_one_insn() from barfing. */
17035 emit_insn (gen_prologue_use (ip_rtx));
17040 insn = GEN_INT (saved_regs - 4);
17041 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17042 stack_pointer_rtx, insn));
17043 RTX_FRAME_RELATED_P (insn) = 1;
17047 if (flag_stack_usage_info)
17048 current_function_static_stack_size
17049 = offsets->outgoing_args - offsets->saved_args;
17051 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17053 /* This add can produce multiple insns for a large constant, so we
17054 need to get tricky. */
17055 rtx last = get_last_insn ();
17057 amount = GEN_INT (offsets->saved_args + saved_regs
17058 - offsets->outgoing_args);
17060 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17064 last = last ? NEXT_INSN (last) : get_insns ();
17065 RTX_FRAME_RELATED_P (last) = 1;
17067 while (last != insn);
17069 /* If the frame pointer is needed, emit a special barrier that
17070 will prevent the scheduler from moving stores to the frame
17071 before the stack adjustment. */
17072 if (frame_pointer_needed)
17073 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17074 hard_frame_pointer_rtx));
17078 if (frame_pointer_needed && TARGET_THUMB2)
17079 thumb_set_frame_pointer (offsets);
17081 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17083 unsigned long mask;
17085 mask = live_regs_mask;
17086 mask &= THUMB2_WORK_REGS;
17087 if (!IS_NESTED (func_type))
17088 mask |= (1 << IP_REGNUM);
17089 arm_load_pic_register (mask);
17092 /* If we are profiling, make sure no instructions are scheduled before
17093 the call to mcount. Similarly if the user has requested no
17094 scheduling in the prologue. Similarly if we want non-call exceptions
17095 using the EABI unwinder, to prevent faulting instructions from being
17096 swapped with a stack adjustment. */
17097 if (crtl->profile || !TARGET_SCHED_PROLOG
17098 || (arm_except_unwind_info (&global_options) == UI_TARGET
17099 && cfun->can_throw_non_call_exceptions))
17100 emit_insn (gen_blockage ());
17102 /* If the link register is being kept alive, with the return address in it,
17103 then make sure that it does not get reused by the ce2 pass. */
17104 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17105 cfun->machine->lr_save_eliminated = 1;
17108 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17110 arm_print_condition (FILE *stream)
17112 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17114 /* Branch conversion is not implemented for Thumb-2. */
17117 output_operand_lossage ("predicated Thumb instruction");
17120 if (current_insn_predicate != NULL)
17122 output_operand_lossage
17123 ("predicated instruction in conditional sequence");
17127 fputs (arm_condition_codes[arm_current_cc], stream);
17129 else if (current_insn_predicate)
17131 enum arm_cond_code code;
17135 output_operand_lossage ("predicated Thumb instruction");
17139 code = get_arm_condition_code (current_insn_predicate);
17140 fputs (arm_condition_codes[code], stream);
17145 /* If CODE is 'd', then the X is a condition operand and the instruction
17146 should only be executed if the condition is true.
17147 If CODE is 'D', then the X is a condition operand and the instruction
17148 should only be executed if the condition is false: however, if the mode
17149 of the comparison is CCFPEmode, then always execute the instruction -- we
17150 do this because in these circumstances !GE does not necessarily imply LT;
17151 in these cases the instruction pattern will take care to make sure that
17152 an instruction containing %d will follow, thereby undoing the effects of
17153 doing this instruction unconditionally.
17154 If CODE is 'N' then X is a floating point operand that must be negated before output.
17156 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17157 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
17159 arm_print_operand (FILE *stream, rtx x, int code)
17164 fputs (ASM_COMMENT_START, stream);
17168 fputs (user_label_prefix, stream);
17172 fputs (REGISTER_PREFIX, stream);
17176 arm_print_condition (stream);
17180 /* Nothing in unified syntax, otherwise the current condition code. */
17181 if (!TARGET_UNIFIED_ASM)
17182 arm_print_condition (stream);
17186 /* The current condition code in unified syntax, otherwise nothing. */
17187 if (TARGET_UNIFIED_ASM)
17188 arm_print_condition (stream);
17192 /* The current condition code for a condition code setting instruction.
17193 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17194 if (TARGET_UNIFIED_ASM)
17196 fputc('s', stream);
17197 arm_print_condition (stream);
17201 arm_print_condition (stream);
17202 fputc('s', stream);
17207 /* If the instruction is conditionally executed then print
17208 the current condition code, otherwise print 's'. */
17209 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17210 if (current_insn_predicate)
17211 arm_print_condition (stream);
17213 fputc('s', stream);
17216 /* %# is a "break" sequence. It doesn't output anything, but is used to
17217 separate e.g. operand numbers from following text, if that text consists
17218 of further digits which we don't want to be part of the operand number. */
17226 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17227 r = real_value_negate (&r);
17228 fprintf (stream, "%s", fp_const_from_val (&r));
17232 /* An integer or symbol address without a preceding # sign. */
17234 switch (GET_CODE (x))
17237 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17241 output_addr_const (stream, x);
17245 if (GET_CODE (XEXP (x, 0)) == PLUS
17246 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17248 output_addr_const (stream, x);
17251 /* Fall through. */
17254 output_operand_lossage ("Unsupported operand for code '%c'", code);
17259 if (GET_CODE (x) == CONST_INT)
17262 val = ARM_SIGN_EXTEND (~INTVAL (x));
17263 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17267 putc ('~', stream);
17268 output_addr_const (stream, x);
17273 /* The low 16 bits of an immediate constant. */
17274 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17278 fprintf (stream, "%s", arithmetic_instr (x, 1));
17281 /* Truncate Cirrus shift counts. */
17283 if (GET_CODE (x) == CONST_INT)
17285 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17288 arm_print_operand (stream, x, 0);
17292 fprintf (stream, "%s", arithmetic_instr (x, 0));
17300 if (!shift_operator (x, SImode))
17302 output_operand_lossage ("invalid shift operand");
17306 shift = shift_op (x, &val);
17310 fprintf (stream, ", %s ", shift);
17312 arm_print_operand (stream, XEXP (x, 1), 0);
17314 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17319 /* An explanation of the 'Q', 'R' and 'H' register operands:
17321 In a pair of registers containing a DI or DF value the 'Q'
17322 operand returns the register number of the register containing
17323 the least significant part of the value. The 'R' operand returns
17324 the register number of the register containing the most
17325 significant part of the value.
17327 The 'H' operand returns the higher of the two register numbers.
17328 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17329 same as the 'Q' operand, since the most significant part of the
17330 value is held in the lower number register. The reverse is true
17331 on systems where WORDS_BIG_ENDIAN is false.
17333 The purpose of these operands is to distinguish between cases
17334 where the endian-ness of the values is important (for example
17335 when they are added together), and cases where the endian-ness
17336 is irrelevant, but the order of register operations is important.
17337 For example when loading a value from memory into a register
17338 pair, the endian-ness does not matter. Provided that the value
17339 from the lower memory address is put into the lower numbered
17340 register, and the value from the higher address is put into the
17341 higher numbered register, the load will work regardless of whether
17342 the value being loaded is big-wordian or little-wordian. The
17343 order of the two register loads can matter however, if the address
17344 of the memory location is actually held in one of the registers
17345 being overwritten by the load.
17347 The 'Q' and 'R' constraints are also available for 64-bit constants. */
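/* Worked example (illustrative): for a DImode value held in r0/r1 with
   little-endian word order, %Q prints r0 (the least significant half),
   %R prints r1, and %H also prints r1; with WORDS_BIG_ENDIAN, %Q and %H
   both print r1 while %R prints r0.  */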
17350 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17352 rtx part = gen_lowpart (SImode, x);
17353 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17357 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17359 output_operand_lossage ("invalid operand for code '%c'", code);
17363 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17367 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17369 enum machine_mode mode = GET_MODE (x);
17372 if (mode == VOIDmode)
17374 part = gen_highpart_mode (SImode, mode, x);
17375 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17379 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17381 output_operand_lossage ("invalid operand for code '%c'", code);
17385 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17389 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17391 output_operand_lossage ("invalid operand for code '%c'", code);
17395 asm_fprintf (stream, "%r", REGNO (x) + 1);
17399 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17401 output_operand_lossage ("invalid operand for code '%c'", code);
17405 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17409 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17411 output_operand_lossage ("invalid operand for code '%c'", code);
17415 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17419 asm_fprintf (stream, "%r",
17420 GET_CODE (XEXP (x, 0)) == REG
17421 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17425 asm_fprintf (stream, "{%r-%r}",
17427 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17430 /* Like 'M', but writing doubleword vector registers, for use by Neon insns. */
17434 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17435 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17437 asm_fprintf (stream, "{d%d}", regno);
17439 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17444 /* CONST_TRUE_RTX means always -- that's the default. */
17445 if (x == const_true_rtx)
17448 if (!COMPARISON_P (x))
17450 output_operand_lossage ("invalid operand for code '%c'", code);
17454 fputs (arm_condition_codes[get_arm_condition_code (x)],
17459 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17460 want to do that. */
17461 if (x == const_true_rtx)
17463 output_operand_lossage ("instruction never executed");
17466 if (!COMPARISON_P (x))
17468 output_operand_lossage ("invalid operand for code '%c'", code);
17472 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17473 (get_arm_condition_code (x))],
17477 /* Cirrus registers can be accessed in a variety of ways:
17478 single floating point (f)
17479 double floating point (d)
17480 32bit integer (fx)
17481 64bit integer (dx). */
17482 case 'W': /* Cirrus register in F mode. */
17483 case 'X': /* Cirrus register in D mode. */
17484 case 'Y': /* Cirrus register in FX mode. */
17485 case 'Z': /* Cirrus register in DX mode. */
17486 gcc_assert (GET_CODE (x) == REG
17487 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17489 fprintf (stream, "mv%s%s",
17491 : code == 'X' ? "d"
17492 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17496 /* Print cirrus register in the mode specified by the register's mode. */
17499 int mode = GET_MODE (x);
17501 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17503 output_operand_lossage ("invalid operand for code '%c'", code);
17507 fprintf (stream, "mv%s%s",
17508 mode == DFmode ? "d"
17509 : mode == SImode ? "fx"
17510 : mode == DImode ? "dx"
17511 : "f", reg_names[REGNO (x)] + 2);
17517 if (GET_CODE (x) != REG
17518 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17519 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17520 /* Bad value for wCG register number. */
17522 output_operand_lossage ("invalid operand for code '%c'", code);
17527 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17530 /* Print an iWMMXt control register name. */
17532 if (GET_CODE (x) != CONST_INT
17533 || INTVAL (x) < 0
17534 || INTVAL (x) >= 16)
17535 /* Bad value for wC register number. */
17537 output_operand_lossage ("invalid operand for code '%c'", code);
17543 static const char * wc_reg_names [16] =
17545 "wCID", "wCon", "wCSSF", "wCASF",
17546 "wC4", "wC5", "wC6", "wC7",
17547 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17548 "wC12", "wC13", "wC14", "wC15"
17551 fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
17555 /* Print the high single-precision register of a VFP double-precision
17559 int mode = GET_MODE (x);
17562 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17564 output_operand_lossage ("invalid operand for code '%c'", code);
17569 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17571 output_operand_lossage ("invalid operand for code '%c'", code);
17575 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17579 /* Print a VFP/Neon double precision or quad precision register name. */
17583 int mode = GET_MODE (x);
17584 int is_quad = (code == 'q');
17587 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17589 output_operand_lossage ("invalid operand for code '%c'", code);
17593 if (GET_CODE (x) != REG
17594 || !IS_VFP_REGNUM (REGNO (x)))
17596 output_operand_lossage ("invalid operand for code '%c'", code);
17601 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17602 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17604 output_operand_lossage ("invalid operand for code '%c'", code);
17608 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17609 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17613 /* These two codes print the low/high doubleword register of a Neon quad
17614 register, respectively. For pair-structure types, can also print
17615 low/high quadword registers. */
17619 int mode = GET_MODE (x);
17622 if ((GET_MODE_SIZE (mode) != 16
17623 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17625 output_operand_lossage ("invalid operand for code '%c'", code);
17630 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17632 output_operand_lossage ("invalid operand for code '%c'", code);
17636 if (GET_MODE_SIZE (mode) == 16)
17637 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17638 + (code == 'f' ? 1 : 0));
17640 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17641 + (code == 'f' ? 1 : 0));
17645 /* Print a VFPv3 floating-point constant, represented as an integer index. */
17649 int index = vfp3_const_double_index (x);
17650 gcc_assert (index != -1);
17651 fprintf (stream, "%d", index);
17655 /* Print bits representing opcode features for Neon.
17657 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17658 and polynomials as unsigned.
17660 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17662 Bit 2 is 1 for rounding functions, 0 otherwise. */
17664 /* Identify the type as 's', 'u', 'p' or 'f'. */
17667 HOST_WIDE_INT bits = INTVAL (x);
17668 fputc ("uspf"[bits & 3], stream);
17672 /* Likewise, but signed and unsigned integers are both 'i'. */
17675 HOST_WIDE_INT bits = INTVAL (x);
17676 fputc ("iipf"[bits & 3], stream);
17680 /* As for 'T', but emit 'u' instead of 'p'. */
17683 HOST_WIDE_INT bits = INTVAL (x);
17684 fputc ("usuf"[bits & 3], stream);
17688 /* Bit 2: rounding (vs none). */
17691 HOST_WIDE_INT bits = INTVAL (x);
17692 fputs ((bits & 4) != 0 ? "r" : "", stream);
17696 /* Memory operand for vld1/vst1 instruction. */
17700 bool postinc = FALSE;
17701 unsigned align, memsize, align_bits;
17703 gcc_assert (GET_CODE (x) == MEM);
17704 addr = XEXP (x, 0);
17705 if (GET_CODE (addr) == POST_INC)
17708 addr = XEXP (addr, 0);
17710 asm_fprintf (stream, "[%r", REGNO (addr));
17712 /* We know the alignment of this access, so we can emit a hint in the
17713 instruction (for some alignments) as an aid to the memory subsystem
17715 align = MEM_ALIGN (x) >> 3;
17716 memsize = MEM_SIZE (x);
17718 /* Only certain alignment specifiers are supported by the hardware. */
17719 if (memsize == 16 && (align % 32) == 0)
17721 else if (memsize == 16 && (align % 16) == 0)
17723 else if (memsize >= 8 && (align % 8) == 0)
17728 if (align_bits != 0)
17729 asm_fprintf (stream, ":%d", align_bits);
17731 asm_fprintf (stream, "]");
17734 fputs("!", stream);
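/* Examples (illustrative): a 16-byte access known to be 32-byte
   aligned prints "[r1:256]" (the hint is given in bits), an 8-byte
   access with 8-byte alignment prints "[r1:64]", and post-increment
   appends the trailing "!".  */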
17742 gcc_assert (GET_CODE (x) == MEM);
17743 addr = XEXP (x, 0);
17744 gcc_assert (GET_CODE (addr) == REG);
17745 asm_fprintf (stream, "[%r]", REGNO (addr));
17749 /* Translate an S register number into a D register number and element index. */
17752 int mode = GET_MODE (x);
17755 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17757 output_operand_lossage ("invalid operand for code '%c'", code);
17762 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17764 output_operand_lossage ("invalid operand for code '%c'", code);
17768 regno = regno - FIRST_VFP_REGNUM;
17769 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17774 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17775 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17778 /* Register specifier for vld1.16/vst1.16. Translate the S register
17779 number into a D register number and element index. */
17782 int mode = GET_MODE (x);
17785 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17787 output_operand_lossage ("invalid operand for code '%c'", code);
17792 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17794 output_operand_lossage ("invalid operand for code '%c'", code);
17798 regno = regno - FIRST_VFP_REGNUM;
17799 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17806 output_operand_lossage ("missing operand");
17810 switch (GET_CODE (x))
17813 asm_fprintf (stream, "%r", REGNO (x));
17817 output_memory_reference_mode = GET_MODE (x);
17818 output_address (XEXP (x, 0));
17825 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17826 sizeof (fpstr), 0, 1);
17827 fprintf (stream, "#%s", fpstr);
17830 fprintf (stream, "#%s", fp_immediate_constant (x));
17834 gcc_assert (GET_CODE (x) != NEG);
17835 fputc ('#', stream);
17836 if (GET_CODE (x) == HIGH)
17838 fputs (":lower16:", stream);
17842 output_addr_const (stream, x);
17848 /* Target hook for printing a memory address. */
17850 arm_print_operand_address (FILE *stream, rtx x)
17854 int is_minus = GET_CODE (x) == MINUS;
17856 if (GET_CODE (x) == REG)
17857 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17858 else if (GET_CODE (x) == PLUS || is_minus)
17860 rtx base = XEXP (x, 0);
17861 rtx index = XEXP (x, 1);
17862 HOST_WIDE_INT offset = 0;
17863 if (GET_CODE (base) != REG
17864 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17866 /* Ensure that BASE is a register. */
17867 /* (one of them must be). */
17868 /* Also ensure the SP is not used as an index register. */
17873 switch (GET_CODE (index))
17876 offset = INTVAL (index);
17879 asm_fprintf (stream, "[%r, #%wd]",
17880 REGNO (base), offset);
17884 asm_fprintf (stream, "[%r, %s%r]",
17885 REGNO (base), is_minus ? "-" : "",
17895 asm_fprintf (stream, "[%r, %s%r",
17896 REGNO (base), is_minus ? "-" : "",
17897 REGNO (XEXP (index, 0)));
17898 arm_print_operand (stream, index, 'S');
17899 fputs ("]", stream);
17904 gcc_unreachable ();
17907 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17908 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17910 extern enum machine_mode output_memory_reference_mode;
17912 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17914 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17915 asm_fprintf (stream, "[%r, #%s%d]!",
17916 REGNO (XEXP (x, 0)),
17917 GET_CODE (x) == PRE_DEC ? "-" : "",
17918 GET_MODE_SIZE (output_memory_reference_mode));
17919 else
17920 asm_fprintf (stream, "[%r], #%s%d",
17921 REGNO (XEXP (x, 0)),
17922 GET_CODE (x) == POST_DEC ? "-" : "",
17923 GET_MODE_SIZE (output_memory_reference_mode));
17925 else if (GET_CODE (x) == PRE_MODIFY)
17927 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17928 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17929 asm_fprintf (stream, "#%wd]!",
17930 INTVAL (XEXP (XEXP (x, 1), 1)));
17931 else
17932 asm_fprintf (stream, "%r]!",
17933 REGNO (XEXP (XEXP (x, 1), 1)));
17935 else if (GET_CODE (x) == POST_MODIFY)
17937 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17938 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17939 asm_fprintf (stream, "#%wd",
17940 INTVAL (XEXP (XEXP (x, 1), 1)));
17941 else
17942 asm_fprintf (stream, "%r",
17943 REGNO (XEXP (XEXP (x, 1), 1)));
17945 else output_addr_const (stream, x);
17946 }
17947 else
17948 {
17949 if (GET_CODE (x) == REG)
17950 asm_fprintf (stream, "[%r]", REGNO (x));
17951 else if (GET_CODE (x) == POST_INC)
17952 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17953 else if (GET_CODE (x) == PLUS)
17955 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17957 asm_fprintf (stream, "[%r, #%wd]",
17958 REGNO (XEXP (x, 0)),
17959 INTVAL (XEXP (x, 1)));
17960 else
17961 asm_fprintf (stream, "[%r, %r]",
17962 REGNO (XEXP (x, 0)),
17963 REGNO (XEXP (x, 1)));
17964 }
17965 else
17966 output_addr_const (stream, x);
17967 }
17968 }
17970 /* Target hook for indicating whether a punctuation character for
17971 TARGET_PRINT_OPERAND is valid. */
17972 static bool
17973 arm_print_operand_punct_valid_p (unsigned char code)
17974 {
17975 return (code == '@' || code == '|' || code == '.'
17976 || code == '(' || code == ')' || code == '#'
17977 || (TARGET_32BIT && (code == '?'))
17978 || (TARGET_THUMB2 && (code == '!'))
17979 || (TARGET_THUMB && (code == '_')));
17980 }
17982 /* Target hook for assembling integer objects. The ARM version needs to
17983 handle word-sized values specially. */
17984 static bool
17985 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17986 {
17987 enum machine_mode mode;
17989 if (size == UNITS_PER_WORD && aligned_p)
17990 {
17991 fputs ("\t.word\t", asm_out_file);
17992 output_addr_const (asm_out_file, x);
17994 /* Mark symbols as position independent. We only do this in the
17995 .text segment, not in the .data segment. */
17996 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17997 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17998 {
17999 /* See legitimize_pic_address for an explanation of the
18000 TARGET_VXWORKS_RTP check. */
18001 if (TARGET_VXWORKS_RTP
18002 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18003 fputs ("(GOT)", asm_out_file);
18004 else
18005 fputs ("(GOTOFF)", asm_out_file);
18006 }
18007 fputc ('\n', asm_out_file);
18008 return true;
18009 }
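/* Illustration (added): with -fPIC a constant-table reference to a
   non-local symbol is emitted as ".word sym(GOT)", while a local symbol
   becomes ".word sym(GOTOFF)". */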
18011 mode = GET_MODE (x);
18013 if (arm_vector_mode_supported_p (mode))
18014 {
18015 int i, units;
18017 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18019 units = CONST_VECTOR_NUNITS (x);
18020 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18022 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18023 for (i = 0; i < units; i++)
18024 {
18025 rtx elt = CONST_VECTOR_ELT (x, i);
18026 assemble_integer
18027 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18028 }
18029 else
18030 for (i = 0; i < units; i++)
18031 {
18032 rtx elt = CONST_VECTOR_ELT (x, i);
18033 REAL_VALUE_TYPE rval;
18035 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18037 assemble_real
18038 (rval, GET_MODE_INNER (mode),
18039 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18040 }
18042 return true;
18043 }
18045 return default_assemble_integer (x, size, aligned_p);
18046 }
18048 static void
18049 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18050 {
18051 section *s;
18053 if (!TARGET_AAPCS_BASED)
18054 {
18055 (is_ctor ?
18056 default_named_section_asm_out_constructor
18057 : default_named_section_asm_out_destructor) (symbol, priority);
18058 return;
18059 }
18061 /* Put these in the .init_array section, using a special relocation. */
18062 if (priority != DEFAULT_INIT_PRIORITY)
18063 {
18064 char buf[18];
18065 sprintf (buf, "%s.%.5u",
18066 is_ctor ? ".init_array" : ".fini_array",
18067 priority);
18068 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18069 }
18070 else if (is_ctor)
18071 s = ctors_section;
18072 else
18073 s = dtors_section;
18075 switch_to_section (s);
18076 assemble_align (POINTER_SIZE);
18077 fputs ("\t.word\t", asm_out_file);
18078 output_addr_const (asm_out_file, symbol);
18079 fputs ("(target1)\n", asm_out_file);
18080 }
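/* Assumed example output (added): a constructor with priority 101 is
   placed in section ".init_array.00101" and emitted as ".word fn(target1)",
   where the (target1) relocation lets the linker choose how the entry is
   relocated. */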
18082 /* Add a function to the list of static constructors. */
18084 static void
18085 arm_elf_asm_constructor (rtx symbol, int priority)
18086 {
18087 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18088 }
18090 /* Add a function to the list of static destructors. */
18092 static void
18093 arm_elf_asm_destructor (rtx symbol, int priority)
18094 {
18095 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18096 }
18098 /* A finite state machine takes care of noticing whether or not instructions
18099 can be conditionally executed, and thus decrease execution time and code
18100 size by deleting branch instructions. The fsm is controlled by
18101 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18103 /* The states of the fsm controlling condition codes are:
18104 0: normal, do nothing special
18105 1: make ASM_OUTPUT_OPCODE not output this instruction
18106 2: make ASM_OUTPUT_OPCODE not output this instruction
18107 3: make instructions conditional
18108 4: make instructions conditional
18110 State transitions (state->state by whom under condition):
18111 0 -> 1 final_prescan_insn if the `target' is a label
18112 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18113 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18114 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18115 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18116 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18117 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18118 (the target insn is arm_target_insn).
18120 If the jump clobbers the conditions then we use states 2 and 4.
18122 A similar thing can be done with conditional return insns.
18124 XXX In case the `target' is an unconditional branch, this conditionalising
18125 of the instructions always reduces code size, but not always execution
18126 time. But then, I want to reduce the code size to somewhere near what
18127 /bin/cc produces. */
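/* An illustrative example (added; not part of the original comment): for a
   sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   the fsm notices that the branch skips a single conditionalisable
   instruction and lets us emit instead:

	cmp	r0, #0
	addne	r1, r1, #1  */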
18129 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18130 instructions. When a COND_EXEC instruction is seen the subsequent
18131 instructions are scanned so that multiple conditional instructions can be
18132 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18133 specify the length and true/false mask for the IT block. These will be
18134 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
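/* A worked example (added for illustration): arm_condexec_masklen == 3,
   arm_condexec_mask == 0x5 (binary 101) and arm_current_cc == ARM_EQ
   describe the block "itet eq": the first and third instructions execute
   if EQ holds, the second if it does not. */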
18136 /* Returns the index of the ARM condition code string in
18137 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18138 COMPARISON should be an rtx like `(eq (...) (...))'. */
18140 static enum arm_cond_code
18141 maybe_get_arm_condition_code (rtx comparison)
18142 {
18143 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18144 enum arm_cond_code code;
18145 enum rtx_code comp_code = GET_CODE (comparison);
18147 if (GET_MODE_CLASS (mode) != MODE_CC)
18148 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18149 XEXP (comparison, 1));
18151 switch (mode)
18152 {
18153 case CC_DNEmode: code = ARM_NE; goto dominance;
18154 case CC_DEQmode: code = ARM_EQ; goto dominance;
18155 case CC_DGEmode: code = ARM_GE; goto dominance;
18156 case CC_DGTmode: code = ARM_GT; goto dominance;
18157 case CC_DLEmode: code = ARM_LE; goto dominance;
18158 case CC_DLTmode: code = ARM_LT; goto dominance;
18159 case CC_DGEUmode: code = ARM_CS; goto dominance;
18160 case CC_DGTUmode: code = ARM_HI; goto dominance;
18161 case CC_DLEUmode: code = ARM_LS; goto dominance;
18162 case CC_DLTUmode: code = ARM_CC;
18164 dominance:
18165 if (comp_code == EQ)
18166 return ARM_INVERSE_CONDITION_CODE (code);
18167 if (comp_code == NE)
18168 return code;
18169 return ARM_NV;
18171 case CC_NOOVmode:
18172 switch (comp_code)
18173 {
18174 case NE: return ARM_NE;
18175 case EQ: return ARM_EQ;
18176 case GE: return ARM_PL;
18177 case LT: return ARM_MI;
18178 default: return ARM_NV;
18179 }
18181 case CC_Zmode:
18182 switch (comp_code)
18183 {
18184 case NE: return ARM_NE;
18185 case EQ: return ARM_EQ;
18186 default: return ARM_NV;
18187 }
18189 case CC_Nmode:
18190 switch (comp_code)
18191 {
18192 case NE: return ARM_MI;
18193 case EQ: return ARM_PL;
18194 default: return ARM_NV;
18195 }
18197 case CCFPEmode:
18198 case CCFPmode:
18199 /* These encodings assume that AC=1 in the FPA system control
18200 byte. This allows us to handle all cases except UNEQ and
18201 LTGT. */
18202 switch (comp_code)
18203 {
18204 case GE: return ARM_GE;
18205 case GT: return ARM_GT;
18206 case LE: return ARM_LS;
18207 case LT: return ARM_MI;
18208 case NE: return ARM_NE;
18209 case EQ: return ARM_EQ;
18210 case ORDERED: return ARM_VC;
18211 case UNORDERED: return ARM_VS;
18212 case UNLT: return ARM_LT;
18213 case UNLE: return ARM_LE;
18214 case UNGT: return ARM_HI;
18215 case UNGE: return ARM_PL;
18216 /* UNEQ and LTGT do not have a representation. */
18217 case UNEQ: /* Fall through. */
18218 case LTGT: /* Fall through. */
18219 default: return ARM_NV;
18220 }
18222 case CC_SWPmode:
18223 switch (comp_code)
18224 {
18225 case NE: return ARM_NE;
18226 case EQ: return ARM_EQ;
18227 case GE: return ARM_LE;
18228 case GT: return ARM_LT;
18229 case LE: return ARM_GE;
18230 case LT: return ARM_GT;
18231 case GEU: return ARM_LS;
18232 case GTU: return ARM_CC;
18233 case LEU: return ARM_CS;
18234 case LTU: return ARM_HI;
18235 default: return ARM_NV;
18236 }
18238 case CC_Cmode:
18239 switch (comp_code)
18240 {
18241 case LTU: return ARM_CS;
18242 case GEU: return ARM_CC;
18243 default: return ARM_NV;
18244 }
18246 case CC_CZmode:
18247 switch (comp_code)
18248 {
18249 case NE: return ARM_NE;
18250 case EQ: return ARM_EQ;
18251 case GEU: return ARM_CS;
18252 case GTU: return ARM_HI;
18253 case LEU: return ARM_LS;
18254 case LTU: return ARM_CC;
18255 default: return ARM_NV;
18256 }
18258 case CC_NCVmode:
18259 switch (comp_code)
18260 {
18261 case GE: return ARM_GE;
18262 case LT: return ARM_LT;
18263 case GEU: return ARM_CS;
18264 case LTU: return ARM_CC;
18265 default: return ARM_NV;
18266 }
18268 case CCmode:
18269 switch (comp_code)
18270 {
18271 case NE: return ARM_NE;
18272 case EQ: return ARM_EQ;
18273 case GE: return ARM_GE;
18274 case GT: return ARM_GT;
18275 case LE: return ARM_LE;
18276 case LT: return ARM_LT;
18277 case GEU: return ARM_CS;
18278 case GTU: return ARM_HI;
18279 case LEU: return ARM_LS;
18280 case LTU: return ARM_CC;
18281 default: return ARM_NV;
18282 }
18284 default: gcc_unreachable ();
18285 }
18286 }
18288 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18289 static enum arm_cond_code
18290 get_arm_condition_code (rtx comparison)
18291 {
18292 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18293 gcc_assert (code != ARM_NV);
18294 return code;
18295 }
18297 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18298 instructions. */
18299 void
18300 thumb2_final_prescan_insn (rtx insn)
18301 {
18302 rtx first_insn = insn;
18303 rtx body = PATTERN (insn);
18304 rtx predicate;
18305 enum arm_cond_code code;
18306 int n;
18307 int mask;
18309 /* Remove the previous insn from the count of insns to be output. */
18310 if (arm_condexec_count)
18311 arm_condexec_count--;
18313 /* Nothing to do if we are already inside a conditional block. */
18314 if (arm_condexec_count)
18315 return;
18317 if (GET_CODE (body) != COND_EXEC)
18318 return;
18320 /* Conditional jumps are implemented directly. */
18321 if (GET_CODE (insn) == JUMP_INSN)
18322 return;
18324 predicate = COND_EXEC_TEST (body);
18325 arm_current_cc = get_arm_condition_code (predicate);
18327 n = get_attr_ce_count (insn);
18328 arm_condexec_count = 1;
18329 arm_condexec_mask = (1 << n) - 1;
18330 arm_condexec_masklen = n;
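/* Added note: the low N bits start out set because a single COND_EXEC
   pattern may expand to N machine instructions, all executed under the
   same (true) condition. */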
18331 /* See if subsequent instructions can be combined into the same block. */
18332 for (;;)
18333 {
18334 insn = next_nonnote_insn (insn);
18336 /* Jumping into the middle of an IT block is illegal, so a label or
18337 barrier terminates the block. */
18338 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
18339 break;
18341 body = PATTERN (insn);
18342 /* USE and CLOBBER aren't really insns, so just skip them. */
18343 if (GET_CODE (body) == USE
18344 || GET_CODE (body) == CLOBBER)
18345 continue;
18347 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18348 if (GET_CODE (body) != COND_EXEC)
18349 break;
18350 /* Allow up to 4 conditionally executed instructions in a block. */
18351 n = get_attr_ce_count (insn);
18352 if (arm_condexec_masklen + n > 4)
18353 break;
18355 predicate = COND_EXEC_TEST (body);
18356 code = get_arm_condition_code (predicate);
18357 mask = (1 << n) - 1;
18358 if (arm_current_cc == code)
18359 arm_condexec_mask |= (mask << arm_condexec_masklen);
18360 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18361 break;
18363 arm_condexec_count++;
18364 arm_condexec_masklen += n;
18366 /* A jump must be the last instruction in a conditional block. */
18367 if (GET_CODE (insn) == JUMP_INSN)
18368 break;
18369 }
18370 /* Restore recog_data (getting the attributes of other insns can
18371 destroy this array, but final.c assumes that it remains intact
18372 across this call). */
18373 extract_constrain_insn_cached (first_insn);
18374 }
18376 void
18377 arm_final_prescan_insn (rtx insn)
18378 {
18379 /* BODY will hold the body of INSN. */
18380 rtx body = PATTERN (insn);
18382 /* This will be 1 if trying to repeat the trick, and things need to be
18383 reversed if it appears to fail. */
18384 int reverse = 0;
18386 /* If we start with a return insn, we only succeed if we find another one. */
18387 int seeking_return = 0;
18388 enum rtx_code return_code = UNKNOWN;
18390 /* START_INSN will hold the insn from where we start looking. This is the
18391 first insn after the following code_label if REVERSE is true. */
18392 rtx start_insn = insn;
18394 /* If in state 4, check if the target branch is reached, in order to
18395 change back to state 0. */
18396 if (arm_ccfsm_state == 4)
18397 {
18398 if (insn == arm_target_insn)
18399 {
18400 arm_target_insn = NULL;
18401 arm_ccfsm_state = 0;
18402 }
18403 return;
18404 }
18406 /* If in state 3, it is possible to repeat the trick, if this insn is an
18407 unconditional branch to a label, and immediately following this branch
18408 is the previous target label which is only used once, and the label this
18409 branch jumps to is not too far off. */
18410 if (arm_ccfsm_state == 3)
18412 if (simplejump_p (insn))
18414 start_insn = next_nonnote_insn (start_insn);
18415 if (GET_CODE (start_insn) == BARRIER)
18417 /* XXX Isn't this always a barrier? */
18418 start_insn = next_nonnote_insn (start_insn);
18420 if (GET_CODE (start_insn) == CODE_LABEL
18421 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18422 && LABEL_NUSES (start_insn) == 1)
18423 reverse = TRUE;
18424 else
18425 return;
18426 }
18427 else if (ANY_RETURN_P (body))
18429 start_insn = next_nonnote_insn (start_insn);
18430 if (GET_CODE (start_insn) == BARRIER)
18431 start_insn = next_nonnote_insn (start_insn);
18432 if (GET_CODE (start_insn) == CODE_LABEL
18433 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18434 && LABEL_NUSES (start_insn) == 1)
18435 {
18436 reverse = TRUE;
18437 seeking_return = 1;
18438 return_code = GET_CODE (body);
18439 }
18440 else
18441 return;
18442 }
18443 else
18444 return;
18445 }
18447 gcc_assert (!arm_ccfsm_state || reverse);
18448 if (GET_CODE (insn) != JUMP_INSN)
18449 return;
18451 /* This jump might be paralleled with a clobber of the condition codes;
18452 the jump should always come first. */
18453 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18454 body = XVECEXP (body, 0, 0);
18456 if (ANY_RETURN_P (body)
18457 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18458 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18459 {
18460 int insns_skipped;
18461 int fail = FALSE, succeed = FALSE;
18462 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18463 int then_not_else = TRUE;
18464 rtx this_insn = start_insn, label = 0;
18466 /* Register the insn jumped to. */
18467 if (reverse)
18468 {
18469 if (!seeking_return)
18470 label = XEXP (SET_SRC (body), 0);
18471 }
18472 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18473 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18474 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18475 {
18476 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18477 then_not_else = FALSE;
18478 }
18479 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18480 {
18481 seeking_return = 1;
18482 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18483 }
18484 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18485 {
18486 seeking_return = 1;
18487 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18488 then_not_else = FALSE;
18489 }
18490 else
18491 gcc_unreachable ();
18493 /* See how many insns this branch skips, and what kind of insns. If all
18494 insns are okay, and the label or unconditional branch to the same
18495 label is not too far away, succeed. */
18496 for (insns_skipped = 0;
18497 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18499 {
18500 rtx scanbody;
18501 this_insn = next_nonnote_insn (this_insn);
18502 if (!this_insn)
18503 break;
18505 switch (GET_CODE (this_insn))
18506 {
18507 case CODE_LABEL:
18508 /* Succeed if it is the target label, otherwise fail since
18509 control falls in from somewhere else. */
18510 if (this_insn == label)
18511 {
18512 arm_ccfsm_state = 1;
18513 succeed = TRUE;
18514 }
18515 else
18516 fail = TRUE;
18517 break;
18519 case BARRIER:
18520 /* Succeed if the following insn is the target label.
18521 Otherwise fail.
18522 If return insns are used then the last insn in a function
18523 will be a barrier. */
18524 this_insn = next_nonnote_insn (this_insn);
18525 if (this_insn && this_insn == label)
18526 {
18527 arm_ccfsm_state = 1;
18528 succeed = TRUE;
18529 }
18530 else
18531 fail = TRUE;
18532 break;
18534 case CALL_INSN:
18535 /* The AAPCS says that conditional calls should not be
18536 used since they make interworking inefficient (the
18537 linker can't transform BL<cond> into BLX). That's
18538 only a problem if the machine has BLX. */
18539 if (arm_arch5)
18540 {
18541 fail = TRUE;
18542 break;
18543 }
18545 /* Succeed if the following insn is the target label, or
18546 if the following two insns are a barrier and the
18547 target label. */
18548 this_insn = next_nonnote_insn (this_insn);
18549 if (this_insn && GET_CODE (this_insn) == BARRIER)
18550 this_insn = next_nonnote_insn (this_insn);
18552 if (this_insn && this_insn == label
18553 && insns_skipped < max_insns_skipped)
18554 {
18555 arm_ccfsm_state = 1;
18556 succeed = TRUE;
18557 }
18558 else
18559 fail = TRUE;
18560 break;
18562 case JUMP_INSN:
18563 /* If this is an unconditional branch to the same label, succeed.
18564 If it is to another label, do nothing. If it is conditional,
18565 fail. */
18566 /* XXX Probably, the tests for SET and the PC are
18567 unnecessary. */
18569 scanbody = PATTERN (this_insn);
18570 if (GET_CODE (scanbody) == SET
18571 && GET_CODE (SET_DEST (scanbody)) == PC)
18572 {
18573 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18574 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18575 {
18576 arm_ccfsm_state = 2;
18577 succeed = TRUE;
18578 }
18579 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18580 fail = TRUE;
18581 }
18582 /* Fail if a conditional return is undesirable (e.g. on a
18583 StrongARM), but still allow this if optimizing for size. */
18584 else if (GET_CODE (scanbody) == return_code
18585 && !use_return_insn (TRUE, NULL)
18586 && !optimize_size)
18587 fail = TRUE;
18588 else if (GET_CODE (scanbody) == return_code)
18589 {
18590 arm_ccfsm_state = 2;
18591 succeed = TRUE;
18592 }
18593 else if (GET_CODE (scanbody) == PARALLEL)
18594 {
18595 switch (get_attr_conds (this_insn))
18596 {
18597 case CONDS_NOCOND:
18598 break;
18599 default:
18600 fail = TRUE;
18601 break;
18602 }
18603 }
18604 else
18605 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18606 break;
18608 case INSN:
18610 /* Instructions using or affecting the condition codes make it
18611 fail. */
18612 scanbody = PATTERN (this_insn);
18613 if (!(GET_CODE (scanbody) == SET
18614 || GET_CODE (scanbody) == PARALLEL)
18615 || get_attr_conds (this_insn) != CONDS_NOCOND)
18616 fail = TRUE;
18618 /* A conditional Cirrus instruction must be followed by
18619 a non-Cirrus instruction. However, since we
18620 conditionalize instructions in this function and by
18621 the time we get here we can't add instructions
18622 (nops), because shorten_branches() has already been
18623 called, we will disable conditionalizing Cirrus
18624 instructions to be safe. */
18625 if (GET_CODE (scanbody) != USE
18626 && GET_CODE (scanbody) != CLOBBER
18627 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18628 fail = TRUE;
18629 break;
18631 default:
18632 break;
18633 }
18634 }
18635 if (succeed)
18636 {
18637 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18638 arm_target_label = CODE_LABEL_NUMBER (label);
18639 else
18640 {
18641 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18643 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18644 {
18645 this_insn = next_nonnote_insn (this_insn);
18646 gcc_assert (!this_insn
18647 || (GET_CODE (this_insn) != BARRIER
18648 && GET_CODE (this_insn) != CODE_LABEL));
18649 }
18650 if (!this_insn)
18651 {
18652 /* Oh, dear! We ran off the end... give up. */
18653 extract_constrain_insn_cached (insn);
18654 arm_ccfsm_state = 0;
18655 arm_target_insn = NULL;
18656 return;
18657 }
18658 arm_target_insn = this_insn;
18659 }
18661 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18662 what it was. */
18663 if (!reverse)
18664 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18666 if (reverse || then_not_else)
18667 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18668 }
18670 /* Restore recog_data (getting the attributes of other insns can
18671 destroy this array, but final.c assumes that it remains intact
18672 across this call). */
18673 extract_constrain_insn_cached (insn);
18674 }
18677 /* Output IT instructions. */
18678 void
18679 thumb2_asm_output_opcode (FILE * stream)
18680 {
18681 char buff[5];
18682 int n;
18684 if (arm_condexec_mask)
18685 {
18686 for (n = 0; n < arm_condexec_masklen; n++)
18687 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18688 buff[n] = 0;
18689 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18690 arm_condition_codes[arm_current_cc]);
18691 arm_condexec_mask = 0;
18692 }
18693 }
18695 /* Returns true if REGNO is a valid register
18696 for holding a quantity of type MODE. */
18697 int
18698 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18699 {
18700 if (GET_MODE_CLASS (mode) == MODE_CC)
18701 return (regno == CC_REGNUM
18702 || (TARGET_HARD_FLOAT && TARGET_VFP
18703 && regno == VFPCC_REGNUM));
18705 if (TARGET_THUMB1)
18706 /* For the Thumb we only allow values bigger than SImode in
18707 registers 0 - 6, so that there is always a second low
18708 register available to hold the upper part of the value.
18709 We probably ought to ensure that the register is the
18710 start of an even numbered register pair. */
18711 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18713 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18714 && IS_CIRRUS_REGNUM (regno))
18715 /* We have outlawed SI values in Cirrus registers because they
18716 reside in the lower 32 bits, but SF values reside in the
18717 upper 32 bits. This causes gcc all sorts of grief. We can't
18718 even split the registers into pairs because Cirrus SI values
18719 get sign-extended to 64 bits -- aldyh. */
18720 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18722 if (TARGET_HARD_FLOAT && TARGET_VFP
18723 && IS_VFP_REGNUM (regno))
18725 if (mode == SFmode || mode == SImode)
18726 return VFP_REGNO_OK_FOR_SINGLE (regno);
18728 if (mode == DFmode)
18729 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18731 /* VFP registers can hold HFmode values, but there is no point in
18732 putting them there unless we have hardware conversion insns. */
18733 if (mode == HFmode)
18734 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18736 if (TARGET_NEON)
18737 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18738 || (VALID_NEON_QREG_MODE (mode)
18739 && NEON_REGNO_OK_FOR_QUAD (regno))
18740 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18741 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18742 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18743 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18744 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18746 return FALSE;
18747 }
18749 if (TARGET_REALLY_IWMMXT)
18750 {
18751 if (IS_IWMMXT_GR_REGNUM (regno))
18752 return mode == SImode;
18754 if (IS_IWMMXT_REGNUM (regno))
18755 return VALID_IWMMXT_REG_MODE (mode);
18756 }
18758 /* We allow almost any value to be stored in the general registers.
18759 Restrict doubleword quantities to even register pairs so that we can
18760 use ldrd. Do not allow very large Neon structure opaque modes in
18761 general registers; they would use too many. */
18762 if (regno <= LAST_ARM_REGNUM)
18763 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18764 && ARM_NUM_REGS (mode) <= 4;
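/* For example (illustrative): with TARGET_LDRD a DImode value may be
   placed in {r0,r1} or {r2,r3} but not in {r1,r2}, since ldrd/strd
   require an even-numbered first register. */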
18766 if (regno == FRAME_POINTER_REGNUM
18767 || regno == ARG_POINTER_REGNUM)
18768 /* We only allow integers in the fake hard registers. */
18769 return GET_MODE_CLASS (mode) == MODE_INT;
18771 /* The only registers left are the FPA registers
18772 which we only allow to hold FP values. */
18773 return (TARGET_HARD_FLOAT && TARGET_FPA
18774 && GET_MODE_CLASS (mode) == MODE_FLOAT
18775 && regno >= FIRST_FPA_REGNUM
18776 && regno <= LAST_FPA_REGNUM);
18777 }
18779 /* Implement MODES_TIEABLE_P. */
18781 bool
18782 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18783 {
18784 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18785 return true;
18787 /* We specifically want to allow elements of "structure" modes to
18788 be tieable to the structure. This more general condition allows
18789 other rarer situations too. */
18790 if (TARGET_NEON
18791 && (VALID_NEON_DREG_MODE (mode1)
18792 || VALID_NEON_QREG_MODE (mode1)
18793 || VALID_NEON_STRUCT_MODE (mode1))
18794 && (VALID_NEON_DREG_MODE (mode2)
18795 || VALID_NEON_QREG_MODE (mode2)
18796 || VALID_NEON_STRUCT_MODE (mode2)))
18797 return true;
18799 return false;
18800 }
18802 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18803 not used in arm mode. */
18805 enum reg_class
18806 arm_regno_class (int regno)
18807 {
18808 if (TARGET_THUMB1)
18809 {
18810 if (regno == STACK_POINTER_REGNUM)
18811 return STACK_REG;
18812 if (regno == CC_REGNUM)
18813 return CC_REG;
18814 if (regno < 8)
18815 return LO_REGS;
18816 return HI_REGS;
18817 }
18819 if (TARGET_THUMB2 && regno < 8)
18820 return LO_REGS;
18822 if ( regno <= LAST_ARM_REGNUM
18823 || regno == FRAME_POINTER_REGNUM
18824 || regno == ARG_POINTER_REGNUM)
18825 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18827 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18828 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18830 if (IS_CIRRUS_REGNUM (regno))
18831 return CIRRUS_REGS;
18833 if (IS_VFP_REGNUM (regno))
18835 if (regno <= D7_VFP_REGNUM)
18836 return VFP_D0_D7_REGS;
18837 else if (regno <= LAST_LO_VFP_REGNUM)
18838 return VFP_LO_REGS;
18840 return VFP_HI_REGS;
18843 if (IS_IWMMXT_REGNUM (regno))
18844 return IWMMXT_REGS;
18846 if (IS_IWMMXT_GR_REGNUM (regno))
18847 return IWMMXT_GR_REGS;
18849 return FPA_REGS;
18850 }
18852 /* Handle a special case when computing the offset
18853 of an argument from the frame pointer. */
18854 int
18855 arm_debugger_arg_offset (int value, rtx addr)
18856 {
18857 rtx insn;
18859 /* We are only interested if dbxout_parms() failed to compute the offset. */
18860 if (value != 0)
18861 return 0;
18863 /* We can only cope with the case where the address is held in a register. */
18864 if (GET_CODE (addr) != REG)
18865 return 0;
18867 /* If we are using the frame pointer to point at the argument, then
18868 an offset of 0 is correct. */
18869 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18870 return 0;
18872 /* If we are using the stack pointer to point at the
18873 argument, then an offset of 0 is correct. */
18874 /* ??? Check this is consistent with thumb2 frame layout. */
18875 if ((TARGET_THUMB || !frame_pointer_needed)
18876 && REGNO (addr) == SP_REGNUM)
18877 return 0;
18879 /* Oh dear. The argument is pointed to by a register rather
18880 than being held in a register, or being stored at a known
18881 offset from the frame pointer. Since GDB only understands
18882 those two kinds of argument we must translate the address
18883 held in the register into an offset from the frame pointer.
18884 We do this by searching through the insns for the function
18885 looking to see where this register gets its value. If the
18886 register is initialized from the frame pointer plus an offset
18887 then we are in luck and we can continue, otherwise we give up.
18889 This code is exercised by producing debugging information
18890 for a function with arguments like this:
18892 double func (double a, double b, int c, double d) {return d;}
18894 Without this code the stab for parameter 'd' will be set to
18895 an offset of 0 from the frame pointer, rather than 8. */
18897 /* The if() statement says:
18899 If the insn is a normal instruction
18900 and if the insn is setting the value in a register
18901 and if the register being set is the register holding the address of the argument
18902 and if the address is computed by an addition
18903 that involves adding to a register
18904 which is the frame pointer
18909 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18911 if ( GET_CODE (insn) == INSN
18912 && GET_CODE (PATTERN (insn)) == SET
18913 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18914 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18915 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18916 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18917 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18918 )
18919 {
18920 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18922 break;
18923 }
18924 }
18926 if (value == 0)
18927 {
18929 warning (0, "unable to compute real location of stacked parameter");
18930 value = 8; /* XXX magic hack */
18931 }
18933 return value;
18934 }
18936 typedef enum {
18937 T_V8QI,
18938 T_V4HI,
18939 T_V2SI,
18940 T_V2SF,
18941 T_DI,
18942 T_V16QI,
18943 T_V8HI,
18944 T_V4SI,
18945 T_V4SF,
18946 T_V2DI,
18947 T_TI,
18948 T_EI,
18949 T_OI,
18950 T_MAX /* Size of enum. Keep last. */
18951 } neon_builtin_type_mode;
18953 #define TYPE_MODE_BIT(X) (1 << (X))
18955 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18956 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18957 | TYPE_MODE_BIT (T_DI))
18958 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18959 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18960 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18962 #define v8qi_UP T_V8QI
18963 #define v4hi_UP T_V4HI
18964 #define v2si_UP T_V2SI
18965 #define v2sf_UP T_V2SF
18966 #define di_UP T_DI
18967 #define v16qi_UP T_V16QI
18968 #define v8hi_UP T_V8HI
18969 #define v4si_UP T_V4SI
18970 #define v4sf_UP T_V4SF
18971 #define v2di_UP T_V2DI
18972 #define ti_UP T_TI
18973 #define ei_UP T_EI
18974 #define oi_UP T_OI
18976 #define UP(X) X##_UP
19009 NEON_LOADSTRUCTLANE,
19011 NEON_STORESTRUCTLANE,
19012 NEON_LOGICBINOP,
19013 NEON_SHIFTINSERT,
19014 NEON_SHIFTIMM,
19015 NEON_SHIFTACC
19016 } neon_itype;
19018 typedef struct {
19019 const char *name;
19020 const neon_itype itype;
19021 const neon_builtin_type_mode mode;
19022 const enum insn_code code;
19023 unsigned int fcode;
19024 } neon_builtin_datum;
19026 #define CF(N,X) CODE_FOR_neon_##N##X
19028 #define VAR1(T, N, A) \
19029 {#N, NEON_##T, UP (A), CF (N, A), 0}
19030 #define VAR2(T, N, A, B) \
19031 VAR1 (T, N, A), \
19032 {#N, NEON_##T, UP (B), CF (N, B), 0}
19033 #define VAR3(T, N, A, B, C) \
19034 VAR2 (T, N, A, B), \
19035 {#N, NEON_##T, UP (C), CF (N, C), 0}
19036 #define VAR4(T, N, A, B, C, D) \
19037 VAR3 (T, N, A, B, C), \
19038 {#N, NEON_##T, UP (D), CF (N, D), 0}
19039 #define VAR5(T, N, A, B, C, D, E) \
19040 VAR4 (T, N, A, B, C, D), \
19041 {#N, NEON_##T, UP (E), CF (N, E), 0}
19042 #define VAR6(T, N, A, B, C, D, E, F) \
19043 VAR5 (T, N, A, B, C, D, E), \
19044 {#N, NEON_##T, UP (F), CF (N, F), 0}
19045 #define VAR7(T, N, A, B, C, D, E, F, G) \
19046 VAR6 (T, N, A, B, C, D, E, F), \
19047 {#N, NEON_##T, UP (G), CF (N, G), 0}
19048 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19049 VAR7 (T, N, A, B, C, D, E, F, G), \
19050 {#N, NEON_##T, UP (H), CF (N, H), 0}
19051 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19052 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19053 {#N, NEON_##T, UP (I), CF (N, I), 0}
19054 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19055 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19056 {#N, NEON_##T, UP (J), CF (N, J), 0}
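/* For instance (added illustration), VAR2 (BINOP, vaddl, v8qi, v4hi)
   expands via the UP and CF helper macros above to the two table entries
     {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
     {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0}. */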
19058 /* The mode entries in the following table correspond to the "key" type of the
19059 instruction variant, i.e. equivalent to that which would be specified after
19060 the assembler mnemonic, which usually refers to the last vector operand.
19061 (Signed/unsigned/polynomial types are not differentiated between though, and
19062 are all mapped onto the same mode for a given element size.) The modes
19063 listed per instruction should be the same as those defined for that
19064 instruction's pattern in neon.md. */
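/* E.g. (added note): the v4hi entry for vadd corresponds to an instruction
   such as "vadd.i16 d0, d1, d2", whose key type is the 16-bit element of
   its 64-bit vector operands. */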
19066 static neon_builtin_datum neon_builtin_data[] =
19067 {
19068 VAR10 (BINOP, vadd,
19069 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19070 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19071 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19072 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19073 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19074 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19075 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19076 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19077 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19078 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19079 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19080 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19081 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19082 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19083 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19084 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19085 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19086 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19087 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19088 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19089 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19090 VAR2 (BINOP, vqdmull, v4hi, v2si),
19091 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19092 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19093 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19094 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19095 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19096 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19097 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19098 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19099 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19100 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19101 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19102 VAR10 (BINOP, vsub,
19103 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19104 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19105 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19106 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19107 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19108 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19109 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19110 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19111 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19112 VAR2 (BINOP, vcage, v2sf, v4sf),
19113 VAR2 (BINOP, vcagt, v2sf, v4sf),
19114 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19115 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19116 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19117 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19118 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19119 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19120 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19121 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19122 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19123 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19124 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19125 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19126 VAR2 (BINOP, vrecps, v2sf, v4sf),
19127 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19128 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19129 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19130 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19131 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19132 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19133 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19134 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19135 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19136 VAR2 (UNOP, vcnt, v8qi, v16qi),
19137 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19138 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19139 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19140 /* FIXME: vget_lane supports more variants than this! */
19141 VAR10 (GETLANE, vget_lane,
19142 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19143 VAR10 (SETLANE, vset_lane,
19144 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19145 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19146 VAR10 (DUP, vdup_n,
19147 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19148 VAR10 (DUPLANE, vdup_lane,
19149 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19150 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19151 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19152 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19153 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19154 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19155 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19156 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19157 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19158 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19159 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19160 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19161 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19162 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19163 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19164 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19165 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19166 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19167 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19168 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19169 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19170 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19171 VAR10 (BINOP, vext,
19172 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19173 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19174 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19175 VAR2 (UNOP, vrev16, v8qi, v16qi),
19176 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19177 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19178 VAR10 (SELECT, vbsl,
19179 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19180 VAR1 (VTBL, vtbl1, v8qi),
19181 VAR1 (VTBL, vtbl2, v8qi),
19182 VAR1 (VTBL, vtbl3, v8qi),
19183 VAR1 (VTBL, vtbl4, v8qi),
19184 VAR1 (VTBX, vtbx1, v8qi),
19185 VAR1 (VTBX, vtbx2, v8qi),
19186 VAR1 (VTBX, vtbx3, v8qi),
19187 VAR1 (VTBX, vtbx4, v8qi),
19188 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19189 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19190 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19191 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19192 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19193 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19194 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19195 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19196 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19197 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19198 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19199 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19200 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19201 VAR10 (LOAD1, vld1,
19202 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19203 VAR10 (LOAD1LANE, vld1_lane,
19204 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19205 VAR10 (LOAD1, vld1_dup,
19206 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19207 VAR10 (STORE1, vst1,
19208 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19209 VAR10 (STORE1LANE, vst1_lane,
19210 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19211 VAR9 (LOADSTRUCT,
19212 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19213 VAR7 (LOADSTRUCTLANE, vld2_lane,
19214 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19215 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19216 VAR9 (STORESTRUCT, vst2,
19217 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19218 VAR7 (STORESTRUCTLANE, vst2_lane,
19219 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19220 VAR9 (LOADSTRUCT,
19221 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19222 VAR7 (LOADSTRUCTLANE, vld3_lane,
19223 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19224 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19225 VAR9 (STORESTRUCT, vst3,
19226 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19227 VAR7 (STORESTRUCTLANE, vst3_lane,
19228 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19229 VAR9 (LOADSTRUCT, vld4,
19230 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19231 VAR7 (LOADSTRUCTLANE, vld4_lane,
19232 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19233 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19234 VAR9 (STORESTRUCT, vst4,
19235 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19236 VAR7 (STORESTRUCTLANE, vst4_lane,
19237 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19238 VAR10 (LOGICBINOP, vand,
19239 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19240 VAR10 (LOGICBINOP, vorr,
19241 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19242 VAR10 (BINOP, veor,
19243 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19244 VAR10 (LOGICBINOP, vbic,
19245 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19246 VAR10 (LOGICBINOP, vorn,
19247 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19248 };
19262 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19263 symbolic names defined here (which would require too much duplication).
19264 FIXME? */
19265 enum arm_builtins
19266 {
19267 ARM_BUILTIN_GETWCX,
19268 ARM_BUILTIN_SETWCX,
19272 ARM_BUILTIN_WAVG2BR,
19273 ARM_BUILTIN_WAVG2HR,
19274 ARM_BUILTIN_WAVG2B,
19275 ARM_BUILTIN_WAVG2H,
19282 ARM_BUILTIN_WMACSZ,
19284 ARM_BUILTIN_WMACUZ,
19287 ARM_BUILTIN_WSADBZ,
19289 ARM_BUILTIN_WSADHZ,
19291 ARM_BUILTIN_WALIGN,
19294 ARM_BUILTIN_TMIAPH,
19295 ARM_BUILTIN_TMIABB,
19296 ARM_BUILTIN_TMIABT,
19297 ARM_BUILTIN_TMIATB,
19298 ARM_BUILTIN_TMIATT,
19300 ARM_BUILTIN_TMOVMSKB,
19301 ARM_BUILTIN_TMOVMSKH,
19302 ARM_BUILTIN_TMOVMSKW,
19304 ARM_BUILTIN_TBCSTB,
19305 ARM_BUILTIN_TBCSTH,
19306 ARM_BUILTIN_TBCSTW,
19308 ARM_BUILTIN_WMADDS,
19309 ARM_BUILTIN_WMADDU,
19311 ARM_BUILTIN_WPACKHSS,
19312 ARM_BUILTIN_WPACKWSS,
19313 ARM_BUILTIN_WPACKDSS,
19314 ARM_BUILTIN_WPACKHUS,
19315 ARM_BUILTIN_WPACKWUS,
19316 ARM_BUILTIN_WPACKDUS,
19321 ARM_BUILTIN_WADDSSB,
19322 ARM_BUILTIN_WADDSSH,
19323 ARM_BUILTIN_WADDSSW,
19324 ARM_BUILTIN_WADDUSB,
19325 ARM_BUILTIN_WADDUSH,
19326 ARM_BUILTIN_WADDUSW,
19330 ARM_BUILTIN_WSUBSSB,
19331 ARM_BUILTIN_WSUBSSH,
19332 ARM_BUILTIN_WSUBSSW,
19333 ARM_BUILTIN_WSUBUSB,
19334 ARM_BUILTIN_WSUBUSH,
19335 ARM_BUILTIN_WSUBUSW,
19342 ARM_BUILTIN_WCMPEQB,
19343 ARM_BUILTIN_WCMPEQH,
19344 ARM_BUILTIN_WCMPEQW,
19345 ARM_BUILTIN_WCMPGTUB,
19346 ARM_BUILTIN_WCMPGTUH,
19347 ARM_BUILTIN_WCMPGTUW,
19348 ARM_BUILTIN_WCMPGTSB,
19349 ARM_BUILTIN_WCMPGTSH,
19350 ARM_BUILTIN_WCMPGTSW,
19352 ARM_BUILTIN_TEXTRMSB,
19353 ARM_BUILTIN_TEXTRMSH,
19354 ARM_BUILTIN_TEXTRMSW,
19355 ARM_BUILTIN_TEXTRMUB,
19356 ARM_BUILTIN_TEXTRMUH,
19357 ARM_BUILTIN_TEXTRMUW,
19358 ARM_BUILTIN_TINSRB,
19359 ARM_BUILTIN_TINSRH,
19360 ARM_BUILTIN_TINSRW,
19362 ARM_BUILTIN_WMAXSW,
19363 ARM_BUILTIN_WMAXSH,
19364 ARM_BUILTIN_WMAXSB,
19365 ARM_BUILTIN_WMAXUW,
19366 ARM_BUILTIN_WMAXUH,
19367 ARM_BUILTIN_WMAXUB,
19368 ARM_BUILTIN_WMINSW,
19369 ARM_BUILTIN_WMINSH,
19370 ARM_BUILTIN_WMINSB,
19371 ARM_BUILTIN_WMINUW,
19372 ARM_BUILTIN_WMINUH,
19373 ARM_BUILTIN_WMINUB,
19375 ARM_BUILTIN_WMULUM,
19376 ARM_BUILTIN_WMULSM,
19377 ARM_BUILTIN_WMULUL,
19379 ARM_BUILTIN_PSADBH,
19380 ARM_BUILTIN_WSHUFH,
19394 ARM_BUILTIN_WSLLHI,
19395 ARM_BUILTIN_WSLLWI,
19396 ARM_BUILTIN_WSLLDI,
19397 ARM_BUILTIN_WSRAHI,
19398 ARM_BUILTIN_WSRAWI,
19399 ARM_BUILTIN_WSRADI,
19400 ARM_BUILTIN_WSRLHI,
19401 ARM_BUILTIN_WSRLWI,
19402 ARM_BUILTIN_WSRLDI,
19403 ARM_BUILTIN_WRORHI,
19404 ARM_BUILTIN_WRORWI,
19405 ARM_BUILTIN_WRORDI,
19407 ARM_BUILTIN_WUNPCKIHB,
19408 ARM_BUILTIN_WUNPCKIHH,
19409 ARM_BUILTIN_WUNPCKIHW,
19410 ARM_BUILTIN_WUNPCKILB,
19411 ARM_BUILTIN_WUNPCKILH,
19412 ARM_BUILTIN_WUNPCKILW,
19414 ARM_BUILTIN_WUNPCKEHSB,
19415 ARM_BUILTIN_WUNPCKEHSH,
19416 ARM_BUILTIN_WUNPCKEHSW,
19417 ARM_BUILTIN_WUNPCKEHUB,
19418 ARM_BUILTIN_WUNPCKEHUH,
19419 ARM_BUILTIN_WUNPCKEHUW,
19420 ARM_BUILTIN_WUNPCKELSB,
19421 ARM_BUILTIN_WUNPCKELSH,
19422 ARM_BUILTIN_WUNPCKELSW,
19423 ARM_BUILTIN_WUNPCKELUB,
19424 ARM_BUILTIN_WUNPCKELUH,
19425 ARM_BUILTIN_WUNPCKELUW,
19427 ARM_BUILTIN_THREAD_POINTER,
19429 ARM_BUILTIN_NEON_BASE,
19431 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19432 };
19434 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19436 static void
19437 arm_init_neon_builtins (void)
19438 {
19439 unsigned int i, fcode;
19440 tree decl;
19442 tree neon_intQI_type_node;
19443 tree neon_intHI_type_node;
19444 tree neon_polyQI_type_node;
19445 tree neon_polyHI_type_node;
19446 tree neon_intSI_type_node;
19447 tree neon_intDI_type_node;
19448 tree neon_float_type_node;
19450 tree intQI_pointer_node;
19451 tree intHI_pointer_node;
19452 tree intSI_pointer_node;
19453 tree intDI_pointer_node;
19454 tree float_pointer_node;
19456 tree const_intQI_node;
19457 tree const_intHI_node;
19458 tree const_intSI_node;
19459 tree const_intDI_node;
19460 tree const_float_node;
19462 tree const_intQI_pointer_node;
19463 tree const_intHI_pointer_node;
19464 tree const_intSI_pointer_node;
19465 tree const_intDI_pointer_node;
19466 tree const_float_pointer_node;
19468 tree V8QI_type_node;
19469 tree V4HI_type_node;
19470 tree V2SI_type_node;
19471 tree V2SF_type_node;
19472 tree V16QI_type_node;
19473 tree V8HI_type_node;
19474 tree V4SI_type_node;
19475 tree V4SF_type_node;
19476 tree V2DI_type_node;
19478 tree intUQI_type_node;
19479 tree intUHI_type_node;
19480 tree intUSI_type_node;
19481 tree intUDI_type_node;
19483 tree intEI_type_node;
19484 tree intOI_type_node;
19485 tree intCI_type_node;
19486 tree intXI_type_node;
19488 tree V8QI_pointer_node;
19489 tree V4HI_pointer_node;
19490 tree V2SI_pointer_node;
19491 tree V2SF_pointer_node;
19492 tree V16QI_pointer_node;
19493 tree V8HI_pointer_node;
19494 tree V4SI_pointer_node;
19495 tree V4SF_pointer_node;
19496 tree V2DI_pointer_node;
19498 tree void_ftype_pv8qi_v8qi_v8qi;
19499 tree void_ftype_pv4hi_v4hi_v4hi;
19500 tree void_ftype_pv2si_v2si_v2si;
19501 tree void_ftype_pv2sf_v2sf_v2sf;
19502 tree void_ftype_pdi_di_di;
19503 tree void_ftype_pv16qi_v16qi_v16qi;
19504 tree void_ftype_pv8hi_v8hi_v8hi;
19505 tree void_ftype_pv4si_v4si_v4si;
19506 tree void_ftype_pv4sf_v4sf_v4sf;
19507 tree void_ftype_pv2di_v2di_v2di;
19509 tree reinterp_ftype_dreg[5][5];
19510 tree reinterp_ftype_qreg[5][5];
19511 tree dreg_types[5], qreg_types[5];
19513 /* Create distinguished type nodes for NEON vector element types,
19514 and pointers to values of such types, so we can detect them later. */
19515 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19516 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19517 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19518 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19519 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19520 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19521 neon_float_type_node = make_node (REAL_TYPE);
19522 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19523 layout_type (neon_float_type_node);
19525 /* Define typedefs which exactly correspond to the modes we are basing vector
19526 types on. If you change these names you'll need to change
19527 the table used by arm_mangle_type too. */
19528 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19529 "__builtin_neon_qi");
19530 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19531 "__builtin_neon_hi");
19532 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19533 "__builtin_neon_si");
19534 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19535 "__builtin_neon_sf");
19536 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19537 "__builtin_neon_di");
19538 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19539 "__builtin_neon_poly8");
19540 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19541 "__builtin_neon_poly16");
19543 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19544 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19545 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19546 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19547 float_pointer_node = build_pointer_type (neon_float_type_node);
19549 /* Next create constant-qualified versions of the above types. */
19550 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19552 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19554 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19556 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19558 const_float_node = build_qualified_type (neon_float_type_node,
19561 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19562 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19563 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19564 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19565 const_float_pointer_node = build_pointer_type (const_float_node);
19567 /* Now create vector types based on our NEON element types. */
19568 /* 64-bit vectors. */
19570 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19572 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19574 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19576 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19577 /* 128-bit vectors. */
19579 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19581 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19583 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19585 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19587 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19589 /* Unsigned integer types for various mode sizes. */
19590 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19591 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19592 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19593 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19595 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19596 "__builtin_neon_uqi");
19597 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19598 "__builtin_neon_uhi");
19599 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19600 "__builtin_neon_usi");
19601 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19602 "__builtin_neon_udi");
19604 /* Opaque integer types for structures of vectors. */
19605 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19606 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19607 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19608 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19610 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19611 "__builtin_neon_ti");
19612 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19613 "__builtin_neon_ei");
19614 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19615 "__builtin_neon_oi");
19616 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19617 "__builtin_neon_ci");
19618 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19619 "__builtin_neon_xi");
19621 /* Pointers to vector types. */
19622 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19623 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19624 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19625 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19626 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19627 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19628 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19629 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19630 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19632 /* Operations which return results as pairs. */
19633 void_ftype_pv8qi_v8qi_v8qi =
19634 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19635 V8QI_type_node, NULL);
19636 void_ftype_pv4hi_v4hi_v4hi =
19637 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19638 V4HI_type_node, NULL);
19639 void_ftype_pv2si_v2si_v2si =
19640 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19641 V2SI_type_node, NULL);
19642 void_ftype_pv2sf_v2sf_v2sf =
19643 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19644 V2SF_type_node, NULL);
19645 void_ftype_pdi_di_di =
19646 build_function_type_list (void_type_node, intDI_pointer_node,
19647 neon_intDI_type_node, neon_intDI_type_node, NULL);
19648 void_ftype_pv16qi_v16qi_v16qi =
19649 build_function_type_list (void_type_node, V16QI_pointer_node,
19650 V16QI_type_node, V16QI_type_node, NULL);
19651 void_ftype_pv8hi_v8hi_v8hi =
19652 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19653 V8HI_type_node, NULL);
19654 void_ftype_pv4si_v4si_v4si =
19655 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19656 V4SI_type_node, NULL);
19657 void_ftype_pv4sf_v4sf_v4sf =
19658 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19659 V4SF_type_node, NULL);
19660 void_ftype_pv2di_v2di_v2di =
19661 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19662 V2DI_type_node, NULL);
19664 dreg_types[0] = V8QI_type_node;
19665 dreg_types[1] = V4HI_type_node;
19666 dreg_types[2] = V2SI_type_node;
19667 dreg_types[3] = V2SF_type_node;
19668 dreg_types[4] = neon_intDI_type_node;
19670 qreg_types[0] = V16QI_type_node;
19671 qreg_types[1] = V8HI_type_node;
19672 qreg_types[2] = V4SI_type_node;
19673 qreg_types[3] = V4SF_type_node;
19674 qreg_types[4] = V2DI_type_node;
19676 for (i = 0; i < 5; i++)
19677 {
19678 int j;
19679 for (j = 0; j < 5; j++)
19680 {
19681 reinterp_ftype_dreg[i][j]
19682 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19683 reinterp_ftype_qreg[i][j]
19684 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19685 }
19686 }
19688 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19689 i < ARRAY_SIZE (neon_builtin_data);
19690 i++, fcode++)
19691 {
19692 neon_builtin_datum *d = &neon_builtin_data[i];
19694 const char* const modenames[] = {
19695 "v8qi", "v4hi", "v2si", "v2sf", "di",
19696 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19701 int is_load = 0, is_store = 0;
19703 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19710 case NEON_LOAD1LANE:
19711 case NEON_LOADSTRUCT:
19712 case NEON_LOADSTRUCTLANE:
19713 is_load = 1;
19714 /* Fall through. */
19715 case NEON_STORE1:
19716 case NEON_STORE1LANE:
19717 case NEON_STORESTRUCT:
19718 case NEON_STORESTRUCTLANE:
19720 is_store = 1;
19721 /* Fall through. */
19724 case NEON_LOGICBINOP:
19725 case NEON_SHIFTINSERT:
19732 case NEON_SHIFTIMM:
19733 case NEON_SHIFTACC:
19739 case NEON_LANEMULL:
19740 case NEON_LANEMULH:
19742 case NEON_SCALARMUL:
19743 case NEON_SCALARMULL:
19744 case NEON_SCALARMULH:
19745 case NEON_SCALARMAC:
19748 {
19749 int k;
19751 tree return_type = void_type_node, args = void_list_node;
19753 /* Build a function type directly from the insn_data for
19754 this builtin. The build_function_type() function takes
19755 care of removing duplicates for us. */
19756 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19757 {
19758 tree eltype;
19760 if (is_load && k == 1)
19762 /* Neon load patterns always have the memory
19763 operand in the operand 1 position. */
19764 gcc_assert (insn_data[d->code].operand[k].predicate
19765 == neon_struct_operand);
19771 eltype = const_intQI_pointer_node;
19776 eltype = const_intHI_pointer_node;
19781 eltype = const_intSI_pointer_node;
19786 eltype = const_float_pointer_node;
19791 eltype = const_intDI_pointer_node;
19794 default: gcc_unreachable ();
19797 else if (is_store && k == 0)
19799 /* Similarly, Neon store patterns use operand 0 as
19800 the memory location to store to. */
19801 gcc_assert (insn_data[d->code].operand[k].predicate
19802 == neon_struct_operand);
19808 eltype = intQI_pointer_node;
19813 eltype = intHI_pointer_node;
19818 eltype = intSI_pointer_node;
19823 eltype = float_pointer_node;
19828 eltype = intDI_pointer_node;
19831 default: gcc_unreachable ();
19836 switch (insn_data[d->code].operand[k].mode)
19837 {
19838 case VOIDmode: eltype = void_type_node; break;
19840 case QImode: eltype = neon_intQI_type_node; break;
19841 case HImode: eltype = neon_intHI_type_node; break;
19842 case SImode: eltype = neon_intSI_type_node; break;
19843 case SFmode: eltype = neon_float_type_node; break;
19844 case DImode: eltype = neon_intDI_type_node; break;
19845 case TImode: eltype = intTI_type_node; break;
19846 case EImode: eltype = intEI_type_node; break;
19847 case OImode: eltype = intOI_type_node; break;
19848 case CImode: eltype = intCI_type_node; break;
19849 case XImode: eltype = intXI_type_node; break;
19850 /* 64-bit vectors. */
19851 case V8QImode: eltype = V8QI_type_node; break;
19852 case V4HImode: eltype = V4HI_type_node; break;
19853 case V2SImode: eltype = V2SI_type_node; break;
19854 case V2SFmode: eltype = V2SF_type_node; break;
19855 /* 128-bit vectors. */
19856 case V16QImode: eltype = V16QI_type_node; break;
19857 case V8HImode: eltype = V8HI_type_node; break;
19858 case V4SImode: eltype = V4SI_type_node; break;
19859 case V4SFmode: eltype = V4SF_type_node; break;
19860 case V2DImode: eltype = V2DI_type_node; break;
19861 default: gcc_unreachable ();
19865 if (k == 0 && !is_store)
19866 return_type = eltype;
19868 args = tree_cons (NULL_TREE, eltype, args);
19869 }
19871 ftype = build_function_type (return_type, args);
19872 }
19873 break;
19875 case NEON_RESULTPAIR:
19876 {
19877 switch (insn_data[d->code].operand[1].mode)
19878 {
19879 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19880 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19881 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19882 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19883 case DImode: ftype = void_ftype_pdi_di_di; break;
19884 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19885 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19886 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19887 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19888 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19889 default: gcc_unreachable ();
19894 case NEON_REINTERP:
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  */
19898 int rhs = d->mode % 5;
19899 switch (insn_data[d->code].operand[0].mode)
19901 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19902 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19903 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19904 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19905 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19906 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19907 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19908 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19909 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19910 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19911 default: gcc_unreachable ();
19917 gcc_unreachable ();
19920 gcc_assert (ftype != NULL);
19922 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
19926 arm_builtin_decls[fcode] = decl;
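  /* For illustration: with d->name == "vadd" and d->mode == T_V8QI the
     sprintf above produces the name

       __builtin_neon_vaddv8qi

     and the decl built from that name is what gets cached in
     arm_builtin_decls.  */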
19930 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19933 if ((MASK) & insn_flags) \
19936 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19937 BUILT_IN_MD, NULL, NULL_TREE); \
19938 arm_builtin_decls[CODE] = bdecl; \
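/* For illustration, a later call such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers the builtin with the front end and caches its decl in
   arm_builtin_decls, but only when the target's insn_flags include
   FL_IWMMXT.  */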
19943 struct builtin_description
19945 const unsigned int mask;
19946 const enum insn_code icode;
19947 const char * const name;
19948 const enum arm_builtins code;
19949 const enum rtx_code comparison;
19950 const unsigned int flag;
19953 static const struct builtin_description bdesc_2arg[] =
19955 #define IWMMXT_BUILTIN(code, string, builtin) \
19956 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19957 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
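/* As a concrete example, the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   pairing the addv8qi3 insn pattern with the __builtin_arm_waddb
   builtin.  */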
19959 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19960 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19961 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19962 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19963 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19964 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19965 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19966 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19967 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19968 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19969 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19970 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19971 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19972 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19973 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19974 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19975 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19976 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19977 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19978 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19979 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19980 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19981 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19982 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19983 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19984 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19985 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19986 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19987 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19988 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19989 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19990 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19991 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19992 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19993 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19994 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19995 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19996 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19997 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19998 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19999 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20000 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20001 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20002 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20003 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20004 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20005 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20006 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20007 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20008 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20009 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20010 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20011 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20012 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20013 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20014 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20015 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
20016 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
20018 #define IWMMXT_BUILTIN2(code, builtin) \
20019 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20021 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20022 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20023 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20024 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20025 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20026 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20027 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
20028 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20029 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
20030 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20031 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
20032 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
20033 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
20034 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20035 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
20036 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20037 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
20038 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
20039 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
20040 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20041 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
20042 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20043 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
20044 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
20045 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
20046 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
20047 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
20048 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
20049 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
20050 IWMMXT_BUILTIN2 (rordi3, WRORDI)
20051 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20052 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20055 static const struct builtin_description bdesc_1arg[] =
20057 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20058 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20059 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20060 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20061 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20062 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20063 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20064 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20065 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20066 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20067 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20068 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20069 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20070 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20071 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20072 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20073 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20074 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20077 /* Set up all the iWMMXt builtins. This is not called if
20078 TARGET_IWMMXT is zero. */
static void
arm_init_iwmmxt_builtins (void)
20083 const struct builtin_description * d;
20086 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20087 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20088 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20091 = build_function_type_list (integer_type_node,
20092 integer_type_node, NULL_TREE);
20093 tree v8qi_ftype_v8qi_v8qi_int
20094 = build_function_type_list (V8QI_type_node,
20095 V8QI_type_node, V8QI_type_node,
20096 integer_type_node, NULL_TREE);
20097 tree v4hi_ftype_v4hi_int
20098 = build_function_type_list (V4HI_type_node,
20099 V4HI_type_node, integer_type_node, NULL_TREE);
20100 tree v2si_ftype_v2si_int
20101 = build_function_type_list (V2SI_type_node,
20102 V2SI_type_node, integer_type_node, NULL_TREE);
20103 tree v2si_ftype_di_di
20104 = build_function_type_list (V2SI_type_node,
20105 long_long_integer_type_node,
20106 long_long_integer_type_node,
20108 tree di_ftype_di_int
20109 = build_function_type_list (long_long_integer_type_node,
20110 long_long_integer_type_node,
20111 integer_type_node, NULL_TREE);
20112 tree di_ftype_di_int_int
20113 = build_function_type_list (long_long_integer_type_node,
20114 long_long_integer_type_node,
20116 integer_type_node, NULL_TREE);
20117 tree int_ftype_v8qi
20118 = build_function_type_list (integer_type_node,
20119 V8QI_type_node, NULL_TREE);
20120 tree int_ftype_v4hi
20121 = build_function_type_list (integer_type_node,
20122 V4HI_type_node, NULL_TREE);
20123 tree int_ftype_v2si
20124 = build_function_type_list (integer_type_node,
20125 V2SI_type_node, NULL_TREE);
20126 tree int_ftype_v8qi_int
20127 = build_function_type_list (integer_type_node,
20128 V8QI_type_node, integer_type_node, NULL_TREE);
20129 tree int_ftype_v4hi_int
20130 = build_function_type_list (integer_type_node,
20131 V4HI_type_node, integer_type_node, NULL_TREE);
20132 tree int_ftype_v2si_int
20133 = build_function_type_list (integer_type_node,
20134 V2SI_type_node, integer_type_node, NULL_TREE);
20135 tree v8qi_ftype_v8qi_int_int
20136 = build_function_type_list (V8QI_type_node,
20137 V8QI_type_node, integer_type_node,
20138 integer_type_node, NULL_TREE);
20139 tree v4hi_ftype_v4hi_int_int
20140 = build_function_type_list (V4HI_type_node,
20141 V4HI_type_node, integer_type_node,
20142 integer_type_node, NULL_TREE);
20143 tree v2si_ftype_v2si_int_int
20144 = build_function_type_list (V2SI_type_node,
20145 V2SI_type_node, integer_type_node,
20146 integer_type_node, NULL_TREE);
20147 /* Miscellaneous. */
20148 tree v8qi_ftype_v4hi_v4hi
20149 = build_function_type_list (V8QI_type_node,
20150 V4HI_type_node, V4HI_type_node, NULL_TREE);
20151 tree v4hi_ftype_v2si_v2si
20152 = build_function_type_list (V4HI_type_node,
20153 V2SI_type_node, V2SI_type_node, NULL_TREE);
20154 tree v2si_ftype_v4hi_v4hi
20155 = build_function_type_list (V2SI_type_node,
20156 V4HI_type_node, V4HI_type_node, NULL_TREE);
20157 tree v2si_ftype_v8qi_v8qi
20158 = build_function_type_list (V2SI_type_node,
20159 V8QI_type_node, V8QI_type_node, NULL_TREE);
20160 tree v4hi_ftype_v4hi_di
20161 = build_function_type_list (V4HI_type_node,
20162 V4HI_type_node, long_long_integer_type_node,
20164 tree v2si_ftype_v2si_di
20165 = build_function_type_list (V2SI_type_node,
20166 V2SI_type_node, long_long_integer_type_node,
20168 tree void_ftype_int_int
20169 = build_function_type_list (void_type_node,
20170 integer_type_node, integer_type_node,
20173 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20175 = build_function_type_list (long_long_integer_type_node,
20176 V8QI_type_node, NULL_TREE);
20178 = build_function_type_list (long_long_integer_type_node,
20179 V4HI_type_node, NULL_TREE);
20181 = build_function_type_list (long_long_integer_type_node,
20182 V2SI_type_node, NULL_TREE);
20183 tree v2si_ftype_v4hi
20184 = build_function_type_list (V2SI_type_node,
20185 V4HI_type_node, NULL_TREE);
20186 tree v4hi_ftype_v8qi
20187 = build_function_type_list (V4HI_type_node,
20188 V8QI_type_node, NULL_TREE);
20190 tree di_ftype_di_v4hi_v4hi
20191 = build_function_type_list (long_long_unsigned_type_node,
20192 long_long_unsigned_type_node,
20193 V4HI_type_node, V4HI_type_node,
20196 tree di_ftype_v4hi_v4hi
20197 = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
20201 /* Normal vector binops. */
20202 tree v8qi_ftype_v8qi_v8qi
20203 = build_function_type_list (V8QI_type_node,
20204 V8QI_type_node, V8QI_type_node, NULL_TREE);
20205 tree v4hi_ftype_v4hi_v4hi
20206 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
20208 tree v2si_ftype_v2si_v2si
20209 = build_function_type_list (V2SI_type_node,
20210 V2SI_type_node, V2SI_type_node, NULL_TREE);
20211 tree di_ftype_di_di
20212 = build_function_type_list (long_long_unsigned_type_node,
20213 long_long_unsigned_type_node,
20214 long_long_unsigned_type_node,
  /* Add all builtins that are more or less simple operations on two
     operands.  */
20219 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20221 /* Use one of the operands; the target can have a different mode for
20222 mask-generating compares. */
20223 enum machine_mode mode;
20229 mode = insn_data[d->icode].operand[1].mode;
20234 type = v8qi_ftype_v8qi_v8qi;
20237 type = v4hi_ftype_v4hi_v4hi;
20240 type = v2si_ftype_v2si_v2si;
20243 type = di_ftype_di_di;
20247 gcc_unreachable ();
20250 def_mbuiltin (d->mask, d->name, type, d->code);
20253 /* Add the remaining MMX insns with somewhat more complicated types. */
20254 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20256 ARM_BUILTIN_ ## CODE)
20258 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20259 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20260 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20262 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20263 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20264 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20265 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20266 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20267 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20269 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20270 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20271 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20272 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20273 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20274 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20276 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20277 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20278 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20279 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20280 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20281 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20283 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20284 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20285 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20286 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20287 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20288 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20290 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20292 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20293 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20294 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20295 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20297 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20298 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20299 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20300 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20301 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20302 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20303 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20304 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20305 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20307 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20308 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20309 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20311 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20312 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20313 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20315 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20316 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20317 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20318 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20319 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20320 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20322 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20323 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20324 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20325 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20326 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20327 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20328 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20329 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20330 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20331 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20332 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20333 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20335 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20336 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20337 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20338 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20340 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20341 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20342 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20343 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20344 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20345 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20346 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20348 #undef iwmmx_mbuiltin
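  /* A user-level sketch (illustrative only; the vector typedef mirrors
     the one used by mmintrin.h) of calling one of the builtins
     registered above.  __builtin_arm_waddb adds the eight bytes
     element-wise:

       typedef char __v8qi __attribute__ ((vector_size (8)));

       __v8qi
       add_bytes (__v8qi a, __v8qi b)
       {
	 return __builtin_arm_waddb (a, b);
       }
  */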
static void
arm_init_tls_builtins (void)
20356 ftype = build_function_type (ptr_type_node, void_list_node);
20357 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20358 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20360 TREE_NOTHROW (decl) = 1;
20361 TREE_READONLY (decl) = 1;
20362 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
static void
arm_init_fp16_builtins (void)
20368 tree fp16_type = make_node (REAL_TYPE);
20369 TYPE_PRECISION (fp16_type) = 16;
20370 layout_type (fp16_type);
20371 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
static void
arm_init_builtins (void)
20377 arm_init_tls_builtins ();
20379 if (TARGET_REALLY_IWMMXT)
20380 arm_init_iwmmxt_builtins ();
20383 arm_init_neon_builtins ();
20385 if (arm_fp16_format)
20386 arm_init_fp16_builtins ();
20389 /* Return the ARM builtin for CODE. */
static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20394 if (code >= ARM_BUILTIN_MAX)
20395 return error_mark_node;
20397 return arm_builtin_decls[code];
20400 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20402 static const char *
20403 arm_invalid_parameter_type (const_tree t)
20405 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20406 return N_("function parameters cannot have __fp16 type");
/* Implement TARGET_INVALID_RETURN_TYPE.  */
20412 static const char *
20413 arm_invalid_return_type (const_tree t)
20415 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20416 return N_("functions cannot return __fp16 type");
20420 /* Implement TARGET_PROMOTED_TYPE. */
static tree
arm_promoted_type (const_tree t)
20425 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20426 return float_type_node;
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to do an intermediate conversion to float.  */
static tree
arm_convert_to_type (tree type, tree expr)
20438 tree fromtype = TREE_TYPE (expr);
20439 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20441 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20442 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20443 return convert (type, convert (float_type_node, expr));
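  /* For illustration, given the rules above a direct conversion such as

       __fp16 h = 1.0;
       double d = (double) h;

     is expanded as (double) (float) h, and the reverse narrowing from
     double to __fp16 likewise goes through float.  */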
20447 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20448 This simply adds HFmode as a supported mode; even though we don't
20449 implement arithmetic on this type directly, it's supported by
20450 optabs conversions, much the way the double-word arithmetic is
20451 special-cased in the default hook. */
static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
20456 if (mode == HFmode)
20457 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return false;
  else
20461 return default_scalar_mode_supported_p (mode);
20464 /* Errors in the source file can cause expand_expr to return const0_rtx
20465 where we expect a vector. To avoid crashing, use one of the vector
20466 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
  if (x != const0_rtx)
    return x;
20473 x = gen_reg_rtx (mode);
20475 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20476 : gen_rtx_SUBREG (DImode, x, 0)));
20480 /* Subroutine of arm_expand_builtin to take care of binop insns. */
static rtx
arm_expand_binop_builtin (enum insn_code icode,
20484 tree exp, rtx target)
20487 tree arg0 = CALL_EXPR_ARG (exp, 0);
20488 tree arg1 = CALL_EXPR_ARG (exp, 1);
20489 rtx op0 = expand_normal (arg0);
20490 rtx op1 = expand_normal (arg1);
20491 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20492 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20493 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20495 if (VECTOR_MODE_P (mode0))
20496 op0 = safe_vector_operand (op0, mode0);
20497 if (VECTOR_MODE_P (mode1))
20498 op1 = safe_vector_operand (op1, mode1);
  if (target == 0
      || GET_MODE (target) != tmode
20502 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20503 target = gen_reg_rtx (tmode);
20505 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20507 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20508 op0 = copy_to_mode_reg (mode0, op0);
20509 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20510 op1 = copy_to_mode_reg (mode1, op1);
20512 pat = GEN_FCN (icode) (target, op0, op1);
20519 /* Subroutine of arm_expand_builtin to take care of unop insns. */
static rtx
arm_expand_unop_builtin (enum insn_code icode,
20523 tree exp, rtx target, int do_load)
20526 tree arg0 = CALL_EXPR_ARG (exp, 0);
20527 rtx op0 = expand_normal (arg0);
20528 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20529 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  if (target == 0
      || GET_MODE (target) != tmode
20533 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20534 target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
20539 if (VECTOR_MODE_P (mode0))
20540 op0 = safe_vector_operand (op0, mode0);
20542 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20543 op0 = copy_to_mode_reg (mode0, op0);
20546 pat = GEN_FCN (icode) (target, op0);
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;
20560 #define NEON_MAX_BUILTIN_ARGS 5
20562 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20563 and return an expression for the accessed memory.
20565 The intrinsic function operates on a block of registers that has
20566 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20567 The function references the memory at EXP in mode MEM_MODE;
20568 this mode may be BLKmode if no more suitable mode is available. */
static tree
neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20572 enum machine_mode reg_mode,
20573 neon_builtin_type_mode type_mode)
20575 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20576 tree elem_type, upper_bound, array_type;
20578 /* Work out the size of the register block in bytes. */
20579 reg_size = GET_MODE_SIZE (reg_mode);
20581 /* Work out the size of each vector in bytes. */
20582 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20583 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20585 /* Work out how many vectors there are. */
20586 gcc_assert (reg_size % vector_size == 0);
20587 nvectors = reg_size / vector_size;
20589 /* Work out how many elements are being loaded or stored.
20590 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20591 and memory elements; anything else implies a lane load or store. */
20592 if (mem_mode == reg_mode)
    nelems = vector_size * nvectors;
  else
    nelems = nvectors;
20597 /* Work out the type of each element. */
20598 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20599 elem_type = TREE_TYPE (TREE_TYPE (exp));
20601 /* Create a type that describes the full access. */
20602 upper_bound = build_int_cst (size_type_node, nelems - 1);
20603 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20605 /* Dereference EXP using that type. */
20606 exp = convert (build_pointer_type (array_type), exp);
20607 return fold_build2 (MEM_REF, array_type, exp,
20608 build_int_cst (TREE_TYPE (exp), 0));
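/* A worked example (illustrative): for a two-vector structure load of
   bytes, vld2_s8-style, REG_MODE is the 16-byte mode of the register
   pair and TYPE_MODE is a doubleword type, so vector_size == 8 and
   nvectors == 2.  MEM_MODE equals REG_MODE, giving nelems == 16, and
   the access is described as an int8_t[16] array at EXP.  */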
20611 /* Expand a Neon builtin. */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
20614 neon_builtin_type_mode type_mode,
20619 tree arg[NEON_MAX_BUILTIN_ARGS];
20620 rtx op[NEON_MAX_BUILTIN_ARGS];
20621 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20622 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20623 enum machine_mode other_mode;
  if (have_retval
      && (target == 0
	  || GET_MODE (target) != tmode
20630 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20631 target = gen_reg_rtx (tmode);
20633 va_start (ap, exp);
20637 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20639 if (thisarg == NEON_ARG_STOP)
20643 opno = argc + have_retval;
20644 mode[argc] = insn_data[icode].operand[opno].mode;
20645 arg[argc] = CALL_EXPR_ARG (exp, argc);
20646 if (thisarg == NEON_ARG_MEMORY)
20648 other_mode = insn_data[icode].operand[1 - opno].mode;
20649 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20650 other_mode, type_mode);
20652 op[argc] = expand_normal (arg[argc]);
20656 case NEON_ARG_COPY_TO_REG:
20657 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20658 if (!(*insn_data[icode].operand[opno].predicate)
20659 (op[argc], mode[argc]))
20660 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20663 case NEON_ARG_CONSTANT:
20664 /* FIXME: This error message is somewhat unhelpful. */
20665 if (!(*insn_data[icode].operand[opno].predicate)
20666 (op[argc], mode[argc]))
20667 error ("argument must be a constant");
20670 case NEON_ARG_MEMORY:
20671 gcc_assert (MEM_P (op[argc]));
20672 PUT_MODE (op[argc], mode[argc]);
20673 /* ??? arm_neon.h uses the same built-in functions for signed
	     and unsigned accesses, casting where necessary.  This isn't
	     alias safe.  */
20676 set_mem_alias_set (op[argc], 0);
20677 if (!(*insn_data[icode].operand[opno].predicate)
20678 (op[argc], mode[argc]))
20679 op[argc] = (replace_equiv_address
20680 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20683 case NEON_ARG_STOP:
20684 gcc_unreachable ();
20697 pat = GEN_FCN (icode) (target, op[0]);
20701 pat = GEN_FCN (icode) (target, op[0], op[1]);
20705 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20709 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20713 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20717 gcc_unreachable ();
20723 pat = GEN_FCN (icode) (op[0]);
20727 pat = GEN_FCN (icode) (op[0], op[1]);
20731 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20735 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20739 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20743 gcc_unreachable ();
20754 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20755 constants defined per-instruction or per instruction-variant. Instead, the
20756 required info is looked up in the table neon_builtin_data. */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20760 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20761 neon_itype itype = d->itype;
20762 enum insn_code icode = d->code;
20763 neon_builtin_type_mode type_mode = d->mode;
20770 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20771 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20775 case NEON_SCALARMUL:
20776 case NEON_SCALARMULL:
20777 case NEON_SCALARMULH:
20778 case NEON_SHIFTINSERT:
20779 case NEON_LOGICBINOP:
20780 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20781 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20785 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20786 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20787 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20791 case NEON_SHIFTIMM:
20792 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20793 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20797 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20798 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20802 case NEON_REINTERP:
20803 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20804 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20808 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20809 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20811 case NEON_RESULTPAIR:
20812 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20813 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20817 case NEON_LANEMULL:
20818 case NEON_LANEMULH:
20819 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20820 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20821 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20824 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20825 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20826 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20828 case NEON_SHIFTACC:
20829 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20830 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20831 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20833 case NEON_SCALARMAC:
20834 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20835 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20836 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20840 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20841 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20845 case NEON_LOADSTRUCT:
20846 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20847 NEON_ARG_MEMORY, NEON_ARG_STOP);
20849 case NEON_LOAD1LANE:
20850 case NEON_LOADSTRUCTLANE:
20851 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20852 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20856 case NEON_STORESTRUCT:
20857 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20858 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20860 case NEON_STORE1LANE:
20861 case NEON_STORESTRUCTLANE:
20862 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20863 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20867 gcc_unreachable ();
20870 /* Emit code to reinterpret one Neon type as another, without altering bits. */
void
neon_reinterpret (rtx dest, rtx src)
20874 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
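/* E.g. (illustrative): a vreinterpret_s32_s8-style builtin arrives
   here with DEST in V2SImode and SRC in V8QImode; the lowpart move
   changes only the mode, leaving the 64 bits untouched.  */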
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
20880 neon_emit_pair_result_insn (enum machine_mode mode,
20881 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20884 rtx mem = gen_rtx_MEM (mode, destaddr);
20885 rtx tmp1 = gen_reg_rtx (mode);
20886 rtx tmp2 = gen_reg_rtx (mode);
20888 emit_insn (intfn (tmp1, op1, op2, tmp2));
20890 emit_move_insn (mem, tmp1);
20891 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20892 emit_move_insn (mem, tmp2);
20895 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20896 not to early-clobber SRC registers in the process.
20898 We assume that the operands described by SRC and DEST represent a
20899 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20900 number of components into which the copy has been decomposed. */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20906 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20907 || REGNO (operands[0]) < REGNO (operands[1]))
20909 for (i = 0; i < count; i++)
20911 operands[2 * i] = dest[i];
20912 operands[2 * i + 1] = src[i];
20917 for (i = 0; i < count; i++)
20919 operands[2 * i] = dest[count - i - 1];
20920 operands[2 * i + 1] = src[count - i - 1];
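/* A worked example (illustrative): copying {d0, d1} to {d1, d2}
   overlaps, and REGNO (operands[0]) > REGNO (operands[1]), so the
   reversed ordering above is chosen and the moves are emitted as

     d2 <- d1
     d1 <- d0

   so that d1 is read before it is overwritten.  */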
20925 /* Split operands into moves from op[1] + op[2] into op[0]. */
void
neon_split_vcombine (rtx operands[3])
20930 unsigned int dest = REGNO (operands[0]);
20931 unsigned int src1 = REGNO (operands[1]);
20932 unsigned int src2 = REGNO (operands[2]);
20933 enum machine_mode halfmode = GET_MODE (operands[1]);
20934 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20935 rtx destlo, desthi;
20937 if (src1 == dest && src2 == dest + halfregs)
20939 /* No-op move. Can't split to nothing; emit something. */
20940 emit_note (NOTE_INSN_DELETED);
20944 /* Preserve register attributes for variable tracking. */
20945 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20946 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20947 GET_MODE_SIZE (halfmode));
20949 /* Special case of reversed high/low parts. Use VSWP. */
20950 if (src2 == dest && src1 == dest + halfregs)
20952 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20953 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20954 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20958 if (!reg_overlap_mentioned_p (operands[2], destlo))
20960 /* Try to avoid unnecessary moves if part of the result
20961 is in the right place already. */
20963 emit_move_insn (destlo, operands[1]);
20964 if (src2 != dest + halfregs)
20965 emit_move_insn (desthi, operands[2]);
20969 if (src2 != dest + halfregs)
20970 emit_move_insn (desthi, operands[2]);
20972 emit_move_insn (destlo, operands[1]);
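/* For instance (illustrative): splitting q0 = vcombine (d1, d0) means
   the two halves must be exchanged, which is the reversed-parts case
   above; emitting both sets in one PARALLEL lets a single VSWP do the
   exchange.  */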
20976 /* Expand an expression EXP that calls a built-in function,
20977 with result going to TARGET if that's convenient
20978 (and in mode MODE if that's convenient).
20979 SUBTARGET may be used as the target for computing one of EXP's operands.
20980 IGNORE is nonzero if the value is to be ignored. */
static rtx
arm_expand_builtin (tree exp,
20985 rtx subtarget ATTRIBUTE_UNUSED,
20986 enum machine_mode mode ATTRIBUTE_UNUSED,
20987 int ignore ATTRIBUTE_UNUSED)
20989 const struct builtin_description * d;
20990 enum insn_code icode;
20991 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20999 int fcode = DECL_FUNCTION_CODE (fndecl);
21001 enum machine_mode tmode;
21002 enum machine_mode mode0;
21003 enum machine_mode mode1;
21004 enum machine_mode mode2;
21006 if (fcode >= ARM_BUILTIN_NEON_BASE)
21007 return arm_expand_neon_builtin (fcode, exp, target);
21011 case ARM_BUILTIN_TEXTRMSB:
21012 case ARM_BUILTIN_TEXTRMUB:
21013 case ARM_BUILTIN_TEXTRMSH:
21014 case ARM_BUILTIN_TEXTRMUH:
21015 case ARM_BUILTIN_TEXTRMSW:
21016 case ARM_BUILTIN_TEXTRMUW:
21017 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21018 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21019 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21020 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21021 : CODE_FOR_iwmmxt_textrmw);
21023 arg0 = CALL_EXPR_ARG (exp, 0);
21024 arg1 = CALL_EXPR_ARG (exp, 1);
21025 op0 = expand_normal (arg0);
21026 op1 = expand_normal (arg1);
21027 tmode = insn_data[icode].operand[0].mode;
21028 mode0 = insn_data[icode].operand[1].mode;
21029 mode1 = insn_data[icode].operand[2].mode;
21031 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21032 op0 = copy_to_mode_reg (mode0, op0);
21033 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21035 /* @@@ better error message */
21036 error ("selector must be an immediate");
21037 return gen_reg_rtx (tmode);
      if (target == 0
	  || GET_MODE (target) != tmode
21041 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21042 target = gen_reg_rtx (tmode);
21043 pat = GEN_FCN (icode) (target, op0, op1);
21049 case ARM_BUILTIN_TINSRB:
21050 case ARM_BUILTIN_TINSRH:
21051 case ARM_BUILTIN_TINSRW:
21052 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21053 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21054 : CODE_FOR_iwmmxt_tinsrw);
21055 arg0 = CALL_EXPR_ARG (exp, 0);
21056 arg1 = CALL_EXPR_ARG (exp, 1);
21057 arg2 = CALL_EXPR_ARG (exp, 2);
21058 op0 = expand_normal (arg0);
21059 op1 = expand_normal (arg1);
21060 op2 = expand_normal (arg2);
21061 tmode = insn_data[icode].operand[0].mode;
21062 mode0 = insn_data[icode].operand[1].mode;
21063 mode1 = insn_data[icode].operand[2].mode;
21064 mode2 = insn_data[icode].operand[3].mode;
21066 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21067 op0 = copy_to_mode_reg (mode0, op0);
21068 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21069 op1 = copy_to_mode_reg (mode1, op1);
21070 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21072 /* @@@ better error message */
21073 error ("selector must be an immediate");
      if (target == 0
	  || GET_MODE (target) != tmode
21078 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21079 target = gen_reg_rtx (tmode);
21080 pat = GEN_FCN (icode) (target, op0, op1, op2);
21086 case ARM_BUILTIN_SETWCX:
21087 arg0 = CALL_EXPR_ARG (exp, 0);
21088 arg1 = CALL_EXPR_ARG (exp, 1);
21089 op0 = force_reg (SImode, expand_normal (arg0));
21090 op1 = expand_normal (arg1);
21091 emit_insn (gen_iwmmxt_tmcr (op1, op0));
21094 case ARM_BUILTIN_GETWCX:
21095 arg0 = CALL_EXPR_ARG (exp, 0);
21096 op0 = expand_normal (arg0);
21097 target = gen_reg_rtx (SImode);
21098 emit_insn (gen_iwmmxt_tmrc (target, op0));
21101 case ARM_BUILTIN_WSHUFH:
21102 icode = CODE_FOR_iwmmxt_wshufh;
21103 arg0 = CALL_EXPR_ARG (exp, 0);
21104 arg1 = CALL_EXPR_ARG (exp, 1);
21105 op0 = expand_normal (arg0);
21106 op1 = expand_normal (arg1);
21107 tmode = insn_data[icode].operand[0].mode;
21108 mode1 = insn_data[icode].operand[1].mode;
21109 mode2 = insn_data[icode].operand[2].mode;
21111 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21112 op0 = copy_to_mode_reg (mode1, op0);
21113 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21115 /* @@@ better error message */
21116 error ("mask must be an immediate");
      if (target == 0
	  || GET_MODE (target) != tmode
21121 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21122 target = gen_reg_rtx (tmode);
21123 pat = GEN_FCN (icode) (target, op0, op1);
21129 case ARM_BUILTIN_WSADB:
21130 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21131 case ARM_BUILTIN_WSADH:
21132 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21133 case ARM_BUILTIN_WSADBZ:
21134 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21135 case ARM_BUILTIN_WSADHZ:
21136 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21138 /* Several three-argument builtins. */
21139 case ARM_BUILTIN_WMACS:
21140 case ARM_BUILTIN_WMACU:
21141 case ARM_BUILTIN_WALIGN:
21142 case ARM_BUILTIN_TMIA:
21143 case ARM_BUILTIN_TMIAPH:
21144 case ARM_BUILTIN_TMIATT:
21145 case ARM_BUILTIN_TMIATB:
21146 case ARM_BUILTIN_TMIABT:
21147 case ARM_BUILTIN_TMIABB:
21148 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21149 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21150 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21151 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21152 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21153 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21154 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21155 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21156 : CODE_FOR_iwmmxt_walign);
21157 arg0 = CALL_EXPR_ARG (exp, 0);
21158 arg1 = CALL_EXPR_ARG (exp, 1);
21159 arg2 = CALL_EXPR_ARG (exp, 2);
21160 op0 = expand_normal (arg0);
21161 op1 = expand_normal (arg1);
21162 op2 = expand_normal (arg2);
21163 tmode = insn_data[icode].operand[0].mode;
21164 mode0 = insn_data[icode].operand[1].mode;
21165 mode1 = insn_data[icode].operand[2].mode;
21166 mode2 = insn_data[icode].operand[3].mode;
21168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21169 op0 = copy_to_mode_reg (mode0, op0);
21170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21171 op1 = copy_to_mode_reg (mode1, op1);
21172 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21173 op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
21176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21177 target = gen_reg_rtx (tmode);
21178 pat = GEN_FCN (icode) (target, op0, op1, op2);
21184 case ARM_BUILTIN_WZERO:
21185 target = gen_reg_rtx (DImode);
21186 emit_insn (gen_iwmmxt_clrdi (target));
21189 case ARM_BUILTIN_THREAD_POINTER:
21190 return arm_load_tp (target);
21196 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21197 if (d->code == (const enum arm_builtins) fcode)
21198 return arm_expand_binop_builtin (d->icode, exp, target);
21200 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21201 if (d->code == (const enum arm_builtins) fcode)
21202 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21204 /* @@@ Should really do something sensible here. */
21208 /* Return the number (counting from 0) of
21209 the least significant set bit in MASK. */
static int
number_of_first_bit_set (unsigned mask)
21214 return ctz_hwi (mask);
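/* E.g. number_of_first_bit_set (0x28) == 3: 0x28 is binary 101000,
   whose lowest set bit is bit 3.  */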
21217 /* Like emit_multi_reg_push, but allowing for a different set of
21218 registers to be described as saved. MASK is the set of registers
21219 to be saved; REAL_REGS is the set of registers to be described as
21220 saved. If REAL_REGS is 0, only describe the stack adjustment. */
static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21225 unsigned long regno;
21226 rtx par[10], tmp, reg, insn;
21229 /* Build the parallel of the registers actually being stored. */
21230 for (i = 0; mask; ++i, mask &= mask - 1)
21232 regno = ctz_hwi (mask);
21233 reg = gen_rtx_REG (SImode, regno);
21236 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21238 tmp = gen_rtx_USE (VOIDmode, reg);
21243 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21244 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21245 tmp = gen_frame_mem (BLKmode, tmp);
21246 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21249 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21250 insn = emit_insn (tmp);
21252 /* Always build the stack adjustment note for unwind info. */
21253 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21254 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21257 /* Build the parallel of the registers recorded as saved for unwind. */
21258 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21260 regno = ctz_hwi (real_regs);
21261 reg = gen_rtx_REG (SImode, regno);
21263 tmp = plus_constant (stack_pointer_rtx, j * 4);
21264 tmp = gen_frame_mem (SImode, tmp);
21265 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21266 RTX_FRAME_RELATED_P (tmp) = 1;
21274 RTX_FRAME_RELATED_P (par[0]) = 1;
21275 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21278 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
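/* For illustration: a call with MASK == 0x90 pushes {r4, r7}, while
   REAL_REGS == 0x05 makes the attached unwind note describe the two
   stack slots as holding r0 and r2 instead.  */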
21283 /* Emit code to push or pop registers to or from the stack. F is the
21284 assembly file. MASK is the registers to pop. */
static void
thumb_pop (FILE *f, unsigned long mask)
21289 int lo_mask = mask & 0xFF;
21290 int pushed_words = 0;
21294 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit().  */
21298 thumb_exit (f, -1);
21302 fprintf (f, "\tpop\t{");
21304 /* Look at the low registers first. */
21305 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21309 asm_fprintf (f, "%r", regno);
21311 if ((lo_mask & ~1) != 0)
21318 if (mask & (1 << PC_REGNUM))
21320 /* Catch popping the PC. */
21321 if (TARGET_INTERWORK || TARGET_BACKTRACE
21322 || crtl->calls_eh_return)
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
21326 fprintf (f, "}\n");
21328 thumb_exit (f, -1);
21337 asm_fprintf (f, "%r", PC_REGNUM);
21341 fprintf (f, "}\n");
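/* For example (illustrative),

     thumb_pop (f, (1 << 4) | (1 << 5) | (1 << PC_REGNUM));

   normally prints "pop {r4, r5, pc}"; with interworking, backtracing
   or an EH return the PC is instead popped into a low register and
   returned to with BX in thumb_exit, as handled above.  */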
21344 /* Generate code to return from a thumb function.
21345 If 'reg_containing_return_addr' is -1, then the return address is
21346 actually on the stack, at the stack pointer. */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
21350 unsigned regs_available_for_popping;
21351 unsigned regs_to_pop;
21353 unsigned available;
21357 int restore_a4 = FALSE;
21359 /* Compute the registers we need to pop. */
21363 if (reg_containing_return_addr == -1)
21365 regs_to_pop |= 1 << LR_REGNUM;
21369 if (TARGET_BACKTRACE)
21371 /* Restore the (ARM) frame pointer and stack pointer. */
21372 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
21378 if (pops_needed == 0)
21380 if (crtl->calls_eh_return)
21381 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21383 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21386 /* Otherwise if we are not supporting interworking and we have not created
21387 a backtrace structure and the function was not entered in ARM mode then
21388 just pop the return address straight into the PC. */
21389 else if (!TARGET_INTERWORK
21390 && !TARGET_BACKTRACE
21391 && !is_called_in_ARM_mode (current_function_decl)
21392 && !crtl->calls_eh_return)
21394 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21398 /* Find out how many of the (return) argument registers we can corrupt. */
21399 regs_available_for_popping = 0;
21401 /* If returning via __builtin_eh_return, the bottom three registers
21402 all contain information needed for the return. */
21403 if (crtl->calls_eh_return)
      /* Deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
21413 if (crtl->return_rtx != 0)
21414 mode = GET_MODE (crtl->return_rtx);
21416 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21418 size = GET_MODE_SIZE (mode);
21422 /* In a void function we can use any argument register.
21423 In a function that returns a structure on the stack
21424 we can use the second and third argument registers. */
21425 if (mode == VOIDmode)
21426 regs_available_for_popping =
21427 (1 << ARG_REGISTER (1))
21428 | (1 << ARG_REGISTER (2))
21429 | (1 << ARG_REGISTER (3));
21431 regs_available_for_popping =
21432 (1 << ARG_REGISTER (2))
21433 | (1 << ARG_REGISTER (3));
21435 else if (size <= 4)
21436 regs_available_for_popping =
21437 (1 << ARG_REGISTER (2))
21438 | (1 << ARG_REGISTER (3));
21439 else if (size <= 8)
21440 regs_available_for_popping =
21441 (1 << ARG_REGISTER (3));
21444 /* Match registers to be popped with registers into which we pop them. */
21445 for (available = regs_available_for_popping,
21446 required = regs_to_pop;
21447 required != 0 && available != 0;
21448 available &= ~(available & - available),
21449 required &= ~(required & - required))
21452 /* If we have any popping registers left over, remove them. */
21454 regs_available_for_popping &= ~available;
21456 /* Otherwise if we need another popping register we can use
21457 the fourth argument register. */
21458 else if (pops_needed)
21460 /* If we have not found any free argument registers and
21461 reg a4 contains the return address, we must move it. */
21462 if (regs_available_for_popping == 0
21463 && reg_containing_return_addr == LAST_ARG_REGNUM)
21465 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21466 reg_containing_return_addr = LR_REGNUM;
21468 else if (size > 12)
21470 /* Register a4 is being used to hold part of the return value,
21471 but we have dire need of a free, low register. */
	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
21477 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21479 /* The fourth argument register is available. */
21480 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21486 /* Pop as many registers as we can. */
21487 thumb_pop (f, regs_available_for_popping);
21489 /* Process the registers we popped. */
21490 if (reg_containing_return_addr == -1)
21492 /* The return address was popped into the lowest numbered register. */
21493 regs_to_pop &= ~(1 << LR_REGNUM);
21495 reg_containing_return_addr =
21496 number_of_first_bit_set (regs_available_for_popping);
      /* Remove this register from the mask of available registers, so that
21499 the return address will not be corrupted by further pops. */
21500 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21503 /* If we popped other registers then handle them here. */
21504 if (regs_available_for_popping)
21508 /* Work out which register currently contains the frame pointer. */
21509 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21511 /* Move it into the correct place. */
21512 asm_fprintf (f, "\tmov\t%r, %r\n",
21513 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21515 /* (Temporarily) remove it from the mask of popped registers. */
21516 regs_available_for_popping &= ~(1 << frame_pointer);
21517 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21519 if (regs_available_for_popping)
	  /* We popped the stack pointer as well;
21524 find the register that contains it. */
21525 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21527 /* Move it into the stack register. */
21528 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21530 /* At this point we have popped all necessary registers, so
21531 do not worry about restoring regs_available_for_popping
21532 to its correct value:
21534 assert (pops_needed == 0)
21535 assert (regs_available_for_popping == (1 << frame_pointer))
21536 assert (regs_to_pop == (1 << STACK_POINTER)) */
	  /* Since we have just moved the popped value into the frame
21541 pointer, the popping register is available for reuse, and
21542 we know that we still have the stack pointer left to pop. */
21543 regs_available_for_popping |= (1 << frame_pointer);
21547 /* If we still have registers left on the stack, but we no longer have
21548 any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     we used to hold the return address.  */
21551 if (regs_available_for_popping == 0 && pops_needed > 0)
21553 regs_available_for_popping |= 1 << reg_containing_return_addr;
21555 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21556 reg_containing_return_addr);
21558 reg_containing_return_addr = LR_REGNUM;
21561 /* If we have registers left on the stack then pop some more.
21562 We know that at most we will want to pop FP and SP. */
21563 if (pops_needed > 0)
21568 thumb_pop (f, regs_available_for_popping);
21570 /* We have popped either FP or SP.
21571 Move whichever one it is into the correct register. */
21572 popped_into = number_of_first_bit_set (regs_available_for_popping);
21573 move_to = number_of_first_bit_set (regs_to_pop);
21575 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21577 regs_to_pop &= ~(1 << move_to);
21582 /* If we still have not popped everything then we must have only
21583 had one register available to us and we are now popping the SP. */
21584 if (pops_needed > 0)
21588 thumb_pop (f, regs_available_for_popping);
21590 popped_into = number_of_first_bit_set (regs_available_for_popping);
21592 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
21599 /* If necessary restore the a4 register. */
21602 if (reg_containing_return_addr != LR_REGNUM)
21604 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21605 reg_containing_return_addr = LR_REGNUM;
21608 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21611 if (crtl->calls_eh_return)
21612 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21614 /* Return to caller. */
21615 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21618 /* Scan INSN just before assembler is output for it.
21619 For Thumb-1, we track the status of the condition codes; this
21620 information is used in the cbranchsi4_insn pattern. */
21622 thumb1_final_prescan_insn (rtx insn)
21624 if (flag_print_asm_name)
21625 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21626 INSN_ADDRESSES (INSN_UID (insn)));
21627 /* Don't overwrite the previous setter when we get to a cbranch. */
21628 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21630 enum attr_conds conds;
21632 if (cfun->machine->thumb1_cc_insn)
21634 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21635 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21638 conds = get_attr_conds (insn);
21639 if (conds == CONDS_SET)
21641 rtx set = single_set (insn);
21642 cfun->machine->thumb1_cc_insn = insn;
21643 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21644 cfun->machine->thumb1_cc_op1 = const0_rtx;
21645 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21646 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21648 rtx src1 = XEXP (SET_SRC (set), 1);
21649 if (src1 == const0_rtx)
21650 cfun->machine->thumb1_cc_mode = CCmode;
21653 else if (conds != CONDS_NOCOND)
21654 cfun->machine->thumb1_cc_insn = NULL_RTX;
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21661 unsigned HOST_WIDE_INT mask = 0xff;
21664 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21665 if (val == 0) /* XXX */
21668 for (i = 0; i < 25; i++)
21669 if ((val & (mask << i)) == val)
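/* E.g. (illustrative): 0x00044000 has all of its set bits inside one
   8-bit window, so some shifted 0xff mask covers it and the loop
   succeeds; 0x40000400, whose set bits lie more than 8 bits apart,
   fails for every shift.  */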
21675 /* Returns nonzero if the current function contains,
21676 or might contain a far jump. */
static int
thumb_far_jump_used_p (void)
21682 /* This test is only important for leaf functions. */
21683 /* assert (!leaf_function_p ()); */
21685 /* If we have already decided that far jumps may be used,
21686 do not bother checking again, and always return true even if
21687 it turns out that they are not being used. Once we have made
21688 the decision that far jumps are present (and that hence the link
21689 register will be pushed onto the stack) we cannot go back on it. */
21690 if (cfun->machine->far_jump_used)
21693 /* If this function is not being called from the prologue/epilogue
21694 generation code then it must be being called from the
21695 INITIAL_ELIMINATION_OFFSET macro. */
21696 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21698 /* In this case we know that we are being asked about the elimination
21699 of the arg pointer register. If that register is not being used,
21700 then there are no arguments on the stack, and we do not have to
21701 worry that a far jump might force the prologue to push the link
21702 register, changing the stack offsets. In this case we can just
21703 return false, since the presence of far jumps in the function will
21704 not affect stack offsets.
21706 If the arg pointer is live (or if it was live, but has now been
21707 eliminated and so set to dead) then we do have to test to see if
21708 the function might contain a far jump. This test can lead to some
     false negatives, since before reload is completed the length of
21710 branch instructions is not known, so gcc defaults to returning their
21711 longest length, which in turn sets the far jump attribute to true.
21713 A false negative will not result in bad code being generated, but it
21714 will result in a needless push and pop of the link register. We
21715 hope that this does not occur too often.
21717 If we need doubleword stack alignment this could affect the other
21718 elimination offsets so we can't risk getting it wrong. */
21719 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21720 cfun->machine->arg_pointer_live = 1;
21721 else if (!cfun->machine->arg_pointer_live)
21725 /* Check to see if the function contains a branch
21726 insn with the far jump attribute set. */
21727 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21729 if (GET_CODE (insn) == JUMP_INSN
21730 /* Ignore tablejump patterns. */
21731 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21732 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21733 && get_attr_far_jump (insn) == FAR_JUMP_YES
21736 /* Record the fact that we have decided that
21737 the function does use far jumps. */
21738 cfun->machine->far_jump_used = 1;
      return 1;
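/* Editor's illustrative sketch, not GCC code: a "far" jump is one the
   short Thumb-1 branch encoding cannot reach.  Assuming the
   unconditional B encodes a signed 11-bit halfword offset (roughly
   +/-2 KiB), a standalone reach test looks like this.  */
static int
needs_far_jump_example (long byte_offset)
{
  /* In-range offsets are -2048 .. 2046 bytes, in halfword steps.  */
  return byte_offset < -2048 || byte_offset > 2046;
}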
21746 /* Return nonzero if FUNC must be entered in ARM mode. */
21748 is_called_in_ARM_mode (tree func)
21750 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21752 /* Ignore the problem about functions whose address is taken. */
21753 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21757 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21763 /* Given the stack offsets and register mask in OFFSETS, decide how
21764 many additional registers to push instead of subtracting a constant
21765 from SP. For epilogues the principle is the same except we use pop.
21766 FOR_PROLOGUE indicates which we're generating. */
21768 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21770 HOST_WIDE_INT amount;
21771 unsigned long live_regs_mask = offsets->saved_regs_mask;
21772 /* Extract a mask of the ones we can give to the Thumb's push/pop instructions. */
21774 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21775 /* Then count how many other high registers will need to be pushed. */
21776 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21777 int n_free, reg_base;
21779 if (!for_prologue && frame_pointer_needed)
21780 amount = offsets->locals_base - offsets->saved_regs;
21782 amount = offsets->outgoing_args - offsets->saved_regs;
21784 /* If the stack frame size is 512 exactly, we can save one load
21785 instruction, which should make this a win even when optimizing for speed. */
21787 if (!optimize_size && amount != 512)
21790 /* Can't do this if there are high registers to push. */
21791 if (high_regs_pushed != 0)
21794 /* Shouldn't do it in the prologue if no registers would normally
21795 be pushed at all. In the epilogue, also allow it if we'll have
21796 a pop insn for the PC. */
      if (l_mask == 0
          && (for_prologue
21799 || TARGET_BACKTRACE
21800 || (live_regs_mask & 1 << LR_REGNUM) == 0
21801 || TARGET_INTERWORK
21802 || crtl->args.pretend_args_size != 0))
        return 0;
21805 /* Don't do this if thumb_expand_prologue wants to emit instructions
21806 between the push and the stack frame allocation. */
21808 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21809 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21816 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21817 live_regs_mask >>= reg_base;
21820 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21821 && (for_prologue || call_used_regs[reg_base + n_free]))
21823 live_regs_mask >>= 1;
      n_free++;
21829 gcc_assert (amount / 4 * 4 == amount);
21831 if (amount >= 512 && (amount - n_free * 4) < 512)
21832 return (amount - 508) / 4;
21833 if (amount <= n_free * 4)
        return amount / 4;
      return 0;
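/* Editor's illustrative sketch, not GCC code: Thumb-1 "sub sp, #imm"
   encodes a 7-bit word count, so 508 is the largest single-insn
   adjustment.  This mirrors the arithmetic above: push just enough
   extra registers to bring the remaining adjustment under 512.  */
static int
extra_pushes_example (long amount, int n_free)
{
  if (amount >= 512 && amount - n_free * 4 < 512)
    return (int) ((amount - 508) / 4);	/* shrink the residue to 508 */
  if (amount <= n_free * 4)
    return (int) (amount / 4);		/* pushes replace the sub */
  return 0;
}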
21838 /* The bits which aren't usefully expanded as rtl. */
21840 thumb_unexpanded_epilogue (void)
21842 arm_stack_offsets *offsets;
21844 unsigned long live_regs_mask = 0;
21845 int high_regs_pushed = 0;
21847 int had_to_push_lr;
21850 if (cfun->machine->return_used_this_function != 0)
21853 if (IS_NAKED (arm_current_func_type ()))
21856 offsets = arm_get_frame_offsets ();
21857 live_regs_mask = offsets->saved_regs_mask;
21858 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21860 /* Deduce the registers used from the function's return value.
21861 This is more reliable than examining df_regs_ever_live_p () because that
21862 will be set if the register is ever used in the function, not just if
21863 the register is used to hold a return value. */
21864 size = arm_size_return_regs ();
21866 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21869 unsigned long extra_mask = (1 << extra_pop) - 1;
21870 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
21874 /* The prolog may have pushed some high registers to use as
21875 work registers. e.g. the testsuite file:
21876 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21877 compiles to produce:
21878 push {r4, r5, r6, r7, lr}
21882 as part of the prolog. We have to undo that pushing here. */
21884 if (high_regs_pushed)
21886 unsigned long mask = live_regs_mask & 0xff;
21889 /* The available low registers depend on the size of the value we are
21897 /* Oh dear! We have no low registers into which we can pop
      high registers! */
      internal_error
21900 ("no low registers available for popping high registers");
21902 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21903 if (live_regs_mask & (1 << next_hi_reg))
21906 while (high_regs_pushed)
21908 /* Find lo register(s) into which the high register(s) can
21910 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21912 if (mask & (1 << regno))
21913 high_regs_pushed--;
21914 if (high_regs_pushed == 0)
21918 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21920 /* Pop the values into the low register(s). */
21921 thumb_pop (asm_out_file, mask);
21923 /* Move the value(s) into the high registers. */
21924 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21926 if (mask & (1 << regno))
21928 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21931 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21932 if (live_regs_mask & (1 << next_hi_reg))
21937 live_regs_mask &= ~0x0f00;
21940 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21941 live_regs_mask &= 0xff;
21943 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21945 /* Pop the return address into the PC. */
21946 if (had_to_push_lr)
21947 live_regs_mask |= 1 << PC_REGNUM;
21949 /* Either no argument registers were pushed or a backtrace
21950 structure was created which includes an adjusted stack
21951 pointer, so just pop everything. */
21952 if (live_regs_mask)
21953 thumb_pop (asm_out_file, live_regs_mask);
21955 /* We have either just popped the return address into the
21956 PC or it was kept in LR for the entire function.
21957 Note that thumb_pop has already called thumb_exit if the
21958 PC was in the list. */
21959 if (!had_to_push_lr)
21960 thumb_exit (asm_out_file, LR_REGNUM);
21964 /* Pop everything but the return address. */
21965 if (live_regs_mask)
21966 thumb_pop (asm_out_file, live_regs_mask);
21968 if (had_to_push_lr)
21972 /* We have no free low regs, so save one. */
21973 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21977 /* Get the return address into a temporary register. */
21978 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21982 /* Move the return address to lr. */
21983 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21985 /* Restore the low register. */
21986 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21991 regno = LAST_ARG_REGNUM;
21996 /* Remove the argument registers that were pushed onto the stack. */
21997 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21998 SP_REGNUM, SP_REGNUM,
21999 crtl->args.pretend_args_size);
22001 thumb_exit (asm_out_file, regno);
22007 /* Functions to save and restore machine-specific function data. */
22008 static struct machine_function *
22009 arm_init_machine_status (void)
22011 struct machine_function *machine;
22012 machine = ggc_alloc_cleared_machine_function ();
22014 #if ARM_FT_UNKNOWN != 0
22015 machine->func_type = ARM_FT_UNKNOWN;
22020 /* Return an RTX indicating where the return address to the
22021 calling function can be found. */
22023 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22028 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22031 /* Do anything needed before RTL is emitted for each function. */
22033 arm_init_expanders (void)
22035 /* Arrange to initialize and mark the machine per-function status. */
22036 init_machine_status = arm_init_machine_status;
22038 /* This is to stop the combine pass optimizing away the alignment
22039 adjustment of va_arg. */
22040 /* ??? It is claimed that this should not be necessary. */
22042 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22046 /* Like arm_compute_initial_elimination offset. Simpler because there
22047 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22048 to point at the base of the local variables after static stack
22049 space for a function has been allocated. */
22052 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22054 arm_stack_offsets *offsets;
22056 offsets = arm_get_frame_offsets ();
22060 case ARG_POINTER_REGNUM:
22063 case STACK_POINTER_REGNUM:
22064 return offsets->outgoing_args - offsets->saved_args;
22066 case FRAME_POINTER_REGNUM:
22067 return offsets->soft_frame - offsets->saved_args;
22069 case ARM_HARD_FRAME_POINTER_REGNUM:
22070 return offsets->saved_regs - offsets->saved_args;
22072 case THUMB_HARD_FRAME_POINTER_REGNUM:
22073 return offsets->locals_base - offsets->saved_args;
22076 gcc_unreachable ();
22080 case FRAME_POINTER_REGNUM:
22083 case STACK_POINTER_REGNUM:
22084 return offsets->outgoing_args - offsets->soft_frame;
22086 case ARM_HARD_FRAME_POINTER_REGNUM:
22087 return offsets->saved_regs - offsets->soft_frame;
22089 case THUMB_HARD_FRAME_POINTER_REGNUM:
22090 return offsets->locals_base - offsets->soft_frame;
22093 gcc_unreachable ();
22098 gcc_unreachable ();
22102 /* Generate the function's prologue. */
22105 thumb1_expand_prologue (void)
22109 HOST_WIDE_INT amount;
22110 arm_stack_offsets *offsets;
22111 unsigned long func_type;
22113 unsigned long live_regs_mask;
22114 unsigned long l_mask;
22115 unsigned high_regs_pushed = 0;
22117 func_type = arm_current_func_type ();
22119 /* Naked functions don't have prologues. */
22120 if (IS_NAKED (func_type))
22123 if (IS_INTERRUPT (func_type))
22125 error ("interrupt Service Routines cannot be coded in Thumb mode");
22129 if (is_called_in_ARM_mode (current_function_decl))
22130 emit_insn (gen_prologue_thumb1_interwork ());
22132 offsets = arm_get_frame_offsets ();
22133 live_regs_mask = offsets->saved_regs_mask;
22135 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22136 l_mask = live_regs_mask & 0x40ff;
22137 /* Then count how many other high registers will need to be pushed. */
22138 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22140 if (crtl->args.pretend_args_size)
22142 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22144 if (cfun->machine->uses_anonymous_args)
22146 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22147 unsigned long mask;
22149 mask = 1ul << (LAST_ARG_REGNUM + 1);
22150 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22152 insn = thumb1_emit_multi_reg_push (mask, 0);
22156 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22157 stack_pointer_rtx, x));
22159 RTX_FRAME_RELATED_P (insn) = 1;
22162 if (TARGET_BACKTRACE)
22164 HOST_WIDE_INT offset = 0;
22165 unsigned work_register;
22166 rtx work_reg, x, arm_hfp_rtx;
22168 /* We have been asked to create a stack backtrace structure.
22169 The code looks like this:
22173 0 sub SP, #16 Reserve space for 4 registers.
22174 2 push {R7} Push low registers.
22175 4 add R7, SP, #20 Get the stack pointer before the push.
22176 6 str R7, [SP, #8] Store the stack pointer
22177 (before reserving the space).
22178 8 mov R7, PC Get hold of the start of this code + 12.
22179 10 str R7, [SP, #16] Store it.
22180 12 mov R7, FP Get hold of the current frame pointer.
22181 14 str R7, [SP, #4] Store it.
22182 16 mov R7, LR Get hold of the current return address.
22183 18 str R7, [SP, #12] Store it.
22184 20 add R7, SP, #16 Point at the start of the
22185 backtrace structure.
22186 22 mov FP, R7 Put this value into the frame pointer. */
22188 work_register = thumb_find_work_register (live_regs_mask);
22189 work_reg = gen_rtx_REG (SImode, work_register);
22190 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22192 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22193 stack_pointer_rtx, GEN_INT (-16)));
22194 RTX_FRAME_RELATED_P (insn) = 1;
22198 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22199 RTX_FRAME_RELATED_P (insn) = 1;
22201 offset = bit_count (l_mask) * UNITS_PER_WORD;
22204 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22205 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22207 x = plus_constant (stack_pointer_rtx, offset + 4);
22208 x = gen_frame_mem (SImode, x);
22209 emit_move_insn (x, work_reg);
22211 /* Make sure that the instruction fetching the PC is in the right place
22212 to calculate "start of backtrace creation code + 12". */
22213 /* ??? The stores using the common WORK_REG ought to be enough to
22214 prevent the scheduler from doing anything weird. Failing that
22215 we could always move all of the following into an UNSPEC_VOLATILE. */
22218 x = gen_rtx_REG (SImode, PC_REGNUM);
22219 emit_move_insn (work_reg, x);
22221 x = plus_constant (stack_pointer_rtx, offset + 12);
22222 x = gen_frame_mem (SImode, x);
22223 emit_move_insn (x, work_reg);
22225 emit_move_insn (work_reg, arm_hfp_rtx);
22227 x = plus_constant (stack_pointer_rtx, offset);
22228 x = gen_frame_mem (SImode, x);
22229 emit_move_insn (x, work_reg);
22233 emit_move_insn (work_reg, arm_hfp_rtx);
22235 x = plus_constant (stack_pointer_rtx, offset);
22236 x = gen_frame_mem (SImode, x);
22237 emit_move_insn (x, work_reg);
22239 x = gen_rtx_REG (SImode, PC_REGNUM);
22240 emit_move_insn (work_reg, x);
22242 x = plus_constant (stack_pointer_rtx, offset + 12);
22243 x = gen_frame_mem (SImode, x);
22244 emit_move_insn (x, work_reg);
22247 x = gen_rtx_REG (SImode, LR_REGNUM);
22248 emit_move_insn (work_reg, x);
22250 x = plus_constant (stack_pointer_rtx, offset + 8);
22251 x = gen_frame_mem (SImode, x);
22252 emit_move_insn (x, work_reg);
22254 x = GEN_INT (offset + 12);
22255 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22257 emit_move_insn (arm_hfp_rtx, work_reg);
22259 /* Optimization: If we are not pushing any low registers but we are going
22260 to push some high registers then delay our first push. This will just
22261 be a push of LR and we can combine it with the push of the first high register. */
22263 else if ((l_mask & 0xff) != 0
22264 || (high_regs_pushed == 0 && l_mask))
22266 unsigned long mask = l_mask;
22267 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22268 insn = thumb1_emit_multi_reg_push (mask, mask);
22269 RTX_FRAME_RELATED_P (insn) = 1;
22272 if (high_regs_pushed)
22274 unsigned pushable_regs;
22275 unsigned next_hi_reg;
22277 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22278 if (live_regs_mask & (1 << next_hi_reg))
22281 pushable_regs = l_mask & 0xff;
22283 if (pushable_regs == 0)
22284 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22286 while (high_regs_pushed > 0)
22288 unsigned long real_regs_mask = 0;
22290 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22292 if (pushable_regs & (1 << regno))
22294 emit_move_insn (gen_rtx_REG (SImode, regno),
22295 gen_rtx_REG (SImode, next_hi_reg));
22297 high_regs_pushed --;
22298 real_regs_mask |= (1 << next_hi_reg);
22300 if (high_regs_pushed)
22302 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22304 if (live_regs_mask & (1 << next_hi_reg))
22309 pushable_regs &= ~((1 << regno) - 1);
22315 /* If we had to find a work register and we have not yet
22316 saved the LR then add it to the list of regs to push. */
22317 if (l_mask == (1 << LR_REGNUM))
22319 pushable_regs |= l_mask;
22320 real_regs_mask |= l_mask;
22324 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22325 RTX_FRAME_RELATED_P (insn) = 1;
22329 /* Load the pic register before setting the frame pointer,
22330 so we can use r7 as a temporary work register. */
22331 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22332 arm_load_pic_register (live_regs_mask);
22334 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22335 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22336 stack_pointer_rtx);
22338 if (flag_stack_usage_info)
22339 current_function_static_stack_size
22340 = offsets->outgoing_args - offsets->saved_args;
22342 amount = offsets->outgoing_args - offsets->saved_regs;
22343 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22348 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22349 GEN_INT (- amount)));
22350 RTX_FRAME_RELATED_P (insn) = 1;
22356 /* The stack decrement is too big for an immediate value in a single
22357 insn. In theory we could issue multiple subtracts, but after
22358 three of them it becomes more space efficient to place the full
22359 value in the constant pool and load into a register. (Also the
22360 ARM debugger really likes to see only one stack decrement per
22361 function). So instead we look for a scratch register into which
22362 we can load the decrement, and then we subtract this from the
22363 stack pointer. Unfortunately on the thumb the only available
22364 scratch registers are the argument registers, and we cannot use
22365 these as they may hold arguments to the function. Instead we
22366 attempt to locate a call preserved register which is used by this
22367 function. If we can find one, then we know that it will have
22368 been pushed at the start of the prologue and so we can corrupt it now. */
22370 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22371 if (live_regs_mask & (1 << regno))
        break;
22374 gcc_assert (regno <= LAST_LO_REGNUM);
22376 reg = gen_rtx_REG (SImode, regno);
22378 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22380 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22381 stack_pointer_rtx, reg));
22383 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22384 plus_constant (stack_pointer_rtx,
22386 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22387 RTX_FRAME_RELATED_P (insn) = 1;
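/* Editor's illustrative example (hand-written, not compiler output):
   for a 2000-byte frame with r4 live, the code above amounts to

	ldr	r4, .Lc		@ .Lc: .word -2000
	add	sp, sp, r4

   r4 may be corrupted because the prologue has already saved it.  */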
22391 if (frame_pointer_needed)
22392 thumb_set_frame_pointer (offsets);
22394 /* If we are profiling, make sure no instructions are scheduled before
22395 the call to mcount. Similarly if the user has requested no
22396 scheduling in the prolog. Similarly if we want non-call exceptions
22397 using the EABI unwinder, to prevent faulting instructions from being
22398 swapped with a stack adjustment. */
22399 if (crtl->profile || !TARGET_SCHED_PROLOG
22400 || (arm_except_unwind_info (&global_options) == UI_TARGET
22401 && cfun->can_throw_non_call_exceptions))
22402 emit_insn (gen_blockage ());
22404 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22405 if (live_regs_mask & 0xff)
22406 cfun->machine->lr_save_eliminated = 0;
22411 thumb1_expand_epilogue (void)
22413 HOST_WIDE_INT amount;
22414 arm_stack_offsets *offsets;
22417 /* Naked functions don't have epilogues. */
22418 if (IS_NAKED (arm_current_func_type ()))
22421 offsets = arm_get_frame_offsets ();
22422 amount = offsets->outgoing_args - offsets->saved_regs;
22424 if (frame_pointer_needed)
22426 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22427 amount = offsets->locals_base - offsets->saved_regs;
22429 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22431 gcc_assert (amount >= 0);
22434 emit_insn (gen_blockage ());
22437 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22438 GEN_INT (amount)));
22441 /* r3 is always free in the epilogue. */
22442 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22444 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22445 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22449 /* Emit a USE (stack_pointer_rtx), so that
22450 the stack adjustment will not be deleted. */
22451 emit_insn (gen_prologue_use (stack_pointer_rtx));
22453 if (crtl->profile || !TARGET_SCHED_PROLOG)
22454 emit_insn (gen_blockage ());
22456 /* Emit a clobber for each insn that will be restored in the epilogue,
22457 so that flow2 will get register lifetimes correct. */
22458 for (regno = 0; regno < 13; regno++)
22459 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22460 emit_clobber (gen_rtx_REG (SImode, regno));
22462 if (! df_regs_ever_live_p (LR_REGNUM))
22463 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22466 /* Implementation of insn prologue_thumb1_interwork. This is the first
22467 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22470 thumb1_output_interwork (void)
22473 FILE *f = asm_out_file;
22475 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22476 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22478 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22480 /* Generate code sequence to switch us into Thumb mode. */
22481 /* The .code 32 directive has already been emitted by
22482 ASM_DECLARE_FUNCTION_NAME. */
22483 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22484 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22486 /* Generate a label, so that the debugger will notice the
22487 change in instruction sets. This label is also used by
22488 the assembler to bypass the ARM code when this function
22489 is called from a Thumb encoded function elsewhere in the
22490 same file. Hence the definition of STUB_NAME here must
22491 agree with the definition in gas/config/tc-arm.c. */
22493 #define STUB_NAME ".real_start_of"
22495 fprintf (f, "\t.code\t16\n");
22497 if (arm_dllexport_name_p (name))
22498 name = arm_strip_name_encoding (name);
22500 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22501 fprintf (f, "\t.thumb_func\n");
22502 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22507 /* Handle the case of a double word load into a low register from
22508 a computed memory address. The computed address may involve a
22509 register which is overwritten by the load. */
22511 thumb_load_double_from_address (rtx *operands)
22519 gcc_assert (GET_CODE (operands[0]) == REG);
22520 gcc_assert (GET_CODE (operands[1]) == MEM);
22522 /* Get the memory address. */
22523 addr = XEXP (operands[1], 0);
22525 /* Work out how the memory address is computed. */
22526 switch (GET_CODE (addr))
22529 operands[2] = adjust_address (operands[1], SImode, 4);
22531 if (REGNO (operands[0]) == REGNO (addr))
22533 output_asm_insn ("ldr\t%H0, %2", operands);
22534 output_asm_insn ("ldr\t%0, %1", operands);
22538 output_asm_insn ("ldr\t%0, %1", operands);
22539 output_asm_insn ("ldr\t%H0, %2", operands);
22544 /* Compute <address> + 4 for the high order load. */
22545 operands[2] = adjust_address (operands[1], SImode, 4);
22547 output_asm_insn ("ldr\t%0, %1", operands);
22548 output_asm_insn ("ldr\t%H0, %2", operands);
22552 arg1 = XEXP (addr, 0);
22553 arg2 = XEXP (addr, 1);
22555 if (CONSTANT_P (arg1))
22556 base = arg2, offset = arg1;
22558 base = arg1, offset = arg2;
22560 gcc_assert (GET_CODE (base) == REG);
22562 /* Catch the case of <address> = <reg> + <reg> */
22563 if (GET_CODE (offset) == REG)
22565 int reg_offset = REGNO (offset);
22566 int reg_base = REGNO (base);
22567 int reg_dest = REGNO (operands[0]);
22569 /* Add the base and offset registers together into the
22570 higher destination register. */
22571 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22572 reg_dest + 1, reg_base, reg_offset);
22574 /* Load the lower destination register from the address in
22575 the higher destination register. */
22576 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22577 reg_dest, reg_dest + 1);
22579 /* Load the higher destination register from its own address plus 4. */
22581 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22582 reg_dest + 1, reg_dest + 1);
22586 /* Compute <address> + 4 for the high order load. */
22587 operands[2] = adjust_address (operands[1], SImode, 4);
22589 /* If the computed address is held in the low order register
22590 then load the high order register first, otherwise always
22591 load the low order register first. */
22592 if (REGNO (operands[0]) == REGNO (base))
22594 output_asm_insn ("ldr\t%H0, %2", operands);
22595 output_asm_insn ("ldr\t%0, %1", operands);
22599 output_asm_insn ("ldr\t%0, %1", operands);
22600 output_asm_insn ("ldr\t%H0, %2", operands);
22606 /* With no registers to worry about we can just load the value directly. */
22608 operands[2] = adjust_address (operands[1], SImode, 4);
22610 output_asm_insn ("ldr\t%H0, %2", operands);
22611 output_asm_insn ("ldr\t%0, %1", operands);
22615 gcc_unreachable ();
22622 thumb_output_move_mem_multiple (int n, rtx *operands)
22629 if (REGNO (operands[4]) > REGNO (operands[5]))
22632 operands[4] = operands[5];
22635 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22636 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22640 if (REGNO (operands[4]) > REGNO (operands[5]))
22643 operands[4] = operands[5];
22646 if (REGNO (operands[5]) > REGNO (operands[6]))
22649 operands[5] = operands[6];
22652 if (REGNO (operands[4]) > REGNO (operands[5]))
22655 operands[4] = operands[5];
22659 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22660 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22664 gcc_unreachable ();
22670 /* Output a call-via instruction for thumb state. */
22672 thumb_call_via_reg (rtx reg)
22674 int regno = REGNO (reg);
22677 gcc_assert (regno < LR_REGNUM);
22679 /* If we are in the normal text section we can use a single instance
22680 per compilation unit. If we are doing function sections, then we need
22681 an entry per section, since we can't rely on reachability. */
22682 if (in_section == text_section)
22684 thumb_call_reg_needed = 1;
22686 if (thumb_call_via_label[regno] == NULL)
22687 thumb_call_via_label[regno] = gen_label_rtx ();
22688 labelp = thumb_call_via_label + regno;
22692 if (cfun->machine->call_via[regno] == NULL)
22693 cfun->machine->call_via[regno] = gen_label_rtx ();
22694 labelp = cfun->machine->call_via + regno;
22697 output_asm_insn ("bl\t%a0", labelp);
22701 /* Routines for generating rtl. */
22703 thumb_expand_movmemqi (rtx *operands)
22705 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22706 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22707 HOST_WIDE_INT len = INTVAL (operands[2]);
22708 HOST_WIDE_INT offset = 0;
22712 emit_insn (gen_movmem12b (out, in, out, in));
22718 emit_insn (gen_movmem8b (out, in, out, in));
22724 rtx reg = gen_reg_rtx (SImode);
22725 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22726 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22733 rtx reg = gen_reg_rtx (HImode);
22734 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22735 plus_constant (in, offset))));
22736 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22744 rtx reg = gen_reg_rtx (QImode);
22745 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22746 plus_constant (in, offset))));
22747 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
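/* Editor's illustrative sketch, not GCC code: the expansion above
   lowers a memcpy of LEN bytes into 12- and 8-byte ldmia/stmia blocks
   followed by word, halfword and byte tails.  This helper enumerates
   the chunk sizes that would be emitted.  */
static int
movmem_chunks_example (long len, int *chunks, int max_chunks)
{
  int n = 0;

  while (len >= 12 && n < max_chunks) { chunks[n++] = 12; len -= 12; }
  if (len >= 8 && n < max_chunks)     { chunks[n++] = 8;  len -= 8; }
  if (len >= 4 && n < max_chunks)     { chunks[n++] = 4;  len -= 4; }
  if (len >= 2 && n < max_chunks)     { chunks[n++] = 2;  len -= 2; }
  if (len >= 1 && n < max_chunks)     { chunks[n++] = 1;  len -= 1; }
  return n;
}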
22753 thumb_reload_out_hi (rtx *operands)
22755 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22758 /* Handle reading a half-word from memory during reload. */
22760 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22762 gcc_unreachable ();
22765 /* Return the length of a function name prefix
22766 that starts with the character 'c'. */
22768 arm_get_strip_length (int c)
22772 ARM_NAME_ENCODING_LENGTHS
22777 /* Return a pointer to a function's name with any
22778 and all prefix encodings stripped from it. */
22780 arm_strip_name_encoding (const char *name)
22784 while ((skip = arm_get_strip_length (* name)))
22790 /* If there is a '*' anywhere in the name's prefix, then
22791 emit the stripped name verbatim, otherwise prepend an
22792 underscore if leading underscores are being used. */
22794 arm_asm_output_labelref (FILE *stream, const char *name)
22799 while ((skip = arm_get_strip_length (* name)))
22801 verbatim |= (*name == '*');
22806 fputs (name, stream);
22808 asm_fprintf (stream, "%U%s", name);
22812 arm_file_start (void)
22816 if (TARGET_UNIFIED_ASM)
22817 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22821 const char *fpu_name;
22822 if (arm_selected_arch)
22823 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22824 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22825 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22827 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22829 if (TARGET_SOFT_FLOAT)
22832 fpu_name = "softvfp";
22834 fpu_name = "softfpa";
22838 fpu_name = arm_fpu_desc->name;
22839 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22841 if (TARGET_HARD_FLOAT)
22842 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22843 if (TARGET_HARD_FLOAT_ABI)
22844 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22847 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22849 /* Some of these attributes only apply when the corresponding features
22850 are used. However we don't have any easy way of figuring this out.
22851 Conservatively record the setting that would have been used. */
22853 if (flag_rounding_math)
22854 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22856 if (!flag_unsafe_math_optimizations)
22858 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22859 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22861 if (flag_signaling_nans)
22862 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22864 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22865 flag_finite_math_only ? 1 : 3);
22867 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
22868 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
22869 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
22871 /* Tag_ABI_optimization_goals. */
22874 else if (optimize >= 2)
22880 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
22882 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
22884 if (arm_fp16_format)
22885 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
22887 if (arm_lang_output_object_attributes_hook)
22888 arm_lang_output_object_attributes_hook();
22891 default_file_start ();
22895 arm_file_end (void)
22899 if (NEED_INDICATE_EXEC_STACK)
22900 /* Add .note.GNU-stack. */
22901 file_end_indicate_exec_stack ();
22903 if (! thumb_call_reg_needed)
      return;
22906 switch_to_section (text_section);
22907 asm_fprintf (asm_out_file, "\t.code 16\n");
22908 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22910 for (regno = 0; regno < LR_REGNUM; regno++)
22912 rtx label = thumb_call_via_label[regno];
22916 targetm.asm_out.internal_label (asm_out_file, "L",
22917 CODE_LABEL_NUMBER (label));
22918 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22924 /* Symbols in the text segment can be accessed without indirecting via the
22925 constant pool; it may take an extra binary operation, but this is still
22926 faster than indirecting via memory. Don't do this when not optimizing,
22927 since we won't be calculating all of the offsets necessary to do this simplification. */
22931 arm_encode_section_info (tree decl, rtx rtl, int first)
22933 if (optimize > 0 && TREE_CONSTANT (decl))
22934 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22936 default_encode_section_info (decl, rtl, first);
22938 #endif /* !ARM_PE */
22941 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22943 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22944 && !strcmp (prefix, "L"))
22946 arm_ccfsm_state = 0;
22947 arm_target_insn = NULL;
22949 default_internal_label (stream, prefix, labelno);
22952 /* Output code to add DELTA to the first argument, and then jump
22953 to FUNCTION. Used for C++ multiple inheritance. */
22955 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22956 HOST_WIDE_INT delta,
22957 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22960 static int thunk_label = 0;
22963 int mi_delta = delta;
22964 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22966 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                        ? 1 : 0);
22969 mi_delta = - mi_delta;
22973 int labelno = thunk_label++;
22974 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22975 /* Thunks are entered in ARM mode when available. */
22976 if (TARGET_THUMB1_ONLY)
22978 /* push r3 so we can use it as a temporary. */
22979 /* TODO: Omit this save if r3 is not used. */
22980 fputs ("\tpush {r3}\n", file);
22981 fputs ("\tldr\tr3, ", file);
22985 fputs ("\tldr\tr12, ", file);
22987 assemble_name (file, label);
22988 fputc ('\n', file);
22991 /* If we are generating PIC, the ldr instruction below loads
22992 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22993 the address of the add + 8, so we have:
22995 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
          = target + 1.
22998 Note that we have "+ 1" because some versions of GNU ld
22999 don't set the low bit of the result for R_ARM_REL32
23000 relocations against thumb function symbols.
23001 On ARMv6M this is +4, not +8. */
23002 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23003 assemble_name (file, labelpc);
23004 fputs (":\n", file);
23005 if (TARGET_THUMB1_ONLY)
23007 /* This is 2 insns after the start of the thunk, so we know it
23008 is 4-byte aligned. */
23009 fputs ("\tadd\tr3, pc, r3\n", file);
23010 fputs ("\tmov r12, r3\n", file);
23013 fputs ("\tadd\tr12, pc, r12\n", file);
23015 else if (TARGET_THUMB1_ONLY)
23016 fputs ("\tmov r12, r3\n", file);
23018 if (TARGET_THUMB1_ONLY)
23020 if (mi_delta > 255)
23022 fputs ("\tldr\tr3, ", file);
23023 assemble_name (file, label);
23024 fputs ("+4\n", file);
23025 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23026 mi_op, this_regno, this_regno);
23028 else if (mi_delta != 0)
23030 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23031 mi_op, this_regno, this_regno,
23037 /* TODO: Use movw/movt for large constants when available. */
23038 while (mi_delta != 0)
23040 if ((mi_delta & (3 << shift)) == 0)
23044 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23045 mi_op, this_regno, this_regno,
23046 mi_delta & (0xff << shift));
23047 mi_delta &= ~(0xff << shift);
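/* Editor's illustrative sketch, not GCC code: the loop above splits
   DELTA into 8-bit chunks anchored at even bit positions, matching
   ARM's rotated 8-bit immediate fields; one add/sub is emitted per
   chunk.  CHUNKS needs at most four entries for 32-bit deltas.  */
static int
split_delta_example (unsigned long delta, unsigned long chunks[4])
{
  int shift = 0, n = 0;

  delta &= 0xffffffffUL;
  while (delta != 0)
    {
      if ((delta & (3UL << shift)) == 0)
	shift += 2;			/* skip an empty bit pair */
      else
	{
	  chunks[n++] = delta & (0xffUL << shift);
	  delta &= ~(0xffUL << shift);
	  shift += 8;
	}
    }
  return n;
}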
23054 if (TARGET_THUMB1_ONLY)
23055 fputs ("\tpop\t{r3}\n", file);
23057 fprintf (file, "\tbx\tr12\n");
23058 ASM_OUTPUT_ALIGN (file, 2);
23059 assemble_name (file, label);
23060 fputs (":\n", file);
23063 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23064 rtx tem = XEXP (DECL_RTL (function), 0);
23065 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23066 tem = gen_rtx_MINUS (GET_MODE (tem),
23068 gen_rtx_SYMBOL_REF (Pmode,
23069 ggc_strdup (labelpc)));
23070 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23073 /* Output ".word .LTHUNKn". */
23074 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23076 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23077 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23081 fputs ("\tb\t", file);
23082 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23083 if (NEED_PLT_RELOC)
23084 fputs ("(PLT)", file);
23085 fputc ('\n', file);
23090 arm_emit_vector_const (FILE *file, rtx x)
23093 const char * pattern;
23095 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23097 switch (GET_MODE (x))
23099 case V2SImode: pattern = "%08x"; break;
23100 case V4HImode: pattern = "%04x"; break;
23101 case V8QImode: pattern = "%02x"; break;
23102 default: gcc_unreachable ();
23105 fprintf (file, "0x");
23106 for (i = CONST_VECTOR_NUNITS (x); i--;)
23110 element = CONST_VECTOR_ELT (x, i);
23111 fprintf (file, pattern, INTVAL (element));
23117 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23118 HFmode constant pool entries are actually loaded with ldr. */
23120 arm_emit_fp16_const (rtx c)
23125 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23126 bits = real_to_target (NULL, &r, HFmode);
23127 if (WORDS_BIG_ENDIAN)
23128 assemble_zeros (2);
23129 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23130 if (!WORDS_BIG_ENDIAN)
23131 assemble_zeros (2);
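/* Editor's illustrative sketch, not GCC code: byte layout of the
   4-byte pool entry built above for half-float payload BITS.  In
   either byte order, a word load yields BITS in the low 16 bits of
   the register with zero above, which is what ldr relies on.  */
static void
fp16_pool_bytes_example (unsigned int bits, int big_endian,
			 unsigned char out[4])
{
  out[0] = out[1] = out[2] = out[3] = 0;
  if (big_endian)
    {				/* two zero bytes, then the value */
      out[2] = (bits >> 8) & 0xff;
      out[3] = bits & 0xff;
    }
  else
    {				/* the value, then two zero bytes */
      out[0] = bits & 0xff;
      out[1] = (bits >> 8) & 0xff;
    }
}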
23135 arm_output_load_gr (rtx *operands)
23142 if (GET_CODE (operands [1]) != MEM
23143 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23144 || GET_CODE (reg = XEXP (sum, 0)) != REG
23145 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23146 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23147 return "wldrw%?\t%0, %1";
23149 /* Fix up an out-of-range load of a GR register. */
23150 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23151 wcgr = operands[0];
23153 output_asm_insn ("ldr%?\t%0, %1", operands);
23155 operands[0] = wcgr;
23157 output_asm_insn ("tmcr%?\t%0, %1", operands);
23158 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23163 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23165 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23166 named arg and all anonymous args onto the stack.
23167 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
23171 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23172 enum machine_mode mode,
23175 int second_time ATTRIBUTE_UNUSED)
23177 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23180 cfun->machine->uses_anonymous_args = 1;
23181 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23183 nregs = pcum->aapcs_ncrn;
23184 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23188 nregs = pcum->nregs;
23190 if (nregs < NUM_ARG_REGS)
23191 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
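/* Editor's illustrative sketch, not GCC code: with the four AAPCS core
   argument registers r0-r3, a varargs function whose named arguments
   consume NREGS of them asks the prologue to spill the rest.  */
static int
pretend_size_example (int nregs)
{
  const int num_arg_regs = 4;	/* r0-r3 */
  const int units_per_word = 4;

  return nregs < num_arg_regs
	 ? (num_arg_regs - nregs) * units_per_word : 0;
}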
23194 /* Return nonzero if the CONSUMER instruction (a store) does not need
23195 PRODUCER's value to calculate the address. */
23198 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23200 rtx value = PATTERN (producer);
23201 rtx addr = PATTERN (consumer);
23203 if (GET_CODE (value) == COND_EXEC)
23204 value = COND_EXEC_CODE (value);
23205 if (GET_CODE (value) == PARALLEL)
23206 value = XVECEXP (value, 0, 0);
23207 value = XEXP (value, 0);
23208 if (GET_CODE (addr) == COND_EXEC)
23209 addr = COND_EXEC_CODE (addr);
23210 if (GET_CODE (addr) == PARALLEL)
23211 addr = XVECEXP (addr, 0, 0);
23212 addr = XEXP (addr, 0);
23214 return !reg_overlap_mentioned_p (value, addr);
23217 /* Return nonzero if the CONSUMER instruction (a store) does need
23218 PRODUCER's value to calculate the address. */
23221 arm_early_store_addr_dep (rtx producer, rtx consumer)
23223 return !arm_no_early_store_addr_dep (producer, consumer);
23226 /* Return nonzero if the CONSUMER instruction (a load) does need
23227 PRODUCER's value to calculate the address. */
23230 arm_early_load_addr_dep (rtx producer, rtx consumer)
23232 rtx value = PATTERN (producer);
23233 rtx addr = PATTERN (consumer);
23235 if (GET_CODE (value) == COND_EXEC)
23236 value = COND_EXEC_CODE (value);
23237 if (GET_CODE (value) == PARALLEL)
23238 value = XVECEXP (value, 0, 0);
23239 value = XEXP (value, 0);
23240 if (GET_CODE (addr) == COND_EXEC)
23241 addr = COND_EXEC_CODE (addr);
23242 if (GET_CODE (addr) == PARALLEL)
23243 addr = XVECEXP (addr, 0, 0);
23244 addr = XEXP (addr, 1);
23246 return reg_overlap_mentioned_p (value, addr);
23249 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23250 have an early register shift value or amount dependency on the
23251 result of PRODUCER. */
23254 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23256 rtx value = PATTERN (producer);
23257 rtx op = PATTERN (consumer);
23260 if (GET_CODE (value) == COND_EXEC)
23261 value = COND_EXEC_CODE (value);
23262 if (GET_CODE (value) == PARALLEL)
23263 value = XVECEXP (value, 0, 0);
23264 value = XEXP (value, 0);
23265 if (GET_CODE (op) == COND_EXEC)
23266 op = COND_EXEC_CODE (op);
23267 if (GET_CODE (op) == PARALLEL)
23268 op = XVECEXP (op, 0, 0);
23271 early_op = XEXP (op, 0);
23272 /* This is either an actual independent shift, or a shift applied to
23273 the first operand of another operation. We want the whole shift operation. */
23275 if (GET_CODE (early_op) == REG)
        early_op = op;
23278 return !reg_overlap_mentioned_p (value, early_op);
23281 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23282 have an early register shift value dependency on the result of
23286 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23288 rtx value = PATTERN (producer);
23289 rtx op = PATTERN (consumer);
23292 if (GET_CODE (value) == COND_EXEC)
23293 value = COND_EXEC_CODE (value);
23294 if (GET_CODE (value) == PARALLEL)
23295 value = XVECEXP (value, 0, 0);
23296 value = XEXP (value, 0);
23297 if (GET_CODE (op) == COND_EXEC)
23298 op = COND_EXEC_CODE (op);
23299 if (GET_CODE (op) == PARALLEL)
23300 op = XVECEXP (op, 0, 0);
23303 early_op = XEXP (op, 0);
23305 /* This is either an actual independent shift, or a shift applied to
23306 the first operand of another operation. We want the value being
23307 shifted, in either case. */
23308 if (GET_CODE (early_op) != REG)
23309 early_op = XEXP (early_op, 0);
23311 return !reg_overlap_mentioned_p (value, early_op);
23314 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23315 have an early register mult dependency on the result of
23319 arm_no_early_mul_dep (rtx producer, rtx consumer)
23321 rtx value = PATTERN (producer);
23322 rtx op = PATTERN (consumer);
23324 if (GET_CODE (value) == COND_EXEC)
23325 value = COND_EXEC_CODE (value);
23326 if (GET_CODE (value) == PARALLEL)
23327 value = XVECEXP (value, 0, 0);
23328 value = XEXP (value, 0);
23329 if (GET_CODE (op) == COND_EXEC)
23330 op = COND_EXEC_CODE (op);
23331 if (GET_CODE (op) == PARALLEL)
23332 op = XVECEXP (op, 0, 0);
23335 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23337 if (GET_CODE (XEXP (op, 0)) == MULT)
23338 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23340 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23346 /* We can't rely on the caller doing the proper promotion when
23347 using APCS or ATPCS. */
23350 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23352 return !TARGET_AAPCS_BASED;
23355 static enum machine_mode
23356 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23357 enum machine_mode mode,
23358 int *punsignedp ATTRIBUTE_UNUSED,
23359 const_tree fntype ATTRIBUTE_UNUSED,
23360 int for_return ATTRIBUTE_UNUSED)
23362 if (GET_MODE_CLASS (mode) == MODE_INT
23363 && GET_MODE_SIZE (mode) < 4)
        return SImode;
      return mode;
23369 /* AAPCS based ABIs use short enums by default. */
23372 arm_default_short_enums (void)
23374 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23378 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23381 arm_align_anon_bitfield (void)
23383 return TARGET_AAPCS_BASED;
23387 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23390 arm_cxx_guard_type (void)
23392 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23395 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23396 has an accumulator dependency on the result of the producer (a
23397 multiplication instruction) and no other dependency on that result. */
23399 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23401 rtx mul = PATTERN (producer);
23402 rtx mac = PATTERN (consumer);
23404 rtx mac_op0, mac_op1, mac_acc;
23406 if (GET_CODE (mul) == COND_EXEC)
23407 mul = COND_EXEC_CODE (mul);
23408 if (GET_CODE (mac) == COND_EXEC)
23409 mac = COND_EXEC_CODE (mac);
23411 /* Check that mul is of the form (set (...) (mult ...))
23412 and mla is of the form (set (...) (plus (mult ...) (...))). */
23413 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23414 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23415 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23418 mul_result = XEXP (mul, 0);
23419 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23420 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23421 mac_acc = XEXP (XEXP (mac, 1), 1);
23423 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23424 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23425 && !reg_overlap_mentioned_p (mul_result, mac_op1));
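/* Editor's illustrative example (hand-written, not from the source):
   the predicate above accepts
	mul r1, r2, r3 ; mla r4, r5, r6, r1
   (the product feeds only the accumulator) but rejects
	mul r1, r2, r3 ; mla r4, r1, r6, r7
   where the product is a multiplicand of the second insn.  */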
23429 /* The EABI says test the least significant bit of a guard variable. */
23432 arm_cxx_guard_mask_bit (void)
23434 return TARGET_AAPCS_BASED;
23438 /* The EABI specifies that all array cookies are 8 bytes long. */
23441 arm_get_cookie_size (tree type)
23445 if (!TARGET_AAPCS_BASED)
23446 return default_cxx_get_cookie_size (type);
23448 size = build_int_cst (sizetype, 8);
23453 /* The EABI says that array cookies should also contain the element size. */
23456 arm_cookie_has_size (void)
23458 return TARGET_AAPCS_BASED;
23462 /* The EABI says constructors and destructors should return a pointer to
23463 the object constructed/destroyed. */
23466 arm_cxx_cdtor_returns_this (void)
23468 return TARGET_AAPCS_BASED;
23471 /* The EABI says that an inline function may never be the key
23475 arm_cxx_key_method_may_be_inline (void)
23477 return !TARGET_AAPCS_BASED;
23481 arm_cxx_determine_class_data_visibility (tree decl)
23483 if (!TARGET_AAPCS_BASED
23484 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23487 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23488 is exported. However, on systems without dynamic vague linkage,
23489 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23490 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23491 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23493 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23494 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23498 arm_cxx_class_data_always_comdat (void)
23500 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23501 vague linkage if the class has no key function. */
23502 return !TARGET_AAPCS_BASED;
23506 /* The EABI says __aeabi_atexit should be used to register static
23510 arm_cxx_use_aeabi_atexit (void)
23512 return TARGET_AAPCS_BASED;
23517 arm_set_return_address (rtx source, rtx scratch)
23519 arm_stack_offsets *offsets;
23520 HOST_WIDE_INT delta;
23522 unsigned long saved_regs;
23524 offsets = arm_get_frame_offsets ();
23525 saved_regs = offsets->saved_regs_mask;
23527 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23528 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23531 if (frame_pointer_needed)
23532 addr = plus_constant(hard_frame_pointer_rtx, -4);
23535 /* LR will be the first saved register. */
23536 delta = offsets->outgoing_args - (offsets->frame + 4);
23541 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23542 GEN_INT (delta & ~4095)));
23547 addr = stack_pointer_rtx;
23549 addr = plus_constant (addr, delta);
23551 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23557 thumb_set_return_address (rtx source, rtx scratch)
23559 arm_stack_offsets *offsets;
23560 HOST_WIDE_INT delta;
23561 HOST_WIDE_INT limit;
23564 unsigned long mask;
23568 offsets = arm_get_frame_offsets ();
23569 mask = offsets->saved_regs_mask;
23570 if (mask & (1 << LR_REGNUM))
23573 /* Find the saved regs. */
23574 if (frame_pointer_needed)
23576 delta = offsets->soft_frame - offsets->saved_args;
23577 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23583 delta = offsets->outgoing_args - offsets->saved_args;
23586 /* Allow for the stack frame. */
23587 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23589 /* The link register is always the first saved register. */
23592 /* Construct the address. */
23593 addr = gen_rtx_REG (SImode, reg);
23596 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23597 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23601 addr = plus_constant (addr, delta);
23603 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23606 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23609 /* Implements target hook vector_mode_supported_p. */
23611 arm_vector_mode_supported_p (enum machine_mode mode)
23613 /* Neon also supports V2SImode, etc. listed in the clause below. */
23614 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23615 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23618 if ((TARGET_NEON || TARGET_IWMMXT)
23619 && ((mode == V2SImode)
23620 || (mode == V4HImode)
23621 || (mode == V8QImode)))
23624 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23625 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23626 || mode == V2HAmode))
23632 /* Implements target hook array_mode_supported_p. */
23635 arm_array_mode_supported_p (enum machine_mode mode,
23636 unsigned HOST_WIDE_INT nelems)
23639 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23640 && (nelems >= 2 && nelems <= 4))
23646 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23647 registers when autovectorizing for Neon, at least until multiple vector
23648 widths are supported properly by the middle-end. */
23650 static enum machine_mode
23651 arm_preferred_simd_mode (enum machine_mode mode)
23657 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23659 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23661 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23663 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23665 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23672 if (TARGET_REALLY_IWMMXT)
23688 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23690 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23691 using r0-r4 for function arguments, r7 for the stack frame and don't have
23692 enough left over to do doubleword arithmetic. For Thumb-2 all the
23693 potentially problematic instructions accept high registers so this is not
23694 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23695 that require many low registers. */
23697 arm_class_likely_spilled_p (reg_class_t rclass)
23699 if ((TARGET_THUMB1 && rclass == LO_REGS)
23700 || rclass == CC_REG)
23706 /* Implements target hook small_register_classes_for_mode_p. */
23708 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23710 return TARGET_THUMB1;
23713 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23714 ARM insns and therefore guarantee that the shift count is modulo 256.
23715 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23716 guarantee no particular behavior for out-of-range counts. */
23718 static unsigned HOST_WIDE_INT
23719 arm_shift_truncation_mask (enum machine_mode mode)
23721 return mode == SImode ? 255 : 0;
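/* Editor's illustrative sketch, not GCC code: ARM register-specified
   shifts use the low byte of the count register, so a 32-bit logical
   left shift by N behaves as below; counts of 32..255 yield zero.
   The mask of 255 returned above licenses exactly this folding.  */
static unsigned long
arm_lsl_semantics_example (unsigned long x, unsigned int count)
{
  count &= 255;			/* hardware truncates to one byte */
  return count >= 32 ? 0 : (x << count) & 0xffffffffUL;
}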
23725 /* Map internal gcc register numbers to DWARF2 register numbers. */
23728 arm_dbx_register_number (unsigned int regno)
23733 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23734 compatibility. The EABI defines them as registers 96-103. */
23735 if (IS_FPA_REGNUM (regno))
23736 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23738 if (IS_VFP_REGNUM (regno))
23740 /* See comment in arm_dwarf_register_span. */
23741 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23742 return 64 + regno - FIRST_VFP_REGNUM;
23744 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23747 if (IS_IWMMXT_GR_REGNUM (regno))
23748 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23750 if (IS_IWMMXT_REGNUM (regno))
23751 return 112 + regno - FIRST_IWMMXT_REGNUM;
23753 gcc_unreachable ();
23756 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23757 GCC models them as 64 32-bit registers, so we need to describe this to
23758 the DWARF generation code. Other registers can use the default. */
23760 arm_dwarf_register_span (rtx rtl)
23767 regno = REGNO (rtl);
23768 if (!IS_VFP_REGNUM (regno))
23771 /* XXX FIXME: The EABI defines two VFP register ranges:
23772 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23774 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23775 corresponding D register. Until GDB supports this, we shall use the
23776 legacy encodings. We also use these encodings for D0-D15 for
23777 compatibility with older debuggers. */
23778 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23781 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23782 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23783 regno = (regno - FIRST_VFP_REGNUM) / 2;
23784 for (i = 0; i < nregs; i++)
23785 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23790 #if ARM_UNWIND_INFO
23791 /* Emit unwind directives for a store-multiple instruction or stack pointer
23792 push during alignment.
23793 These should only ever be generated by the function prologue code, so
23794 expect them to have a particular form. */
23797 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23800 HOST_WIDE_INT offset;
23801 HOST_WIDE_INT nregs;
23807 e = XVECEXP (p, 0, 0);
23808 if (GET_CODE (e) != SET)
23811 /* First insn will adjust the stack pointer. */
23812 if (GET_CODE (e) != SET
23813 || GET_CODE (XEXP (e, 0)) != REG
23814 || REGNO (XEXP (e, 0)) != SP_REGNUM
23815 || GET_CODE (XEXP (e, 1)) != PLUS)
23818 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23819 nregs = XVECLEN (p, 0) - 1;
23821 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23824 /* The function prologue may also push pc, but not annotate it as it is
23825 never restored. We turn this into a stack pointer adjustment. */
23826 if (nregs * 4 == offset - 4)
23828 fprintf (asm_out_file, "\t.pad #4\n");
23832 fprintf (asm_out_file, "\t.save {");
23834 else if (IS_VFP_REGNUM (reg))
23837 fprintf (asm_out_file, "\t.vsave {");
23839 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23841 /* FPA registers are done differently. */
23842 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23846 /* Unknown register type. */
23849 /* If the stack increment doesn't match the size of the saved registers,
23850 something has gone horribly wrong. */
23851 if (offset != nregs * reg_size)
23856 /* The remaining insns will describe the stores. */
23857 for (i = 1; i <= nregs; i++)
23859 /* Expect (set (mem <addr>) (reg)).
23860 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23861 e = XVECEXP (p, 0, i);
23862 if (GET_CODE (e) != SET
23863 || GET_CODE (XEXP (e, 0)) != MEM
23864 || GET_CODE (XEXP (e, 1)) != REG)
23867 reg = REGNO (XEXP (e, 1));
23872 fprintf (asm_out_file, ", ");
23873 /* We can't use %r for vfp because we need to use the
23874 double precision register names. */
23875 if (IS_VFP_REGNUM (reg))
23876 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23878 asm_fprintf (asm_out_file, "%r", reg);
23880 #ifdef ENABLE_CHECKING
23881 /* Check that the addresses are consecutive. */
23882 e = XEXP (XEXP (e, 0), 0);
23883 if (GET_CODE (e) == PLUS)
23885 offset += reg_size;
23886 if (GET_CODE (XEXP (e, 0)) != REG
23887 || REGNO (XEXP (e, 0)) != SP_REGNUM
23888 || GET_CODE (XEXP (e, 1)) != CONST_INT
23889 || offset != INTVAL (XEXP (e, 1)))
23893 || GET_CODE (e) != REG
23894 || REGNO (e) != SP_REGNUM)
23898 fprintf (asm_out_file, "}\n");
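/* Editor's illustrative example (hand-written, not compiler output):
   for "push {r4, r5, lr}" the loop above emits "\t.save {r4, r5, lr}",
   and for a VFP store multiple of d8-d9 it emits "\t.vsave {d8, d9}".  */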
23901 /* Emit unwind directives for a SET. */
23904 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23912 switch (GET_CODE (e0))
23915 /* Pushing a single register. */
23916 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23917 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23918 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23921 asm_fprintf (asm_out_file, "\t.save ");
23922 if (IS_VFP_REGNUM (REGNO (e1)))
23923 asm_fprintf(asm_out_file, "{d%d}\n",
23924 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23926 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23930 if (REGNO (e0) == SP_REGNUM)
23932 /* A stack increment. */
23933 if (GET_CODE (e1) != PLUS
23934 || GET_CODE (XEXP (e1, 0)) != REG
23935 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23936 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23939 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23940 -INTVAL (XEXP (e1, 1)));
23942 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23944 HOST_WIDE_INT offset;
23946 if (GET_CODE (e1) == PLUS)
23948 if (GET_CODE (XEXP (e1, 0)) != REG
23949 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23951 reg = REGNO (XEXP (e1, 0));
23952 offset = INTVAL (XEXP (e1, 1));
23953 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23954 HARD_FRAME_POINTER_REGNUM, reg,
23957 else if (GET_CODE (e1) == REG)
23960 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23961 HARD_FRAME_POINTER_REGNUM, reg);
23966 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23968 /* Move from sp to reg. */
23969 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23971 else if (GET_CODE (e1) == PLUS
23972 && GET_CODE (XEXP (e1, 0)) == REG
23973 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23974 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23976 /* Set reg to offset from sp. */
23977 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23978 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
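/* Editor's illustrative example (hand-written, not compiler output):
   "sub sp, sp, #16" is annotated as ".pad #16", "add r7, sp, #8" as
   ".setfp r7, sp, #8", and "mov r4, sp" as ".movsp r4".  */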
23990 /* Emit unwind directives for the given insn. */
23993 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23996 bool handled_one = false;
23998 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24001 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24002 && (TREE_NOTHROW (current_function_decl)
24003 || crtl->all_throwers_are_sibcalls))
24006 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24009 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24011 pat = XEXP (note, 0);
24012 switch (REG_NOTE_KIND (note))
24014 case REG_FRAME_RELATED_EXPR:
24017 case REG_CFA_REGISTER:
24020 pat = PATTERN (insn);
24021 if (GET_CODE (pat) == PARALLEL)
24022 pat = XVECEXP (pat, 0, 0);
24025 /* Only emitted for IS_STACKALIGN re-alignment. */
24030 src = SET_SRC (pat);
24031 dest = SET_DEST (pat);
24033 gcc_assert (src == stack_pointer_rtx);
24034 reg = REGNO (dest);
24035 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24038 handled_one = true;
24041 case REG_CFA_DEF_CFA:
24042 case REG_CFA_EXPRESSION:
24043 case REG_CFA_ADJUST_CFA:
24044 case REG_CFA_OFFSET:
24045 /* ??? Only handling here what we actually emit. */
24046 gcc_unreachable ();
24054 pat = PATTERN (insn);
24057 switch (GET_CODE (pat))
24060 arm_unwind_emit_set (asm_out_file, pat);
24064 /* Store multiple. */
24065 arm_unwind_emit_sequence (asm_out_file, pat);
24074 /* Output a reference from a function exception table to the type_info
24075 object X. The EABI specifies that the symbol should be relocated by
24076 an R_ARM_TARGET2 relocation. */
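/* For instance, for the type_info object of "int" (_ZTIi) this emits a
   line such as
       .word   _ZTIi(TARGET2)
   whereas integer constants are emitted without the relocation suffix
   (a sketch of the output produced below).  */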
24079 arm_output_ttype (rtx x)
24081 fputs ("\t.word\t", asm_out_file);
24082 output_addr_const (asm_out_file, x);
24083 /* Use special relocations for symbol references. */
24084 if (GET_CODE (x) != CONST_INT)
24085 fputs ("(TARGET2)", asm_out_file);
24086 fputc ('\n', asm_out_file);
24091 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24094 arm_asm_emit_except_personality (rtx personality)
24096 fputs ("\t.personality\t", asm_out_file);
24097 output_addr_const (asm_out_file, personality);
24098 fputc ('\n', asm_out_file);
24101 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24104 arm_asm_init_sections (void)
24106 exception_section = get_unnamed_section (0, output_section_asm_op,
24109 #endif /* ARM_UNWIND_INFO */
24111 /* Output unwind directives for the start/end of a function. */
24114 arm_output_fn_unwind (FILE * f, bool prologue)
24116 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24120 fputs ("\t.fnstart\n", f);
24123 /* If this function will never be unwound, then mark it as such.
24124 The same condition is used in arm_unwind_emit to suppress
24125 the frame annotations. */
24126 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24127 && (TREE_NOTHROW (current_function_decl)
24128 || crtl->all_throwers_are_sibcalls))
24129 fputs ("\t.cantunwind\n", f);
24131 fputs ("\t.fnend\n", f);
24136 arm_emit_tls_decoration (FILE *fp, rtx x)
24138 enum tls_reloc reloc;
24141 val = XVECEXP (x, 0, 0);
24142 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24144 output_addr_const (fp, val);
24149 fputs ("(tlsgd)", fp);
24152 fputs ("(tlsldm)", fp);
24155 fputs ("(tlsldo)", fp);
24158 fputs ("(gottpoff)", fp);
24161 fputs ("(tpoff)", fp);
24164 fputs ("(tlsdesc)", fp);
24167 gcc_unreachable ();
24176 fputs (" + (. - ", fp);
24177 output_addr_const (fp, XVECEXP (x, 0, 2));
24178 /* For TLS_DESCSEQ the 3rd operand encodes thumbness and is added rather than subtracted. */
24179 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24180 output_addr_const (fp, XVECEXP (x, 0, 3));
24190 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24193 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24195 gcc_assert (size == 4);
24196 fputs ("\t.word\t", file);
24197 output_addr_const (file, x);
24198 fputs ("(tlsldo)", file);
24201 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24204 arm_output_addr_const_extra (FILE *fp, rtx x)
24206 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24207 return arm_emit_tls_decoration (fp, x);
24208 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24211 int labelno = INTVAL (XVECEXP (x, 0, 0));
24213 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24214 assemble_name_raw (fp, label);
24218 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24220 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24224 output_addr_const (fp, XVECEXP (x, 0, 0));
24228 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24230 output_addr_const (fp, XVECEXP (x, 0, 0));
24234 output_addr_const (fp, XVECEXP (x, 0, 1));
24238 else if (GET_CODE (x) == CONST_VECTOR)
24239 return arm_emit_vector_const (fp, x);
24244 /* Output assembly for a shift instruction.
24245 SET_FLAGS determines how the instruction modifies the condition codes.
24246 0 - Do not set condition codes.
24247 1 - Set condition codes.
24248 2 - Use smallest instruction. */
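/* For example, with SET_FLAGS == 0 under unified syntax the template
   built below looks like "lsl%?\t%0, %1, %2" -- shift_op supplies the
   mnemonic and folds a constant shift count into operands[2] -- while the
   non-unified path falls back to "mov%?\t%0, %1%S3".  The '?', '.' and
   '!' flag characters are expanded later by the operand printer.
   (A sketch based on the sprintf calls below.)  */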
24250 arm_output_shift (rtx *operands, int set_flags)
24253 static const char flag_chars[3] = {'?', '.', '!'};
24258 c = flag_chars[set_flags];
24259 if (TARGET_UNIFIED_ASM)
24261 shift = shift_op (operands[3], &val);
24265 operands[2] = GEN_INT (val);
24266 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24269 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24272 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24273 output_asm_insn (pattern, operands);
24277 /* Output a Thumb-1 casesi dispatch sequence. */
24279 thumb1_output_casesi (rtx *operands)
24281 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24283 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24285 switch (GET_MODE (diff_vec))
24288 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24289 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24291 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24292 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24294 return "bl\t%___gnu_thumb1_case_si";
24296 gcc_unreachable ();
24300 /* Output a Thumb-2 casesi instruction. */
24302 thumb2_output_casesi (rtx *operands)
24304 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24306 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24308 output_asm_insn ("cmp\t%0, %1", operands);
24309 output_asm_insn ("bhi\t%l3", operands);
24310 switch (GET_MODE (diff_vec))
24313 return "tbb\t[%|pc, %0]";
24315 return "tbh\t[%|pc, %0, lsl #1]";
24319 output_asm_insn ("adr\t%4, %l2", operands);
24320 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24321 output_asm_insn ("add\t%4, %4, %5", operands);
24326 output_asm_insn ("adr\t%4, %l2", operands);
24327 return "ldr\t%|pc, [%4, %0, lsl #2]";
24330 gcc_unreachable ();
24334 /* Most ARM cores are single issue, but some newer ones can dual issue.
24335 The scheduler descriptions rely on this being correct. */
24337 arm_issue_rate (void)
24359 /* A table and a function to perform ARM-specific name mangling for
24360 NEON vector types in order to conform to the AAPCS (see "Procedure
24361 Call Standard for the ARM Architecture", Appendix A). To qualify
24362 for emission with the mangled names defined in that document, a
24363 vector type must not only be of the correct mode but also be
24364 composed of NEON vector element types (e.g. __builtin_neon_qi). */
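/* For example, the arm_neon.h type int8x8_t is a V8QImode vector of
   __builtin_neon_qi elements, so by the first table entry below it is
   emitted as "15__simd64_int8_t" (the "15" being the usual Itanium-ABI
   length prefix).  A sketch: "void f(int8x8_t)" would then mangle
   roughly as _Z1f15__simd64_int8_t.  */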
24367 enum machine_mode mode;
24368 const char *element_type_name;
24369 const char *aapcs_name;
24370 } arm_mangle_map_entry;
24372 static arm_mangle_map_entry arm_mangle_map[] = {
24373 /* 64-bit containerized types. */
24374 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24375 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24376 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24377 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24378 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24379 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24380 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24381 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24382 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24383 /* 128-bit containerized types. */
24384 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24385 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24386 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24387 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24388 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24389 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24390 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24391 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24392 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24393 { VOIDmode, NULL, NULL }
24397 arm_mangle_type (const_tree type)
24399 arm_mangle_map_entry *pos = arm_mangle_map;
24401 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24402 has to be mangled as if it were in the "std" namespace. */
24403 if (TARGET_AAPCS_BASED
24404 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24406 static bool warned;
24407 if (!warned && warn_psabi && !in_system_header)
24410 inform (input_location,
24411 "the mangling of %<va_list%> has changed in GCC 4.4");
24413 return "St9__va_list";
24416 /* Half-precision float. */
24417 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24420 if (TREE_CODE (type) != VECTOR_TYPE)
24423 /* Check the mode of the vector type, and the name of the vector
24424 element type, against the table. */
24425 while (pos->mode != VOIDmode)
24427 tree elt_type = TREE_TYPE (type);
24429 if (pos->mode == TYPE_MODE (type)
24430 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24431 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24432 pos->element_type_name))
24433 return pos->aapcs_name;
24438 /* Use the default mangling for unrecognized (possibly user-defined) vector types. */
24443 /* Order of allocation of core registers for Thumb: this allocation is
24444 written over the corresponding initial entries of the array
24445 initialized with REG_ALLOC_ORDER. We allocate all low registers
24446 first. Saving and restoring a low register is usually cheaper than
24447 using a call-clobbered high register. */
24449 static const int thumb_core_reg_alloc_order[] =
24451 3, 2, 1, 0, 4, 5, 6, 7,
24452 14, 12, 8, 9, 10, 11, 13, 15
24455 /* Adjust register allocation order when compiling for Thumb. */
24458 arm_order_regs_for_local_alloc (void)
24460 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24461 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24463 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24464 sizeof (thumb_core_reg_alloc_order));
24467 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24470 arm_frame_pointer_required (void)
24472 return (cfun->has_nonlocal_label
24473 || SUBTARGET_FRAME_POINTER_REQUIRED
24474 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24477 /* Thumb-1 is the only target that lacks conditional execution, so
24478 return true whenever the target is not Thumb-1. */
24480 arm_have_conditional_execution (void)
24482 return !TARGET_THUMB1;
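/* Report the vector sizes, in bytes, that the autovectorizer should try:
   both 16 and 8 for NEON, or 0 (only the preferred mode) when double-word
   NEON vectorization is requested -- a reading of the test below.  */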
24485 static unsigned int
24486 arm_autovectorize_vector_sizes (void)
24488 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24492 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24494 /* Vectors which aren't in packed structures will not be less aligned than
24495 the natural alignment of their element type, so this is safe. */
24496 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24499 return default_builtin_vector_alignment_reachable (type, is_packed);
24503 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24504 const_tree type, int misalignment,
24507 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24509 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24514 /* If the misalignment is unknown, we should be able to handle the access
24515 so long as it is not to a member of a packed data structure. */
24516 if (misalignment == -1)
24519 /* Return true if the misalignment is a multiple of the natural alignment
24520 of the vector's element type. This is probably always going to be
24521 true in practice, since we've already established that this isn't a packed access. */
24523 return ((misalignment % align) == 0);
24526 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24531 arm_conditional_register_usage (void)
24535 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24537 for (regno = FIRST_FPA_REGNUM;
24538 regno <= LAST_FPA_REGNUM; ++regno)
24539 fixed_regs[regno] = call_used_regs[regno] = 1;
24542 if (TARGET_THUMB1 && optimize_size)
24544 /* When optimizing for size on Thumb-1, it's better not
24545 to use the HI regs, because of the overhead of stacking them. */
24547 for (regno = FIRST_HI_REGNUM;
24548 regno <= LAST_HI_REGNUM; ++regno)
24549 fixed_regs[regno] = call_used_regs[regno] = 1;
24552 /* The link register can be clobbered by any branch insn,
24553 but we have no way to track that at present, so mark
24554 it as unavailable. */
24556 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24558 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24560 if (TARGET_MAVERICK)
24562 for (regno = FIRST_FPA_REGNUM;
24563 regno <= LAST_FPA_REGNUM; ++ regno)
24564 fixed_regs[regno] = call_used_regs[regno] = 1;
24565 for (regno = FIRST_CIRRUS_FP_REGNUM;
24566 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24568 fixed_regs[regno] = 0;
24569 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24574 /* VFPv3 registers are disabled when earlier VFP
24575 versions are selected due to the definition of
24576 LAST_VFP_REGNUM. */
24577 for (regno = FIRST_VFP_REGNUM;
24578 regno <= LAST_VFP_REGNUM; ++ regno)
24580 fixed_regs[regno] = 0;
24581 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24582 || regno >= FIRST_VFP_REGNUM + 32;
24587 if (TARGET_REALLY_IWMMXT)
24589 regno = FIRST_IWMMXT_GR_REGNUM;
24590 /* The 2002/10/09 revision of the XScale ABI has wCG0
24591 and wCG1 as call-preserved registers. The 2002/11/21
24592 revision changed this so that all wCG registers are
24593 scratch registers. */
24594 for (regno = FIRST_IWMMXT_GR_REGNUM;
24595 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24596 fixed_regs[regno] = 0;
24597 /* The XScale ABI has wR0 - wR9 as scratch registers,
24598 the rest as call-preserved registers. */
24599 for (regno = FIRST_IWMMXT_REGNUM;
24600 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24602 fixed_regs[regno] = 0;
24603 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24607 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24609 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24610 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24612 else if (TARGET_APCS_STACK)
24614 fixed_regs[10] = 1;
24615 call_used_regs[10] = 1;
24617 /* -mcaller-super-interworking reserves r11 for calls to
24618 _interwork_r11_call_via_rN(). Making the register global
24619 is an easy way of ensuring that it remains valid for all calls. */
24621 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24622 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24624 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24625 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24626 if (TARGET_CALLER_INTERWORKING)
24627 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24629 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24633 arm_preferred_rename_class (reg_class_t rclass)
24635 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24636 using GENERAL_REGS. During the register rename pass we therefore
24637 prefer LO_REGS, which can reduce code size. */
24638 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24644 /* Compute the attribute "length" of insn "*push_multi".
24645 This function MUST therefore be kept in sync with that insn pattern. */
24647 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
24649 int i, regno, hi_reg;
24650 int num_saves = XVECLEN (parallel_op, 0);
24660 regno = REGNO (first_op);
24661 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24662 for (i = 1; i < num_saves && !hi_reg; i++)
24664 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24665 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24673 /* Compute the number of instructions emitted by output_move_double. */
24675 arm_count_output_move_double_insns (rtx *operands)
24679 /* output_move_double may modify the operands array, so call it
24680 here on a copy of the array. */
24681 ops[0] = operands[0];
24682 ops[1] = operands[1];
24683 output_move_double (ops, false, &count);
24688 vfp3_const_double_for_fract_bits (rtx operand)
24690 REAL_VALUE_TYPE r0;
24692 if (GET_CODE (operand) != CONST_DOUBLE)
24695 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
24696 if (exact_real_inverse (DFmode, &r0))
24698 if (exact_real_truncate (DFmode, &r0))
24700 HOST_WIDE_INT value = real_to_integer (&r0);
24701 value = value & 0xffffffff;
24702 if ((value != 0) && ( (value & (value - 1)) == 0))
24703 return int_log2 (value);
24709 /* Emit a memory barrier around an atomic sequence according to MODEL. */
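/* A sketch of the resulting placement: SEQ_CST and ACQ_REL get a barrier
   on both sides of the sequence, RELEASE only before it, ACQUIRE only
   after it, and RELAXED/CONSUME none at all.  */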
24712 arm_pre_atomic_barrier (enum memmodel model)
24716 case MEMMODEL_RELAXED:
24717 case MEMMODEL_CONSUME:
24718 case MEMMODEL_ACQUIRE:
24720 case MEMMODEL_RELEASE:
24721 case MEMMODEL_ACQ_REL:
24722 case MEMMODEL_SEQ_CST:
24723 emit_insn (gen_memory_barrier ());
24726 gcc_unreachable ();
24731 arm_post_atomic_barrier (enum memmodel model)
24735 case MEMMODEL_RELAXED:
24736 case MEMMODEL_CONSUME:
24737 case MEMMODEL_RELEASE:
24739 case MEMMODEL_ACQUIRE:
24740 case MEMMODEL_ACQ_REL:
24741 case MEMMODEL_SEQ_CST:
24742 emit_insn (gen_memory_barrier ());
24745 gcc_unreachable ();
24749 /* Emit the load-exclusive and store-exclusive instructions. */
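/* These expand to the LDREX/STREX family: presumably ldrexb/strexb for
   QImode, ldrexh/strexh for HImode, ldrex/strex for SImode and
   ldrexd/strexd for DImode, matching the per-mode generators below.  */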
24752 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
24754 rtx (*gen) (rtx, rtx);
24758 case QImode: gen = gen_arm_load_exclusiveqi; break;
24759 case HImode: gen = gen_arm_load_exclusivehi; break;
24760 case SImode: gen = gen_arm_load_exclusivesi; break;
24761 case DImode: gen = gen_arm_load_exclusivedi; break;
24763 gcc_unreachable ();
24766 emit_insn (gen (rval, mem));
24770 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
24772 rtx (*gen) (rtx, rtx, rtx);
24776 case QImode: gen = gen_arm_store_exclusiveqi; break;
24777 case HImode: gen = gen_arm_store_exclusivehi; break;
24778 case SImode: gen = gen_arm_store_exclusivesi; break;
24779 case DImode: gen = gen_arm_store_exclusivedi; break;
24781 gcc_unreachable ();
24784 emit_insn (gen (bval, rval, mem));
24787 /* Emit INSN as a jump and mark it as very unlikely to be taken (a
probability just under 1% of REG_BR_PROB_BASE). */
24790 emit_unlikely_jump (rtx insn)
24792 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
24794 insn = emit_jump_insn (insn);
24795 add_reg_note (insn, REG_BR_PROB, very_unlikely);
24798 /* Expand a compare and swap pattern. */
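/* Operand layout, as consumed below: operands[0] is the boolean success
   output, operands[1] receives the loaded value, operands[3] and
   operands[4] are the expected and desired values, operands[5] is the
   is_weak flag, and operands[6]/operands[7] the success/failure memory
   models.  */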
24801 arm_expand_compare_and_swap (rtx operands[])
24803 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
24804 enum machine_mode mode;
24805 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
24807 bval = operands[0];
24808 rval = operands[1];
24810 oldval = operands[3];
24811 newval = operands[4];
24812 is_weak = operands[5];
24813 mod_s = operands[6];
24814 mod_f = operands[7];
24815 mode = GET_MODE (mem);
24821 /* For narrow modes, we're going to perform the comparison in SImode,
24822 so do the zero-extension now. */
24823 rval = gen_reg_rtx (SImode);
24824 oldval = convert_modes (SImode, mode, oldval, true);
24828 /* Force the value into a register if needed. We waited until after
24829 the zero-extension above to do this properly. */
24830 if (!arm_add_operand (oldval, mode))
24831 oldval = force_reg (mode, oldval);
24835 if (!cmpdi_operand (oldval, mode))
24836 oldval = force_reg (mode, oldval);
24840 gcc_unreachable ();
24845 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
24846 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
24847 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
24848 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
24850 gcc_unreachable ();
24853 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
24855 if (mode == QImode || mode == HImode)
24856 emit_move_insn (operands[1], gen_lowpart (mode, rval));
24858 /* In all cases, we arrange for success to be signaled by Z set.
24859 This arrangement allows for the boolean result to be used directly
24860 in a subsequent branch, post optimization. */
24861 x = gen_rtx_REG (CCmode, CC_REGNUM);
24862 x = gen_rtx_EQ (SImode, x, const0_rtx);
24863 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
24866 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
24867 another memory store between the load-exclusive and store-exclusive can
24868 reset the monitor from Exclusive to Open state. This means we must wait
24869 until after reload to split the pattern, lest we get a register spill in
24870 the middle of the atomic sequence. */
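/* The split therefore produces, roughly (a sketch for a strong SImode
   compare-and-swap, ignoring barriers):
       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     1b
       2:                                                                */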
24873 arm_split_compare_and_swap (rtx operands[])
24875 rtx rval, mem, oldval, newval, scratch;
24876 enum machine_mode mode;
24877 enum memmodel mod_s, mod_f;
24879 rtx label1, label2, x, cond;
24881 rval = operands[0];
24883 oldval = operands[2];
24884 newval = operands[3];
24885 is_weak = (operands[4] != const0_rtx);
24886 mod_s = (enum memmodel) INTVAL (operands[5]);
24887 mod_f = (enum memmodel) INTVAL (operands[6]);
24888 scratch = operands[7];
24889 mode = GET_MODE (mem);
24891 arm_pre_atomic_barrier (mod_s);
24896 label1 = gen_label_rtx ();
24897 emit_label (label1);
24899 label2 = gen_label_rtx ();
24901 arm_emit_load_exclusive (mode, rval, mem);
24903 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
24904 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24905 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24906 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
24907 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24909 arm_emit_store_exclusive (mode, scratch, mem, newval);
24911 /* Weak or strong, we want EQ to be true for success, so that we
24912 match the flags that we got from the compare above. */
24913 cond = gen_rtx_REG (CCmode, CC_REGNUM);
24914 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
24915 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
24919 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24920 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
24921 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
24922 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
24925 if (mod_f != MEMMODEL_RELAXED)
24926 emit_label (label2);
24928 arm_post_atomic_barrier (mod_s);
24930 if (mod_f == MEMMODEL_RELAXED)
24931 emit_label (label2);
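/* Split an atomic read-modify-write into a load-exclusive / operate /
   store-exclusive retry loop.  OLD_OUT and NEW_OUT receive the values
   before and after applying CODE with VALUE; COND serves as the
   store-exclusive status register.  (A summary of the expansion below.)  */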
24935 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
24936 rtx value, rtx model_rtx, rtx cond)
24938 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
24939 enum machine_mode mode = GET_MODE (mem);
24940 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
24943 arm_pre_atomic_barrier (model);
24945 label = gen_label_rtx ();
24946 emit_label (label);
24949 new_out = gen_lowpart (wmode, new_out);
24951 old_out = gen_lowpart (wmode, old_out);
24954 value = simplify_gen_subreg (wmode, value, mode, 0);
24956 arm_emit_load_exclusive (mode, old_out, mem);
24965 x = gen_rtx_AND (wmode, old_out, value);
24966 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24967 x = gen_rtx_NOT (wmode, new_out);
24968 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
24972 if (CONST_INT_P (value))
24974 value = GEN_INT (-INTVAL (value));
24980 if (mode == DImode)
24982 /* DImode plus/minus need to clobber flags. */
24983 /* The adddi3 and subdi3 patterns are incorrectly written so that
24984 they require matching operands, even when we could easily support
24985 three operands. Thankfully, this can be fixed up post-splitting,
24986 as the individual add+adc patterns do accept three operands and
24987 post-reload cprop can make these moves go away. */
24988 emit_move_insn (new_out, old_out);
24990 x = gen_adddi3 (new_out, new_out, value);
24992 x = gen_subdi3 (new_out, new_out, value);
24999 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25000 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25004 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25006 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25007 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25009 arm_post_atomic_barrier (model);
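/* MAX_VECT_LEN bounds the permutation index arrays below; 16 matches the
   largest NEON element count handled here (V16QImode).  */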
25012 #define MAX_VECT_LEN 16
25014 struct expand_vec_perm_d
25016 rtx target, op0, op1;
25017 unsigned char perm[MAX_VECT_LEN];
25018 enum machine_mode vmode;
25019 unsigned char nelt;
25024 /* Generate a variable permutation. */
25027 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25029 enum machine_mode vmode = GET_MODE (target);
25030 bool one_vector_p = rtx_equal_p (op0, op1);
25032 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25033 gcc_checking_assert (GET_MODE (op0) == vmode);
25034 gcc_checking_assert (GET_MODE (op1) == vmode);
25035 gcc_checking_assert (GET_MODE (sel) == vmode);
25036 gcc_checking_assert (TARGET_NEON);
25040 if (vmode == V8QImode)
25041 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25043 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25049 if (vmode == V8QImode)
25051 pair = gen_reg_rtx (V16QImode);
25052 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25053 pair = gen_lowpart (TImode, pair);
25054 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25058 pair = gen_reg_rtx (OImode);
25059 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25060 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25066 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25068 enum machine_mode vmode = GET_MODE (target);
25069 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25070 bool one_vector_p = rtx_equal_p (op0, op1);
25071 rtx rmask[MAX_VECT_LEN], mask;
25073 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25074 numbering of elements for big-endian, we must reverse the order. */
25075 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25077 /* The VTBL instruction does not use a modulo index, so we must take care
25078 of that ourselves. */
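/* E.g. for a two-operand V8QImode permutation nelt is 8, so each selector
   byte is ANDed with 2 * 8 - 1 = 15 before the table lookup.  */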
25079 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25080 for (i = 0; i < nelt; ++i)
25082 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25083 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25085 arm_expand_vec_perm_1 (target, op0, op1, sel);
25088 /* Generate or test for an insn that supports a constant permutation. */
25090 /* Recognize patterns for the VUZP insns. */
25093 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25095 unsigned int i, odd, mask, nelt = d->nelt;
25096 rtx out0, out1, in0, in1, x;
25097 rtx (*gen)(rtx, rtx, rtx, rtx);
25099 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25102 /* Note that these are little-endian tests. Adjust for big-endian later. */
25103 if (d->perm[0] == 0)
25105 else if (d->perm[0] == 1)
25109 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25111 for (i = 0; i < nelt; i++)
25113 unsigned elt = (i * 2 + odd) & mask;
25114 if (d->perm[i] != elt)
25124 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25125 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25126 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25127 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25128 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25129 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25130 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25131 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25133 gcc_unreachable ();
25138 if (BYTES_BIG_ENDIAN)
25140 x = in0, in0 = in1, in1 = x;
25145 out1 = gen_reg_rtx (d->vmode);
25147 x = out0, out0 = out1, out1 = x;
25149 emit_insn (gen (out0, in0, in1, out1));
25153 /* Recognize patterns for the VZIP insns. */
25156 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25158 unsigned int i, high, mask, nelt = d->nelt;
25159 rtx out0, out1, in0, in1, x;
25160 rtx (*gen)(rtx, rtx, rtx, rtx);
25162 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25165 /* Note that these are little-endian tests. Adjust for big-endian later. */
25167 if (d->perm[0] == high)
25169 else if (d->perm[0] == 0)
25173 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25175 for (i = 0; i < nelt / 2; i++)
25177 unsigned elt = (i + high) & mask;
25178 if (d->perm[i * 2] != elt)
25180 elt = (elt + nelt) & mask;
25181 if (d->perm[i * 2 + 1] != elt)
25191 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25192 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25193 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25194 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25195 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25196 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25197 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25198 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25200 gcc_unreachable ();
25205 if (BYTES_BIG_ENDIAN)
25207 x = in0, in0 = in1, in1 = x;
25212 out1 = gen_reg_rtx (d->vmode);
25214 x = out0, out0 = out1, out1 = x;
25216 emit_insn (gen (out0, in0, in1, out1));
25220 /* Recognize patterns for the VREV insns. */
25223 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25225 unsigned int i, j, diff, nelt = d->nelt;
25226 rtx (*gen)(rtx, rtx, rtx);
25228 if (!d->one_vector_p)
25237 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25238 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25246 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25247 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25248 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25249 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25257 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25258 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25259 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25260 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25261 case V4SImode: gen = gen_neon_vrev64v4si; break;
25262 case V2SImode: gen = gen_neon_vrev64v2si; break;
25263 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25264 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25273 for (i = 0; i < nelt; i += diff)
25274 for (j = 0; j <= diff; j += 1)
25275 if (d->perm[i + j] != i + diff - j)
25282 /* ??? The third operand is an artifact of the builtin infrastructure
25283 and is ignored by the actual instruction. */
25284 emit_insn (gen (d->target, d->op0, const0_rtx));
25288 /* Recognize patterns for the VTRN insns. */
25291 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25293 unsigned int i, odd, mask, nelt = d->nelt;
25294 rtx out0, out1, in0, in1, x;
25295 rtx (*gen)(rtx, rtx, rtx, rtx);
25297 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25300 /* Note that these are little-endian tests. Adjust for big-endian later. */
25301 if (d->perm[0] == 0)
25303 else if (d->perm[0] == 1)
25307 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25309 for (i = 0; i < nelt; i += 2)
25311 if (d->perm[i] != i + odd)
25313 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25323 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25324 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25325 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25326 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25327 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25328 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25329 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25330 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25332 gcc_unreachable ();
25337 if (BYTES_BIG_ENDIAN)
25339 x = in0, in0 = in1, in1 = x;
25344 out1 = gen_reg_rtx (d->vmode);
25346 x = out0, out0 = out1, out1 = x;
25348 emit_insn (gen (out0, in0, in1, out1));
25352 /* The NEON VTBL instruction is a fully variable permutation that's even
25353 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25354 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25355 can do slightly better by expanding this as a constant where we don't
25356 have to apply a mask. */
25359 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25361 rtx rperm[MAX_VECT_LEN], sel;
25362 enum machine_mode vmode = d->vmode;
25363 unsigned int i, nelt = d->nelt;
25365 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25366 numbering of elements for big-endian, we must reverse the order. */
25367 if (BYTES_BIG_ENDIAN)
25373 /* Generic code will try constant permutation twice: once with the
25374 original mode and again with the elements lowered to QImode.
25375 So wait and don't do the selector expansion ourselves. */
25376 if (vmode != V8QImode && vmode != V16QImode)
25379 for (i = 0; i < nelt; ++i)
25380 rperm[i] = GEN_INT (d->perm[i]);
25381 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25382 sel = force_reg (vmode, sel);
25384 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25389 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25391 /* The pattern matching functions above are written to look for a small
25392 number to begin the sequence (0, 1, N/2). If we begin with an index
25393 from the second operand, we can swap the operands. */
25394 if (d->perm[0] >= d->nelt)
25396 unsigned i, nelt = d->nelt;
25399 for (i = 0; i < nelt; ++i)
25400 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25409 if (arm_evpc_neon_vuzp (d))
25411 if (arm_evpc_neon_vzip (d))
25413 if (arm_evpc_neon_vrev (d))
25415 if (arm_evpc_neon_vtrn (d))
25417 return arm_evpc_neon_vtbl (d);
25422 /* Expand a vec_perm_const pattern. */
25425 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25427 struct expand_vec_perm_d d;
25428 int i, nelt, which;
25434 d.vmode = GET_MODE (target);
25435 gcc_assert (VECTOR_MODE_P (d.vmode));
25436 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25437 d.testing_p = false;
25439 for (i = which = 0; i < nelt; ++i)
25441 rtx e = XVECEXP (sel, 0, i);
25442 int ei = INTVAL (e) & (2 * nelt - 1);
25443 which |= (ei < nelt ? 1 : 2);
25453 d.one_vector_p = false;
25454 if (!rtx_equal_p (op0, op1))
25457 /* The elements of PERM do not suggest that only the first operand
25458 is used, but both operands are identical. Allow easier matching
25459 of the permutation by folding the permutation into the single operand itself. */
25463 for (i = 0; i < nelt; ++i)
25464 d.perm[i] &= nelt - 1;
25466 d.one_vector_p = true;
25471 d.one_vector_p = true;
25475 return arm_expand_vec_perm_const_1 (&d);
25478 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
25481 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25482 const unsigned char *sel)
25484 struct expand_vec_perm_d d;
25485 unsigned int i, nelt, which;
25489 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25490 d.testing_p = true;
25491 memcpy (d.perm, sel, nelt);
25493 /* Categorize the set of elements in the selector. */
25494 for (i = which = 0; i < nelt; ++i)
25496 unsigned char e = d.perm[i];
25497 gcc_assert (e < 2 * nelt);
25498 which |= (e < nelt ? 1 : 2);
25501 /* For all elements from the second vector, fold them into the first. */
25503 for (i = 0; i < nelt; ++i)
25506 /* Check whether the mask can be applied to the vector type. */
25507 d.one_vector_p = (which != 3);
25509 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25510 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25511 if (!d.one_vector_p)
25512 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25515 ret = arm_expand_vec_perm_const_1 (&d);
25522 #include "gt-arm.h"