/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"  /* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
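
/* Sanity check of the arithmetic above: an anchor then reaches offsets
   -4088 through 4095, i.e. 4088 + 1 + 4095 = 8184 bytes, and
   8184 = 8 * 1023, so the block size is indeed divisible by eight.  */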
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
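
/* For example, with the default of 5, a branch around a block of up to
   five instructions can instead be expressed as five conditionally
   executed instructions (five "addne"s rather than a "beq" over five
   "add"s), avoiding the branch entirely.  */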

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
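
/* As an illustration of how these compose: FL_FOR_ARCH7A is
   FL_FOR_ARCH6T2 with FL_ARCH7 and FL_ARCH6K added and FL_NOTM kept,
   i.e. an ARMv7-A core inherits everything up to ARMv6T2, gains the
   v6K extensions, and is not restricted to the M profile.  */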

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  3,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                        /* Constant limit.  */
  3,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                        /* Constant limit.  */
  1,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                    /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                    /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                        /* Constant limit.  */
  5,                        /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                     /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0 , NULL}
};


/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};


/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
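
/* For example, emit_set_insn (reg, GEN_INT (0)) emits the insn
   (set (reg) (const_int 0)) and returns it.  */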

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
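
/* For instance, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add",
   "sq", 3) registers "__gnu_addsq3" as the SQmode addition libcall.  */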

static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
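
/* For instance, a conversion from SQmode to DQmode (both signed fract
   modes) selects the "2" suffix and yields "__gnu_fractsqdq2", whereas
   a fixed-point/integer conversion gets no suffix.  */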

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
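
  /* Concretely, __aeabi_idivmod returns the quotient in r0 and the
     remainder in r1, so the same routine serves both SImode division
     and modulus.  */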

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
        { QQmode, "qq" },
        { UQQmode, "uqq" },
        { HQmode, "hq" },
        { UHQmode, "uhq" },
        { SQmode, "sq" },
        { USQmode, "usq" },
        { DQmode, "dq" },
        { UDQmode, "udq" },
        { TQmode, "tq" },
        { UTQmode, "utq" },
        { HAmode, "ha" },
        { UHAmode, "uha" },
        { SAmode, "sa" },
        { USAmode, "usa" },
        { DAmode, "da" },
        { UDAmode, "uda" },
        { TAmode, "ta" },
        { UTAmode, "uta" },
        { QImode, "qi" },
        { HImode, "hi" },
        { SImode, "si" },
        { DImode, "di" },
        { TImode, "ti" },
        { SFmode, "sf" },
        { DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
        }
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  if (TARGET_LITTLE_WORDS)
    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
             "will be removed in a future release");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
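/* Editorial note (worked example, not from the original source): with the
   Thumb-2 settings above, one anchor covers offsets -248 through +4095,
   i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so anchors can be
   spaced a whole number of doublewords apart.  */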
1717 /* V5 code we generate is completely interworking capable, so we turn off
1718 TARGET_INTERWORK here to avoid many tests later on. */
1720 /* XXX However, we must pass the right pre-processor defines to CPP
1721 or GLD can get confused. This is a hack. */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
1728 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1729 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1731 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1732 error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }
1753 arm_fpu_desc = &all_fpus[arm_fpu_index];
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;
    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;
    default:
      gcc_unreachable ();
    }
1778 if (TARGET_AAPCS_BASED
1779 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1780 error ("FPA is unsupported in the AAPCS");
1782 if (TARGET_AAPCS_BASED)
1784 if (TARGET_CALLER_INTERWORKING)
1785 error ("AAPCS does not support -mcaller-super-interworking");
1787 if (TARGET_CALLEE_INTERWORKING)
1788 error ("AAPCS does not support -mcallee-super-interworking");
1791 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1792 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1793 will ever exist. GCC makes no attempt to support this combination. */
1794 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1795 sorry ("iWMMXt and hardware floating point");
1797 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1798 if (TARGET_THUMB2 && TARGET_IWMMXT)
1799 sorry ("Thumb-2 iWMMXt");
1801 /* __fp16 support currently assumes the core has ldrh. */
1802 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1803 sorry ("__fp16 and no ldrh");
1805 /* If soft-float is specified then don't use FPU. */
1806 if (TARGET_SOFT_FLOAT)
1807 arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
1831 /* For arm2/3 there is no need to do any scheduling if there is only
1832 a floating point emulator, or we are doing software floating-point. */
1833 if ((TARGET_SOFT_FLOAT
1834 || (TARGET_FPA && arm_fpu_desc->rev))
1835 && (tune_flags & FL_MODE32) == 0)
1836 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1838 /* Use the cp15 method if it is available. */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }
1847 if (TARGET_HARD_TP && TARGET_THUMB1)
1848 error ("can not use -mtp=cp15 with 16-bit Thumb");
1850 /* Override the default structure alignment for AAPCS ABI. */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
1878 /* If stack checking is disabled, we can use r10 as the PIC register,
1879 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1880 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1882 if (TARGET_VXWORKS_RTP)
1883 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1884 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1887 if (flag_pic && TARGET_VXWORKS_RTP)
1888 arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
1909 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }
1918 /* Enable -munaligned-access by default for
1919 - all ARMv6 architecture-based processors
1920 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1922 Disable -munaligned-access by default for
1923 - all pre-ARMv6 architecture-based processors
1924 - ARMv6-M architecture-based processors. */
  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
        unaligned_access = 1;
      else
        unaligned_access = 0;
    }
  else if (unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
1940 if (TARGET_THUMB1 && flag_schedule_insns)
1942 /* Don't warn since it's on by default in -O2. */
1943 flag_schedule_insns = 0;
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
1955 /* Hot/Cold partitioning is not currently supported, since we can't
1956 handle literal pool placement in that case. */
1957 if (flag_reorder_blocks_and_partition)
1959 inform (input_location,
1960 "-freorder-blocks-and-partition not supported on this architecture");
1961 flag_reorder_blocks_and_partition = 0;
1962 flag_reorder_blocks = 1;
1966 /* Hoisting PIC address calculations more aggressively provides a small,
1967 but measurable, size reduction for PIC code. Therefore, we decrease
1968 the bar for unrestricted expression hoisting to the cost of PIC address
1969 calculation, which is 2 instructions. */
1970 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1971 global_options.x_param_values,
1972 global_options_set.x_param_values);
1974 /* ARM EABI defaults to strict volatile bitfields. */
1975 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1976 flag_strict_volatile_bitfields = 1;
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and
     for which we have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
1986 /* Set up parameters to be used in prefetching algorithm. Do not override the
1987 defaults unless we are tuning for a core we have researched values for. */
1988 if (current_tune->num_prefetch_slots > 0)
1989 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1990 current_tune->num_prefetch_slots,
1991 global_options.x_param_values,
1992 global_options_set.x_param_values);
1993 if (current_tune->l1_cache_line_size >= 0)
1994 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1995 current_tune->l1_cache_line_size,
1996 global_options.x_param_values,
1997 global_options_set.x_param_values);
1998 if (current_tune->l1_cache_size >= 0)
1999 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2000 current_tune->l1_cache_size,
2001 global_options.x_param_values,
2002 global_options_set.x_param_values);
2004 /* Register global variables with the garbage collector. */
2005 arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
2015 /* A table of known ARM exception types.
2016 For use with the interrupt function attribute. */
typedef struct
{
  const char *const arg;
  const unsigned long return_value;
} isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
2027 { "IRQ", ARM_FT_ISR },
2028 { "irq", ARM_FT_ISR },
2029 { "FIQ", ARM_FT_FIQ },
2030 { "fiq", ARM_FT_FIQ },
2031 { "ABORT", ARM_FT_ISR },
2032 { "abort", ARM_FT_ISR },
2035 { "UNDEF", ARM_FT_EXCEPTION },
2036 { "undef", ARM_FT_EXCEPTION },
2037 { "SWI", ARM_FT_EXCEPTION },
2038 { "swi", ARM_FT_EXCEPTION },
2039 { NULL, ARM_FT_NORMAL }
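/* Editorial illustration (not part of the original source): the table above
   services declarations such as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   the string argument is matched case-sensitively, which is why each name
   appears in both upper and lower case.  */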
2042 /* Returns the (interrupt) function type of the current
2043 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2045 static unsigned long
2046 arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;
2058 /* Get the value of the argument. */
2059 if (TREE_VALUE (argument) == NULL_TREE
2060 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2061 return ARM_FT_UNKNOWN;
2063 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2065 /* Check it against the list of known arguments. */
2066 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2067 if (streq (arg, ptr->arg))
2068 return ptr->return_value;
2070 /* An unrecognized interrupt type. */
  return ARM_FT_UNKNOWN;
}
2074 /* Computes the type of the current function. */
2076 static unsigned long
2077 arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;
2083 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2085 /* Decide if the current function is volatile. Such functions
2086 never return, and many memory cycles can be saved by not storing
2087 register values that will never be needed again. This optimization
2088 was added to speed up context switching in a kernel application. */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2094 && TREE_THIS_VOLATILE (current_function_decl))
2095 type |= ARM_FT_VOLATILE;
2097 if (cfun->static_chain_decl != NULL)
2098 type |= ARM_FT_NESTED;
2100 attr = DECL_ATTRIBUTES (current_function_decl);
  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
2118 /* Returns the type of the current function. */
unsigned long
arm_current_func_type (void)
{
2123 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2124 cfun->machine->func_type = arm_compute_func_type ();
  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
2132 /* Naked functions should not allocate stack slots for arguments. */
  return !IS_NAKED (arm_current_func_type ());
}
2137 /* Output assembler code for a block containing the constant parts
2138 of a trampoline, leaving space for the variable parts.
2140 On the ARM, (if r8 is the static chain regnum, and remembering that
2141 referencing pc adds an offset of 8) the trampoline looks like:
           ldr    r8, [pc, #0]
           ldr    pc, [pc, #0]
           .word  static chain value
2145 .word function's address
2146 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
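/* Editorial illustration (not part of the original source): once
   arm_trampoline_init below has filled in the variable parts, an ARM-mode
   trampoline for a nested function F with static chain value C consists of
   the two template loads followed by the literals C and &F; because reading
   pc yields the instruction address plus 8, each load picks up the word two
   slots ahead of it.  */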
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
2153 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2154 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2156 else if (TARGET_THUMB2)
2158 /* The Thumb-2 trampoline is similar to the arm implementation.
2159 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2160 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2161 STATIC_CHAIN_REGNUM, PC_REGNUM);
2162 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
2167 fprintf (f, "\t.code\t16\n");
2168 fprintf (f, ".Ltrampoline_start:\n");
2169 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2170 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2171 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2172 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2173 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2174 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }

  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
2180 /* Emit RTL insns to initialize the variable parts of a trampoline. */
static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
2185 rtx fnaddr, mem, a_tramp;
2187 emit_block_move (m_tramp, assemble_trampoline_template (),
2188 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2190 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2191 emit_move_insn (mem, chain_value);
2193 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2194 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2195 emit_move_insn (mem, fnaddr);
2197 a_tramp = XEXP (m_tramp, 0);
2198 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2199 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2200 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2203 /* Thumb trampolines should be entered in thumb mode, so set
2204 the bottom bit of the address. */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
2215 /* Return 1 if it is possible to return using a single instruction.
2216 If SIBLING is non-null, this is a test for a return before a sibling
2217 call. SIBLING is the call insn, so we can examine its register usage. */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
2223 unsigned int func_type;
2224 unsigned long saved_int_regs;
2225 unsigned HOST_WIDE_INT stack_adjust;
2226 arm_stack_offsets *offsets;
2228 /* Never use a return instruction before reload has run. */
  if (!reload_completed)
    return 0;
2232 func_type = arm_current_func_type ();
  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;
2239 /* So do interrupt functions that use the frame pointer and Thumb
2240 interrupt functions. */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2244 offsets = arm_get_frame_offsets ();
2245 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2247 /* As do variadic functions. */
2248 if (crtl->args.pretend_args_size
2249 || cfun->machine->uses_anonymous_args
2250 /* Or if the function calls __builtin_eh_return () */
2251 || crtl->calls_eh_return
2252 /* Or if the function calls alloca */
2253 || cfun->calls_alloca
2254 /* Or if there is a stack adjustment. However, if the stack pointer
2255 is saved on the stack, we can use a pre-incrementing stack load. */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;
2260 saved_int_regs = offsets->saved_regs_mask;
2262 /* Unfortunately, the insn
2264 ldmib sp, {..., sp, ...}
2266 triggers a bug on most SA-110 based devices, such that the stack
2267 pointer won't be correctly restored if the instruction takes a
2268 page fault. We work around this problem by popping r3 along with
2269 the other registers, since that is never slower than executing
2270 another instruction.
2272 We test for !arm_arch5 here, because code for any architecture
2273 less than this could potentially be run on one of the buggy
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ... */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);
          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }
  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
2306 /* On StrongARM, conditional returns are expensive if they aren't
2307 taken and multiple registers have been stacked. */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;
    }
  if (flag_pic
      && arm_pic_register != INVALID_REGNUM
      && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    return 0;
2321 /* If there are saved registers but the LR isn't saved, then we need
2322 two instructions for the return. */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;
2326 /* Can't be done if any of the FPA regs are pushed,
2327 since this also requires an insn. */
2328 if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;
2333 /* Likewise VFP regs. */
2334 if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;
2339 if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
2347 /* Return TRUE if int I is a valid immediate ARM constant. */
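/* Editorial note (worked example, not from the original source): an ARM
   data-processing immediate is an 8-bit value rotated right by an even
   amount, so 0xff0 (0xff << 4) and 0xff000000 (0xff rotated into the top
   byte) are representable, while 0x101 is not, because its set bits span
   nine positions.  */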
int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;
2354 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2355 be all zero, or all one. */
2356 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2357 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2358 != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;
2362 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2364 /* Fast return for 0 and small values. We must do this for zero, since
2365 the code below can't handle that one case. */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;
2369 /* Get the number of trailing zeros. */
2370 lowbit = ffs((int) i) - 1;
2372 /* Only even shifts are allowed in ARM mode so round down to the
2373 nearest even number. */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;
2382 /* Allow rotated constants in ARM mode. */
  if (TARGET_ARM && lowbit <= 4
      && ((i & ~0xc000003f) == 0
          || (i & ~0xf000000f) == 0
          || (i & ~0xfc000003) == 0))
    return TRUE;
  if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }

  return FALSE;
}
2409 /* Return true if I is a valid constant for the operation CODE. */
2411 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2413 if (const_ok_for_arm (i))
2419 /* See if we can use movw. */
2420 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2423 /* Otherwise, try mvn. */
2424 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2427 /* See if we can use addw or subw. */
2429 && ((i & 0xfffff000) == 0
2430 || ((-i) & 0xfffff000) == 0))
2432 /* else fall through. */
2452 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2454 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2460 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2464 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
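/* Editorial illustration (not from the original source): for PLUS with
   i = -255, const_ok_for_arm (-255) fails (0xffffff01 is not a rotated
   8-bit value), but const_ok_for_arm (255) succeeds, so the addition is
   accepted above and can later be emitted as a subtraction of 255.  */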
2471 /* Emit a sequence of insns to handle a large constant.
2472 CODE is the code of the operation required, it can be any of SET, PLUS,
2473 IOR, AND, XOR, MINUS;
2474 MODE is the mode in which the operation is being performed;
2475 VAL is the integer to operate on;
2476 SOURCE is the other operand (a register, or a null-pointer for SET);
2477 SUBTARGETS means it is safe to create scratch registers if that will
2478 either produce a simpler sequence, or we will want to cse the values.
2479 Return value is the number of insns emitted. */
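/* Editorial illustration (not from the original source): a SET of
   0x00ffff00 has no single-immediate encoding, but splits into two valid
   immediates, e.g.

     mov  rD, #0x00ff0000
     orr  rD, rD, #0x0000ff00

   whereas with TARGET_USE_MOVT a movw/movt pair can materialize any
   32-bit constant in two instructions.  */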
2481 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2484 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;
2493 if (subtargets || code == SET
2494 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2495 && REGNO (target) != REGNO (source)))
2497 /* After arm_reorg has been called, we can't fix up expensive
2498 constants by pushing them into memory so we must synthesize
2499 them in-line, regardless of the cost. This is only likely to
2500 be more costly on chips that have load delay slots and we are
2501 compiling without running the scheduler (so no splitting
2502 occurred before the final instruction emission).
2504 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2506 if (!after_arm_reorg
2508 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2510 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2515 /* Currently SET is the only monadic value for CODE, all
         the rest are dyadic.  */
2517 if (TARGET_USE_MOVT)
2518 arm_emit_movpair (target, GEN_INT (val));
      else
        emit_set_insn (target, GEN_INT (val));
2526 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2528 if (TARGET_USE_MOVT)
2529 arm_emit_movpair (temp, GEN_INT (val));
      else
        emit_set_insn (temp, GEN_INT (val));
2533 /* For MINUS, the value is subtracted from, since we never
2534 have subtraction of a constant. */
      if (code == MINUS)
        emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
      else
        emit_set_insn (target,
                       gen_rtx_fmt_ee (code, mode, source, temp));
2545 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2549 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2550 ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2554 struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
2560 struct four_ints tmp_sequence;
  /* If we aren't targeting ARM, the best place to start is always at
2563 the bottom, otherwise look more closely. */
2566 for (i = 0; i < 32; i += 2)
2568 int consecutive_zeros = 0;
2570 if (!(val & (3 << i)))
2572 while ((i < 32) && !(val & (3 << i)))
                  consecutive_zeros += 2;
                  i += 2;
                }
2577 if (consecutive_zeros > best_consecutive_zeros)
2579 best_consecutive_zeros = consecutive_zeros;
                  best_start = i - consecutive_zeros;
                }
              i -= 2;
2587 /* So long as it won't require any more insns to do so, it's
2588 desirable to emit a small constant (in bits 0...9) in the last
2589 insn. This way there is more chance that it can be combined with
2590 a later addressing insn to form a pre-indexed load or store
2591 operation. Consider:
2593 *((volatile int *)0xe0000100) = 1;
2594 *((volatile int *)0xe0000110) = 2;
2596 We want this to wind up as:
2600 str rB, [rA, #0x100]
2602 str rB, [rA, #0x110]
2604 rather than having to synthesize both large constants from scratch.
2606 Therefore, we calculate how many insns would be required to emit
2607 the constant starting from `best_start', and also starting from
2608 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2609 yield a shorter sequence, we may as well use zero. */
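  /* Editorial note (not from the original source): for the 0xe0000100
     example above, the largest 2-bit-aligned run of zeros occupies bits
     10..27, so best_start is 10 and the small 0x100 chunk is saved for
     the final insn.  */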
2610 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2614 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2615 if (insns2 <= insns1)
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
2625 /* As for optimal_immediate_sequence, but starting at bit-position I. */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2628 struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;
  /* Try and find a way of doing the job in either two or three
     instructions.
2636 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2637 location. We start at position I. This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
2639 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2640 wrapping around to the top of the word when we drop off the bottom.
2641 In the worst case this code should produce no more than four insns.
2643 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at 0.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
2650 unsigned HOST_WIDE_INT result;
2653 gcc_assert (insns < 4);
      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
2659 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2662 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2663 /* We can use addw/subw for the last 12 bits. */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
2671 result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                                 : 0));
              i -= 8;
            }
        }
      else
        {
2679 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2680 arbitrary shifts. */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }
      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         not.
2692 TODO: 16-bit constants that don't clear all the bits, but still win.
2693 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
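      /* Editorial illustration (not from the original source): a value such
         as 0xe0e0e0e0 needs four 8-bit rotated immediates, because each
         0xe0 byte is more than eight bits away from its neighbours, yet a
         single 32-bit replicated constant (0xe0 in every byte) covers it
         in one insn.  */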
2696 b1 = (remainder & 0xff000000) >> 24;
2697 b2 = (remainder & 0x00ff0000) >> 16;
2698 b3 = (remainder & 0x0000ff00) >> 8;
2699 b4 = remainder & 0xff;
2703 /* The 8-bit immediate already found clears b1 (and maybe b2),
2704 but must leave b3 and b4 alone. */
2706 /* First try to find a 32-bit replicated constant that clears
2707 almost everything. We can assume that we can't do it in one,
2708 or else we wouldn't be here. */
2709 unsigned int tmp = b1 & b2 & b3 & b4;
          unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                              + (tmp << 24);
2712 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2713 + (tmp == b3) + (tmp == b4);
          if (tmp
              && (matching_bytes >= 3
2716 || (matching_bytes == 2
2717 && const_ok_for_op (remainder & ~tmp2, code))))
            {
              /* At least 3 of the bytes match, and the fourth has at
                 least as many bits set, or two of the bytes match
                 and it will only require one more insn to finish.  */
              result = tmp2;
              i = tmp != b1 ? 32
                  : tmp != b2 ? 24
                  : tmp != b3 ? 16
                  : 8;
            }
2729 /* Second, try to find a 16-bit replicated constant that can
2730 leave three of the bytes clear. If b2 or b4 is already
2731 zero, then we can. If the 8-bit from above would not
2732 clear b2 anyway, then we still win. */
2733 else if (b1 == b3 && (!b2 || !b4
2734 || (remainder & 0x00ff0000 & ~result)))
            {
              result = remainder & 0xff00ff00;
              i = 24;
            }
2742 /* The 8-bit immediate already found clears b2 (and maybe b3)
         and we don't get here unless b1 is already clear, but it will
2744 leave b4 unchanged. */
2746 /* If we can clear b2 and b4 at once, then we win, since the
2747 8-bits couldn't possibly reach that far. */
          else if (b2 == b4)
            {
              result = remainder & 0x00ff00ff;
              i = 16;
            }
2756 return_sequence->i[insns++] = result;
2757 remainder &= ~result;
      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
2767 /* Emit an instruction with the indicated PATTERN. If COND is
2768 non-NULL, conditionalize the execution of the instruction on COND
2772 emit_constant_insn (rtx cond, rtx pattern)
2775 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2776 emit_insn (pattern);
2779 /* As above, but extra parameter GENERATE which, if clear, suppresses
2783 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2784 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2789 int final_invert = 0;
2791 int set_sign_bit_copies = 0;
2792 int clear_sign_bit_copies = 0;
2793 int clear_zero_bit_copies = 0;
2794 int set_zero_bit_copies = 0;
2795 int insns = 0, neg_insns, inv_insns;
2796 unsigned HOST_WIDE_INT temp1, temp2;
2797 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2798 struct four_ints *immediates;
2799 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2801 /* Find out which operations are safe for a given CODE. Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
2815 if (remainder == 0xffffffff)
2818 emit_constant_insn (cond,
2819 gen_rtx_SET (VOIDmode, target,
2820 GEN_INT (ARM_SIGN_EXTEND (val))));
2826 if (reload_completed && rtx_equal_p (target, source))
2830 emit_constant_insn (cond,
2831 gen_rtx_SET (VOIDmode, target, source));
2840 emit_constant_insn (cond,
2841 gen_rtx_SET (VOIDmode, target, const0_rtx));
2844 if (remainder == 0xffffffff)
2846 if (reload_completed && rtx_equal_p (target, source))
2849 emit_constant_insn (cond,
2850 gen_rtx_SET (VOIDmode, target, source));
2859 if (reload_completed && rtx_equal_p (target, source))
2862 emit_constant_insn (cond,
2863 gen_rtx_SET (VOIDmode, target, source));
2867 if (remainder == 0xffffffff)
2870 emit_constant_insn (cond,
2871 gen_rtx_SET (VOIDmode, target,
2872 gen_rtx_NOT (mode, source)));
2879 /* We treat MINUS as (val - source), since (source - val) is always
2880 passed as (source + (-val)). */
2884 emit_constant_insn (cond,
2885 gen_rtx_SET (VOIDmode, target,
2886 gen_rtx_NEG (mode, source)));
2889 if (const_ok_for_arm (val))
2892 emit_constant_insn (cond,
2893 gen_rtx_SET (VOIDmode, target,
2894 gen_rtx_MINUS (mode, GEN_INT (val),
2905 /* If we can do it in one insn get out quickly. */
2906 if (const_ok_for_op (val, code))
2909 emit_constant_insn (cond,
2910 gen_rtx_SET (VOIDmode, target,
2912 ? gen_rtx_fmt_ee (code, mode, source,
2918 /* Calculate a few attributes that may be useful for specific
2920 /* Count number of leading zeros. */
2921 for (i = 31; i >= 0; i--)
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }
2929 /* Count number of leading 1's. */
2930 for (i = 31; i >= 0; i--)
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }
2938 /* Count number of trailing zero's. */
2939 for (i = 0; i <= 31; i++)
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }
2947 /* Count number of trailing 1's. */
2948 for (i = 0; i <= 31; i++)
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }
2959 /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
2961 may well merge into a subsequent insn. */
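      /* Editorial illustration (not from the original source): for
         remainder = 0xffffff00, set_sign_bit_copies is 24, and
         0xffffff00 << 23 == 0x80000000 is a valid immediate, so

           mov  rD, #0x80000000
           asr  rD, rD, #23

         rebuilds the constant in two insns.  */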
2962 if (set_sign_bit_copies > 1)
2964 if (const_ok_for_arm
2965 (temp1 = ARM_SIGN_EXTEND (remainder
2966 << (set_sign_bit_copies - 1))))
2970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, new_src,
2974 emit_constant_insn (cond,
2975 gen_ashrsi3 (target, new_src,
2976 GEN_INT (set_sign_bit_copies - 1)));
2980 /* For an inverted constant, we will need to set the low bits,
2981 these will be shifted out of harm's way. */
2982 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2983 if (const_ok_for_arm (~temp1))
2987 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2988 emit_constant_insn (cond,
2989 gen_rtx_SET (VOIDmode, new_src,
2991 emit_constant_insn (cond,
2992 gen_ashrsi3 (target, new_src,
2993 GEN_INT (set_sign_bit_copies - 1)));
2999 /* See if we can calculate the value as the difference between two
3000 valid immediates. */
3001 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3003 int topshift = clear_sign_bit_copies & ~1;
3005 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3006 & (0xff000000 >> topshift));
3008 /* If temp1 is zero, then that means the 9 most significant
3009 bits of remainder were 1 and we've caused it to overflow.
3010 When topshift is 0 we don't need to do anything since we
3011 can borrow from 'bit 32'. */
3012 if (temp1 == 0 && topshift != 0)
3013 temp1 = 0x80000000 >> (topshift - 1);
3015 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3017 if (const_ok_for_arm (temp2))
3021 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 emit_constant_insn (cond,
3023 gen_rtx_SET (VOIDmode, new_src,
3025 emit_constant_insn (cond,
3026 gen_addsi3 (target, new_src,
3034 /* See if we can generate this by setting the bottom (or the top)
3035 16 bits, and then shifting these into the other half of the
3036 word. We only look for the simplest cases, to do more would cost
3037 too much. Be careful, however, not to generate this when the
3038 alternative would take fewer insns. */
3039 if (val & 0xffff0000)
3041 temp1 = remainder & 0xffff0000;
3042 temp2 = remainder & 0x0000ffff;
3044 /* Overlaps outside this range are best done using other methods. */
3045 for (i = 9; i < 24; i++)
3047 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3048 && !const_ok_for_arm (temp2))
3050 rtx new_src = (subtargets
3051 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3053 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3054 source, subtargets, generate);
3062 gen_rtx_ASHIFT (mode, source,
3069 /* Don't duplicate cases already considered. */
3070 for (i = 17; i < 24; i++)
3072 if (((temp1 | (temp1 >> i)) == remainder)
3073 && !const_ok_for_arm (temp1))
3075 rtx new_src = (subtargets
3076 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3078 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3079 source, subtargets, generate);
3084 gen_rtx_SET (VOIDmode, target,
3087 gen_rtx_LSHIFTRT (mode, source,
3098 /* If we have IOR or XOR, and the constant can be loaded in a
3099 single instruction, and we can find a temporary to put it in,
3100 then this can be done in two instructions instead of 3-4. */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0.  */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
3105 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3109 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3111 emit_constant_insn (cond,
3112 gen_rtx_SET (VOIDmode, sub,
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, target,
3116 gen_rtx_fmt_ee (code, mode,
         x = y | constant (which is composed of set_sign_bit_copies of
         leading 1s and the remainder 0s, e.g. 0xfff00000)
3129 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3131 This can be done in 2 instructions by using shifts with mov or mvn.
3136 mvn r0, r0, lsr #12 */
3137 if (set_sign_bit_copies > 8
3138 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3142 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3143 rtx shift = GEN_INT (set_sign_bit_copies);
3147 gen_rtx_SET (VOIDmode, sub,
3149 gen_rtx_ASHIFT (mode,
3154 gen_rtx_SET (VOIDmode, target,
3156 gen_rtx_LSHIFTRT (mode, sub,
3163 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3165 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
         E.g. r0 = r0 | 0xfff
3172 if (set_zero_bit_copies > 8
3173 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3177 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3178 rtx shift = GEN_INT (set_zero_bit_copies);
3182 gen_rtx_SET (VOIDmode, sub,
3184 gen_rtx_LSHIFTRT (mode,
3189 gen_rtx_SET (VOIDmode, target,
3191 gen_rtx_ASHIFT (mode, sub,
3197 /* This will never be reached for Thumb2 because orn is a valid
3198 instruction. This is for Thumb1 and the ARM 32 bit cases.
3200 x = y | constant (such that ~constant is a valid constant)
3202 x = ~(~y & ~constant).
3204 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3208 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3209 emit_constant_insn (cond,
3210 gen_rtx_SET (VOIDmode, sub,
3211 gen_rtx_NOT (mode, source)));
3214 sub = gen_reg_rtx (mode);
3215 emit_constant_insn (cond,
3216 gen_rtx_SET (VOIDmode, sub,
3217 gen_rtx_AND (mode, source,
3219 emit_constant_insn (cond,
3220 gen_rtx_SET (VOIDmode, target,
3221 gen_rtx_NOT (mode, sub)));
3228 /* See if two shifts will do 2 or more insn's worth of work. */
3229 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3231 HOST_WIDE_INT shift_mask = ((0xffffffff
3232 << (32 - clear_sign_bit_copies))
3235 if ((remainder | shift_mask) != 0xffffffff)
3239 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3240 insns = arm_gen_constant (AND, mode, cond,
3241 remainder | shift_mask,
3242 new_src, source, subtargets, 1);
3247 rtx targ = subtargets ? NULL_RTX : target;
3248 insns = arm_gen_constant (AND, mode, cond,
3249 remainder | shift_mask,
3250 targ, source, subtargets, 0);
3256 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3257 rtx shift = GEN_INT (clear_sign_bit_copies);
3259 emit_insn (gen_ashlsi3 (new_src, source, shift));
3260 emit_insn (gen_lshrsi3 (target, new_src, shift));
3266 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3268 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3270 if ((remainder | shift_mask) != 0xffffffff)
3274 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3276 insns = arm_gen_constant (AND, mode, cond,
3277 remainder | shift_mask,
3278 new_src, source, subtargets, 1);
3283 rtx targ = subtargets ? NULL_RTX : target;
3285 insns = arm_gen_constant (AND, mode, cond,
3286 remainder | shift_mask,
3287 targ, source, subtargets, 0);
3293 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3294 rtx shift = GEN_INT (clear_zero_bit_copies);
3296 emit_insn (gen_lshrsi3 (new_src, source, shift));
3297 emit_insn (gen_ashlsi3 (target, new_src, shift));
3309 /* Calculate what the instruction sequences would be if we generated it
3310 normally, negated, or inverted. */
3312 /* AND cannot be split into multiple insns, so invert and use BIC. */
3315 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;
3329 immediates = &pos_immediates;
3331 /* Is the negated immediate sequence more efficient? */
3332 if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
3340 /* Is the inverted immediate sequence more efficient?
3341 We must allow for an extra NOT instruction for XOR operations, although
3342 there is some chance that the final 'mvn' will get optimized later. */
3343 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
3354 /* Now output the chosen sequence as instructions. */
3357 for (i = 0; i < insns; i++)
3359 rtx new_src, temp1_rtx;
3361 temp1 = immediates->i[i];
3363 if (code == SET || code == MINUS)
3364 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3365 else if ((final_invert || i < (insns - 1)) && subtargets)
3366 new_src = gen_reg_rtx (mode);
3372 else if (can_negate)
3375 temp1 = trunc_int_for_mode (temp1, mode);
3376 temp1_rtx = GEN_INT (temp1);
3380 else if (code == MINUS)
3381 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3383 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3385 emit_constant_insn (cond,
3386 gen_rtx_SET (VOIDmode, new_src,
3392 can_negate = can_invert;
3396 else if (code == MINUS)
3404 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3405 gen_rtx_NOT (mode, source)));
3412 /* Canonicalize a comparison so that we are more likely to recognize it.
3413 This can be done for a few constant compares, where we can make the
3414 immediate value easier to load. */
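/* Editorial illustration (not from the original source): (x > 0x3ff)
   compares against a constant that no immediate encoding can hold, but
   rewriting it as (x >= 0x400) uses a single valid immediate, which is
   the kind of transformation performed below.  */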
enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
3419 enum machine_mode mode;
3420 unsigned HOST_WIDE_INT i, maxval;
3422 mode = GET_MODE (*op0);
3423 if (mode == VOIDmode)
3424 mode = GET_MODE (*op1);
3426 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3428 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3429 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3430 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3431 for GTU/LEU in Thumb mode. */
  if (mode == DImode)
    {
      rtx tem;

      /* To keep things simple, always use the Cirrus cfcmp64 if it is
         available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
        return code;
3441 if (code == GT || code == LE
3442 || (!TARGET_ARM && (code == GTU || code == LEU)))
          /* Missing comparison.  First try to use an available
             comparison.  */
3446 if (GET_CODE (*op1) == CONST_INT)
3454 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3456 *op1 = GEN_INT (i + 1);
3457 return code == GT ? GE : LT;
3462 if (i != ~((unsigned HOST_WIDE_INT) 0)
3463 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3465 *op1 = GEN_INT (i + 1);
3466 return code == GTU ? GEU : LTU;
3474 /* If that did not work, reverse the condition. */
      tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      return swap_condition (code);
    }
3484 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3485 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3486 to facilitate possible combining with a cmp into 'ands'. */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
3489 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3490 && GET_MODE (XEXP (*op0, 0)) == QImode
3491 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3492 && subreg_lowpart_p (XEXP (*op0, 0))
3493 && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));
3497 /* Comparisons smaller than DImode. Only adjust comparisons against
3498 an out-of-range constant. */
3499 if (GET_CODE (*op1) != CONST_INT
3500 || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return code;
3515 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3517 *op1 = GEN_INT (i + 1);
3518 return code == GT ? GE : LT;
3525 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3527 *op1 = GEN_INT (i - 1);
3528 return code == GE ? GT : LE;
3534 if (i != ~((unsigned HOST_WIDE_INT) 0)
3535 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3537 *op1 = GEN_INT (i + 1);
3538 return code == GTU ? GEU : LTU;
3545 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3547 *op1 = GEN_INT (i - 1);
3548 return code == GEU ? GTU : LEU;
3560 /* Define how to find the value returned by a function. */
rtx
arm_function_value (const_tree type, const_tree func,
3564 bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
3567 int unsignedp ATTRIBUTE_UNUSED;
3568 rtx r ATTRIBUTE_UNUSED;
3570 mode = TYPE_MODE (type);
3572 if (TARGET_AAPCS_BASED)
3573 return aapcs_allocate_return_reg (mode, type, func);
3575 /* Promote integer types. */
3576 if (INTEGRAL_TYPE_P (type))
3577 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
3581 if (arm_return_in_msb (type))
3583 HOST_WIDE_INT size = int_size_in_bytes (type);
3584 if (size % UNITS_PER_WORD != 0)
3586 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3587 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3591 return LIBCALL_VALUE (mode);
static int
libcall_eq (const void *p1, const void *p2)
{
3597 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
static hashval_t
libcall_hash (const void *p1)
{
3603 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
static void
add_libcall (htab_t htab, rtx libcall)
{
3609 *htab_find_slot (htab, libcall, INSERT) = libcall;
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
3615 static bool init_done = false;
3616 static htab_t libcall_htab;
  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
                                  NULL);
3624 add_libcall (libcall_htab,
3625 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3626 add_libcall (libcall_htab,
3627 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3628 add_libcall (libcall_htab,
3629 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3630 add_libcall (libcall_htab,
3631 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3633 add_libcall (libcall_htab,
3634 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3635 add_libcall (libcall_htab,
3636 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3637 add_libcall (libcall_htab,
3638 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3639 add_libcall (libcall_htab,
3640 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3642 add_libcall (libcall_htab,
3643 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3644 add_libcall (libcall_htab,
3645 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3650 add_libcall (libcall_htab,
3651 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3652 add_libcall (libcall_htab,
3653 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3655 /* Values from double-precision helper functions are returned in core
3656 registers if the selected core only supports single-precision
3657 arithmetic, even if we are using the hard-float ABI. The same is
3658 true for single-precision helpers, but we will never be using the
3659 hard-float ABI on a CPU which doesn't support single-precision
3660 operations in hardware. */
3661 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3662 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3663 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3664 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3665 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3666 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3667 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3668 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3669 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3670 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3671 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && htab_find (libcall_htab, libcall) != NULL;
}
rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
3684 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3685 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3687 /* The following libcalls return their result in integer registers,
3688 even though they return a floating point value. */
3689 if (arm_libcall_uses_aapcs_base (libcall))
3690 return gen_rtx_REG (mode, ARG_REGISTER(1));
3694 return LIBCALL_VALUE (mode);
3697 /* Determine the amount of memory needed to store the possible return
3698 registers of an untyped call. */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
3706 if (TARGET_HARD_FLOAT_ABI)
3712 if (TARGET_MAVERICK)
3715 if (TARGET_IWMMXT_ABI)
3722 /* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;
3730 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3732 if (TARGET_AAPCS_BASED)
      /* Simple, non-aggregate types (i.e. not including vectors and
3735 complex) are always returned in a register (or registers).
3736 We don't care about which register here, so we can short-cut
3737 some of the detail. */
3738 if (!AGGREGATE_TYPE_P (type)
3739 && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;
      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;
3748 /* Check any available co-processors to see if they accept the
3749 type as a register candidate (VFP, for example, can return
3750 some aggregates in consecutive registers). These aren't
3751 available if the call is variadic. */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;
3755 /* Vector values should be returned using ARM registers, not
3756 memory (unless they're over 16 bytes, which will break since
3757 we only have four call-clobbered registers to play with). */
3758 if (TREE_CODE (type) == VECTOR_TYPE)
3759 return (size < 0 || size > (4 * UNITS_PER_WORD));
      /* The rest go in memory.  */
      return true;
    }
3765 if (TREE_CODE (type) == VECTOR_TYPE)
3766 return (size < 0 || size > (4 * UNITS_PER_WORD));
  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != VECTOR_TYPE)
    /* All simple types are returned in registers.  */
    return false;
3773 if (arm_abi != ARM_ABI_APCS)
3775 /* ATPCS and later return aggregate types in memory only if they are
3776 larger than a word (or are variable size). */
3777 return (size < 0 || size > UNITS_PER_WORD);
3780 /* For the arm-wince targets we choose to be compatible with Microsoft's
3781 ARM and Thumb compilers, which always return aggregates in memory. */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
3784 Also catch the case where int_size_in_bytes returns -1. In this case
3785 the aggregate is either huge or of variable size, and in either case
3786 we will want to return it via memory and not in a register. */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;
  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;
3794 /* For a struct the APCS says that we only return in a register
3795 if the type is 'integer like' and every addressable element
3796 has an offset of zero. For practical purposes this means
3797 that the structure can have at most one non bit-field element
3798 and that this element must be the first one in the structure. */
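      /* Editorial illustration (not from the original source): under these
         APCS rules a struct holding a single int is returned in r0, while
         struct { short a; short b; } goes to memory because its second
         field is addressable and not a bit-field.  */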
3800 /* Find the first field, ignoring non FIELD_DECL things which will
3801 have been created by C++. */
3802 for (field = TYPE_FIELDS (type);
3803 field && TREE_CODE (field) != FIELD_DECL;
3804 field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3810 /* Check that the first field is valid for returning in a register. */
3812 /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;
3816 /* ... Aggregates that are not themselves valid for returning in
3817 a register are not allowed. */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;
3821 /* Now check the remaining fields, if any. Only bitfields are allowed,
3822 since they are not addressable. */
3823 for (field = DECL_CHAIN (field);
3825 field = DECL_CHAIN (field))
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }
  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;
3841 /* Unions can be returned in registers if every element is
3842 integral, or can be returned in an integer register. */
3843 for (field = TYPE_FIELDS (type);
3845 field = DECL_CHAIN (field))
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
3859 #endif /* not ARM_WINCE */
  /* Return all other types in memory.  */
  return true;
}
3865 /* Indicate whether or not words of a double are in big-endian order. */
int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;
  /* For FPA, float words are always big-endian.  For VFP, float words
3874 follow the memory system mode. */
3882 return (TARGET_BIG_END ? 1 : 0);
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
3893 {"aapcs", ARM_PCS_AAPCS},
3894 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
3898 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3899 {"atpcs", ARM_PCS_ATPCS},
3900 {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
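/* Editorial illustration (not from the original source): the attribute is
   written on a declaration, e.g.

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   and only the strings compiled into the table above are accepted.  */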
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;
3911 /* Get the value of the argument. */
3912 if (TREE_VALUE (attr) == NULL_TREE
3913 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3914 return ARM_PCS_UNKNOWN;
3916 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3918 /* Check it against the list of known arguments. */
3919 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS attribute.  */
  return ARM_PCS_UNKNOWN;
}
3927 /* Get the PCS variant to use for this call. TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
3929 the call could be indirect or if this is a library call. */
enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
3933 bool user_convention = false;
3934 enum arm_pcs user_pcs = arm_pcs_default;
  tree attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));

  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }
3946 if (TARGET_AAPCS_BASED)
3948 /* Detect varargs functions. These always use the base rules
3949 (no argument is ever a candidate for a co-processor
3951 bool base_rules = stdarg_p (type);
3953 if (user_convention)
3955 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3956 sorry ("non-AAPCS derived PCS variant");
3957 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3958 error ("variadic functions must use the base AAPCS variant");
      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
3965 else if (decl && flag_unit_at_a_time)
3967 /* Local functions never leak outside this compilation unit,
3968 so we are free to use whatever conventions are
3970 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3971 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
3976 else if (user_convention && user_pcs != arm_pcs_default)
3977 sorry ("PCS variant");
3979 /* For everything else we use the target's default. */
3980 return arm_pcs_default;
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3986 const_tree fntype ATTRIBUTE_UNUSED,
3987 rtx libcall ATTRIBUTE_UNUSED,
3988 const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
3991 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
3995 /* Walk down the type tree of TYPE counting consecutive base elements.
3996 If *MODEP is VOIDmode, then set it to the first valid floating point
3997 type. If a non-floating point type is found, or if a floating point
3998 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3999 otherwise return the count in the sub-tree. */
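/* Editorial illustration (not from the original source): for
   struct { float x, y, z; } the walk below sets *MODEP to SFmode and
   returns 3, identifying a homogeneous aggregate that the VFP PCS can
   pass in three consecutive single-precision registers.  */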
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;
4006 switch (TREE_CODE (type))
4009 mode = TYPE_MODE (type);
4010 if (mode != DFmode && mode != SFmode)
4013 if (*modep == VOIDmode)
4022 mode = TYPE_MODE (TREE_TYPE (type));
4023 if (mode != DFmode && mode != SFmode)
4026 if (*modep == VOIDmode)
4035 /* Use V2SImode and V4SImode as representatives of all 64-bit
4036 and 128-bit vector types, whether or not those modes are
4037 supported with the present options. */
4038 size = int_size_in_bytes (type);
4051 if (*modep == VOIDmode)
4054 /* Vector modes are considered to be opaque: two vectors are
4055 equivalent for the purposes of being homogeneous aggregates
4056 if they are the same size. */
4065 tree index = TYPE_DOMAIN (type);
4067 /* Can't handle incomplete types. */
4068 if (!COMPLETE_TYPE_P(type))
4071 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4074 || !TYPE_MAX_VALUE (index)
4075 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4076 || !TYPE_MIN_VALUE (index)
4077 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4078 || count < 0)
4079 return -1;
4081 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4082 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4084 /* There must be no padding. */
4085 if (!host_integerp (TYPE_SIZE (type), 1)
4086 || (tree_low_cst (TYPE_SIZE (type), 1)
4087 != count * GET_MODE_BITSIZE (*modep)))
4088 return -1;
4090 return count;
4093 case RECORD_TYPE:
4095 int count = 0;
4096 int sub_count;
4097 tree field;
4099 /* Can't handle incomplete types. */
4100 if (!COMPLETE_TYPE_P(type))
4101 return -1;
4103 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4105 if (TREE_CODE (field) != FIELD_DECL)
4106 continue;
4108 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4109 if (sub_count < 0)
4110 return -1;
4111 count += sub_count;
4114 /* There must be no padding. */
4115 if (!host_integerp (TYPE_SIZE (type), 1)
4116 || (tree_low_cst (TYPE_SIZE (type), 1)
4117 != count * GET_MODE_BITSIZE (*modep)))
4118 return -1;
4120 return count;
4123 case UNION_TYPE:
4124 case QUAL_UNION_TYPE:
4126 /* These aren't very interesting except in a degenerate case. */
4127 int count = 0;
4128 int sub_count;
4129 tree field;
4131 /* Can't handle incomplete types. */
4132 if (!COMPLETE_TYPE_P(type))
4133 return -1;
4135 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4137 if (TREE_CODE (field) != FIELD_DECL)
4138 continue;
4140 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4141 if (sub_count < 0)
4142 return -1;
4143 count = count > sub_count ? count : sub_count;
4146 /* There must be no padding. */
4147 if (!host_integerp (TYPE_SIZE (type), 1)
4148 || (tree_low_cst (TYPE_SIZE (type), 1)
4149 != count * GET_MODE_BITSIZE (*modep)))
4150 return -1;
4152 return count;
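/* Worked example (ours, for illustration): under the VFP variant the type

     struct quat { float x, y, z, w; };

   walks to four consecutive SFmode elements, so *modep becomes SFmode and
   the count is 4, i.e. a homogeneous floating-point aggregate. Mixing in
   an int field makes the walk return -1; a count above 4 is rejected
   later in aapcs_vfp_is_call_or_return_candidate. */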
4162 /* Return true if PCS_VARIANT should use VFP registers. */
4164 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4166 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4168 static bool seen_thumb1_vfp = false;
4170 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4172 sorry ("Thumb-1 hard-float VFP ABI");
4173 /* sorry() is not immediately fatal, so only display this once. */
4174 seen_thumb1_vfp = true;
4177 return true;
4180 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4181 return false;
4183 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4184 (TARGET_VFP_DOUBLE || !is_double));
4188 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4189 enum machine_mode mode, const_tree type,
4190 enum machine_mode *base_mode, int *count)
4192 enum machine_mode new_mode = VOIDmode;
4194 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4195 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4196 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4198 *count = 1;
4199 new_mode = mode;
4201 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4203 *count = 2;
4204 new_mode = (mode == DCmode ? DFmode : SFmode);
4206 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4208 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4210 if (ag_count > 0 && ag_count <= 4)
4211 *count = ag_count;
4219 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4220 return false;
4222 *base_mode = new_mode;
4223 return true;
4227 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4228 enum machine_mode mode, const_tree type)
4230 int count ATTRIBUTE_UNUSED;
4231 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4233 if (!use_vfp_abi (pcs_variant, false))
4234 return false;
4235 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4236 &ag_mode, &count);
4240 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4243 if (!use_vfp_abi (pcum->pcs_variant, false))
4244 return false;
4246 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4247 &pcum->aapcs_vfp_rmode,
4248 &pcum->aapcs_vfp_rcount);
4252 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4253 const_tree type ATTRIBUTE_UNUSED)
4255 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4256 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4259 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4260 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4262 pcum->aapcs_vfp_reg_alloc = mask << regno;
4263 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4266 int rcount = pcum->aapcs_vfp_rcount;
4268 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4272 /* Avoid using unsupported vector modes. */
4273 if (rmode == V2SImode)
4274 rmode = DImode;
4275 else if (rmode == V4SImode)
4282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4283 for (i = 0; i < rcount; i++)
4285 rtx tmp = gen_rtx_REG (rmode,
4286 FIRST_VFP_REGNUM + regno + i * rshift);
4287 tmp = gen_rtx_EXPR_LIST
4288 (VOIDmode, tmp,
4289 GEN_INT (i * GET_MODE_SIZE (rmode)));
4290 XVECEXP (par, 0, i) = tmp;
4293 pcum->aapcs_reg = par;
4295 else
4296 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4297 return true;
4300 return false;
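/* A small numeric illustration of the mask scan above (ours): allocating
   a single DFmode argument gives shift = 2 and mask = 0x3, so the loop
   searches for the first free even/odd pair of S registers (d0, d1, ...).
   Because the search restarts from s0 for every argument, gaps left by
   earlier allocations can be back-filled, as the VFP PCS requires. */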
4303 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4304 enum machine_mode mode,
4305 const_tree type ATTRIBUTE_UNUSED)
4307 if (!use_vfp_abi (pcs_variant, false))
4308 return NULL;
4310 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4313 enum machine_mode ag_mode;
4318 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4319 &ag_mode, &count);
4323 if (ag_mode == V2SImode)
4324 ag_mode = DImode;
4325 else if (ag_mode == V4SImode)
4331 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4332 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4333 for (i = 0; i < count; i++)
4335 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4336 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4337 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4338 XVECEXP (par, 0, i) = tmp;
4341 return par;
4344 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4348 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4349 enum machine_mode mode ATTRIBUTE_UNUSED,
4350 const_tree type ATTRIBUTE_UNUSED)
4352 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4353 pcum->aapcs_vfp_reg_alloc = 0;
4357 #define AAPCS_CP(X) \
4359 aapcs_ ## X ## _cum_init, \
4360 aapcs_ ## X ## _is_call_candidate, \
4361 aapcs_ ## X ## _allocate, \
4362 aapcs_ ## X ## _is_return_candidate, \
4363 aapcs_ ## X ## _allocate_return_reg, \
4364 aapcs_ ## X ## _advance \
4367 /* Table of co-processors that can be used to pass arguments in
4368 registers. Ideally, no argument should be a candidate for more than
4369 one co-processor table entry, but the table is processed in order
4370 and stops after the first match. If that entry then fails to put
4371 the argument into a co-processor register, the argument will go on
4372 the stack. */
4375 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4376 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4378 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4379 BLKmode) is a candidate for this co-processor's registers; this
4380 function should ignore any position-dependent state in
4381 CUMULATIVE_ARGS and only use call-type dependent information. */
4382 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4384 /* Return true if the argument does get a co-processor register; it
4385 should set aapcs_reg to an RTX of the register allocated as is
4386 required for a return from FUNCTION_ARG. */
4387 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4389 /* Return true if a result of mode MODE (or type TYPE if MODE is
4390 BLKmode) can be returned in this co-processor's registers. */
4391 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4393 /* Allocate and return an RTX element to hold the return type of a
4394 call, this routine must not fail and will only be called if
4395 is_return_candidate returned true with the same parameters. */
4396 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4398 /* Finish processing this argument and prepare to start processing
4399 the next one. */
4400 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4401 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4409 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4414 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4415 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4416 return i;
4418 return -1;
4422 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4424 /* We aren't passed a decl, so we can't check that a call is local.
4425 However, it isn't clear that that would be a win anyway, since it
4426 might limit some tail-calling opportunities. */
4427 enum arm_pcs pcs_variant;
4431 const_tree fndecl = NULL_TREE;
4433 if (TREE_CODE (fntype) == FUNCTION_DECL)
4435 fndecl = fntype;
4436 fntype = TREE_TYPE (fntype);
4439 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4441 else
4442 pcs_variant = arm_pcs_default;
4444 if (pcs_variant != ARM_PCS_AAPCS)
4448 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4449 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4450 mode, type))
4451 return i;
4453 return -1;
4458 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4461 /* We aren't passed a decl, so we can't check that a call is local.
4462 However, it isn't clear that that would be a win anyway, since it
4463 might limit some tail-calling opportunities. */
4464 enum arm_pcs pcs_variant;
4465 int unsignedp ATTRIBUTE_UNUSED;
4469 const_tree fndecl = NULL_TREE;
4471 if (TREE_CODE (fntype) == FUNCTION_DECL)
4473 fndecl = fntype;
4474 fntype = TREE_TYPE (fntype);
4477 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4479 else
4480 pcs_variant = arm_pcs_default;
4482 /* Promote integer types. */
4483 if (type && INTEGRAL_TYPE_P (type))
4484 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4486 if (pcs_variant != ARM_PCS_AAPCS)
4490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4491 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4492 type))
4493 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4494 mode, type);
4497 /* Promotes small structs returned in a register to full-word size
4498 for big-endian AAPCS. */
4499 if (type && arm_return_in_msb (type))
4501 HOST_WIDE_INT size = int_size_in_bytes (type);
4502 if (size % UNITS_PER_WORD != 0)
4504 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4505 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4509 return gen_rtx_REG (mode, R0_REGNUM);
4513 aapcs_libcall_value (enum machine_mode mode)
4515 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4516 && GET_MODE_SIZE (mode) <= 4)
4517 mode = SImode;
4519 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4522 /* Lay out a function argument using the AAPCS rules. The rule
4523 numbers referred to here are those in the AAPCS. */
4525 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4526 const_tree type, bool named)
4531 /* We only need to do this once per argument. */
4532 if (pcum->aapcs_arg_processed)
4533 return;
4535 pcum->aapcs_arg_processed = true;
4537 /* Special case: if named is false then we are handling an incoming
4538 anonymous argument which is on the stack. */
4539 if (!named)
4540 return;
4542 /* Is this a potential co-processor register candidate? */
4543 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4545 int slot = aapcs_select_call_coproc (pcum, mode, type);
4546 pcum->aapcs_cprc_slot = slot;
4548 /* We don't have to apply any of the rules from part B of the
4549 preparation phase; these are handled elsewhere in the
4550 compiler. */
4554 /* A Co-processor register candidate goes either in its own
4555 class of registers or on the stack. */
4556 if (!pcum->aapcs_cprc_failed[slot])
4558 /* C1.cp - Try to allocate the argument to co-processor
4559 registers. */
4560 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4561 return;
4563 /* C2.cp - Put the argument on the stack and note that we
4564 can't assign any more candidates in this slot. We also
4565 need to note that we have allocated stack space, so that
4566 we won't later try to split a non-cprc candidate between
4567 core registers and the stack. */
4568 pcum->aapcs_cprc_failed[slot] = true;
4569 pcum->can_split = false;
4572 /* We didn't get a register, so this argument goes on the
4573 stack. */
4574 gcc_assert (pcum->can_split == false);
4579 /* C3 - For double-word aligned arguments, round the NCRN up to the
4580 next even number. */
4581 ncrn = pcum->aapcs_ncrn;
4582 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4583 ncrn++;
4585 nregs = ARM_NUM_REGS2(mode, type);
4587 /* Sigh, this test should really assert that nregs > 0, but a GCC
4588 extension allows empty structs and then gives them empty size; it
4589 then allows such a structure to be passed by value. For some of
4590 the code below we have to pretend that such an argument has
4591 non-zero size so that we 'locate' it correctly either in
4592 registers or on the stack. */
4593 gcc_assert (nregs >= 0);
4595 nregs2 = nregs ? nregs : 1;
4597 /* C4 - Argument fits entirely in core registers. */
4598 if (ncrn + nregs2 <= NUM_ARG_REGS)
4600 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4601 pcum->aapcs_next_ncrn = ncrn + nregs;
4602 return;
4605 /* C5 - Some core registers left and there are no arguments already
4606 on the stack: split this argument between the remaining core
4607 registers and the stack. */
4608 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4610 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4611 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4612 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4613 return;
4616 /* C6 - NCRN is set to 4. */
4617 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4619 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
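/* Worked example for the base variant (ours; rule labels as above):

     void f (int a, double b, int c);

   a: ncrn = 0 and one register suffices, so it goes in r0 (C4).
   b: needs doubleword alignment, so C3 rounds ncrn from 1 up to 2 and C4
   assigns the pair r2/r3.
   c: ncrn is now 4, no core registers remain, so it goes on the stack
   (C6, then C7/C8). */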
4623 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4624 for a call to a function whose data type is FNTYPE.
4625 For a library call, FNTYPE is NULL. */
4627 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4629 tree fndecl ATTRIBUTE_UNUSED)
4631 /* Long call handling. */
4632 if (fntype)
4633 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4634 else
4635 pcum->pcs_variant = arm_pcs_default;
4637 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4639 if (arm_libcall_uses_aapcs_base (libname))
4640 pcum->pcs_variant = ARM_PCS_AAPCS;
4642 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4643 pcum->aapcs_reg = NULL_RTX;
4644 pcum->aapcs_partial = 0;
4645 pcum->aapcs_arg_processed = false;
4646 pcum->aapcs_cprc_slot = -1;
4647 pcum->can_split = true;
4649 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4653 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4655 pcum->aapcs_cprc_failed[i] = false;
4656 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4664 /* On the ARM, the offset starts at 0. */
4665 pcum->nregs = 0;
4666 pcum->iwmmxt_nregs = 0;
4667 pcum->can_split = true;
4669 /* Varargs vectors are treated the same as long long.
4670 named_count avoids having to change the way arm handles 'named'. */
4671 pcum->named_count = 0;
4674 if (TARGET_REALLY_IWMMXT && fntype)
4678 for (fn_arg = TYPE_ARG_TYPES (fntype);
4679 fn_arg;
4680 fn_arg = TREE_CHAIN (fn_arg))
4681 pcum->named_count += 1;
4683 if (! pcum->named_count)
4684 pcum->named_count = INT_MAX;
4689 /* Return true if mode/type need doubleword alignment. */
4691 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4693 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4694 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
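/* On ARM, PARM_BOUNDARY is 32, so this holds for DImode, DFmode and any
   type whose alignment exceeds one word, e.g. (our example)

     struct eight { int a; } __attribute__ ((aligned (8)));

   Such arguments start in an even-numbered register and get 8-byte
   aligned stack slots. */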
4698 /* Determine where to put an argument to a function.
4699 Value is zero to push the argument on the stack,
4700 or a hard register in which to store the argument.
4702 MODE is the argument's machine mode.
4703 TYPE is the data type of the argument (as a tree).
4704 This is null for libcalls where that information may
4706 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4707 the preceding args and about the function being called.
4708 NAMED is nonzero if this argument is a named parameter
4709 (otherwise it is an extra parameter matching an ellipsis).
4711 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4712 other arguments are passed on the stack. If (NAMED == 0) (which happens
4713 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4714 defined), say it is passed on the stack (function_prologue will
4715 indeed make it pass on the stack if necessary). */
4718 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4719 const_tree type, bool named)
4721 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4724 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4725 a call insn (op3 of a call_value insn). */
4726 if (mode == VOIDmode)
4727 return const0_rtx;
4729 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4731 aapcs_layout_arg (pcum, mode, type, named);
4732 return pcum->aapcs_reg;
4735 /* Varargs vectors are treated the same as long long.
4736 named_count avoids having to change the way arm handles 'named'. */
4737 if (TARGET_IWMMXT_ABI
4738 && arm_vector_mode_supported_p (mode)
4739 && pcum->named_count > pcum->nargs + 1)
4741 if (pcum->iwmmxt_nregs <= 9)
4742 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4745 pcum->can_split = false;
4750 /* Put doubleword aligned quantities in even register pairs. */
4751 if (pcum->nregs & 1
4752 && ARM_DOUBLEWORD_ALIGN
4753 && arm_needs_doubleword_align (mode, type))
4754 pcum->nregs++;
4756 /* Only allow splitting an arg between regs and memory if all preceding
4757 args were allocated to regs. For args passed by reference we only count
4758 the reference pointer. */
4759 if (pcum->can_split)
4760 nregs = 1;
4761 else
4762 nregs = ARM_NUM_REGS2 (mode, type);
4764 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4765 return NULL_RTX;
4767 return gen_rtx_REG (mode, pcum->nregs);
4771 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4773 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4774 ? DOUBLEWORD_ALIGNMENT
4775 : PARM_BOUNDARY);
4779 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4780 tree type, bool named)
4782 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4783 int nregs = pcum->nregs;
4785 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4787 aapcs_layout_arg (pcum, mode, type, named);
4788 return pcum->aapcs_partial;
4791 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4792 return 0;
4794 if (NUM_ARG_REGS > nregs
4795 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4796 && pcum->can_split)
4797 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
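/* Numeric illustration (ours): with three argument words already used
   (nregs = 3), a DFmode argument needs two words, so one lives in r3 and
   (NUM_ARG_REGS - nregs) * UNITS_PER_WORD = 4 bytes are reported as
   passed partially in registers; the rest goes on the stack. */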
4802 /* Update the data in PCUM to advance over an argument
4803 of mode MODE and data type TYPE.
4804 (TYPE is null for libcalls where that information may not be available.) */
4807 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4808 const_tree type, bool named)
4810 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4812 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4814 aapcs_layout_arg (pcum, mode, type, named);
4816 if (pcum->aapcs_cprc_slot >= 0)
4818 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4819 type);
4820 pcum->aapcs_cprc_slot = -1;
4823 /* Generic stuff. */
4824 pcum->aapcs_arg_processed = false;
4825 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4826 pcum->aapcs_reg = NULL_RTX;
4827 pcum->aapcs_partial = 0;
4832 if (arm_vector_mode_supported_p (mode)
4833 && pcum->named_count > pcum->nargs
4834 && TARGET_IWMMXT_ABI)
4835 pcum->iwmmxt_nregs += 1;
4836 else
4837 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4841 /* Variable sized types are passed by reference. This is a GCC
4842 extension to the ARM ABI. */
4845 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4846 enum machine_mode mode ATTRIBUTE_UNUSED,
4847 const_tree type, bool named ATTRIBUTE_UNUSED)
4849 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4852 /* Encode the current state of the #pragma [no_]long_calls. */
4853 typedef enum
4855 OFF, /* No #pragma [no_]long_calls is in effect. */
4856 LONG, /* #pragma long_calls is in effect. */
4857 SHORT /* #pragma no_long_calls is in effect. */
4858 } arm_pragma_enum;
4860 static arm_pragma_enum arm_pragma_long_calls = OFF;
4863 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4865 arm_pragma_long_calls = LONG;
4869 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4871 arm_pragma_long_calls = SHORT;
4875 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4877 arm_pragma_long_calls = OFF;
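/* Typical use in user code (our illustration):

     #pragma long_calls
     void far_away (void);         (receives the long_call attribute)
     #pragma long_calls_off

   arm_set_default_type_attributes below applies the pragma state to each
   function type declared while it is in effect. */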
4880 /* Handle an attribute requiring a FUNCTION_DECL;
4881 arguments as in struct attribute_spec.handler. */
4883 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4884 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4886 if (TREE_CODE (*node) != FUNCTION_DECL)
4888 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4889 name);
4890 *no_add_attrs = true;
4896 /* Handle an "interrupt" or "isr" attribute;
4897 arguments as in struct attribute_spec.handler. */
4899 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4904 if (TREE_CODE (*node) != FUNCTION_DECL)
4906 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4907 name);
4908 *no_add_attrs = true;
4910 /* FIXME: the argument if any is checked for type attributes;
4911 should it be checked for decl ones? */
4915 if (TREE_CODE (*node) == FUNCTION_TYPE
4916 || TREE_CODE (*node) == METHOD_TYPE)
4918 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4920 warning (OPT_Wattributes, "%qE attribute ignored",
4921 name);
4922 *no_add_attrs = true;
4925 else if (TREE_CODE (*node) == POINTER_TYPE
4926 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4927 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4928 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4930 *node = build_variant_type_copy (*node);
4931 TREE_TYPE (*node) = build_type_attribute_variant
4933 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4934 *no_add_attrs = true;
4938 /* Possibly pass this attribute on from the type to a decl. */
4939 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4940 | (int) ATTR_FLAG_FUNCTION_NEXT
4941 | (int) ATTR_FLAG_ARRAY_NEXT))
4943 *no_add_attrs = true;
4944 return tree_cons (name, args, NULL_TREE);
4948 warning (OPT_Wattributes, "%qE attribute ignored",
4949 name);
4957 /* Handle a "pcs" attribute; arguments as in struct
4958 attribute_spec.handler. */
4960 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4961 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4963 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4965 warning (OPT_Wattributes, "%qE attribute ignored", name);
4966 *no_add_attrs = true;
4971 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4972 /* Handle the "notshared" attribute. This attribute is another way of
4973 requesting hidden visibility. ARM's compiler supports
4974 "__declspec(notshared)"; we support the same thing via an
4978 arm_handle_notshared_attribute (tree *node,
4979 tree name ATTRIBUTE_UNUSED,
4980 tree args ATTRIBUTE_UNUSED,
4981 int flags ATTRIBUTE_UNUSED,
4984 tree decl = TYPE_NAME (*node);
4986 if (decl)
4988 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4989 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4990 *no_add_attrs = false;
4996 /* Return 0 if the attributes for two types are incompatible, 1 if they
4997 are compatible, and 2 if they are nearly compatible (which causes a
4998 warning to be generated). */
5000 arm_comp_type_attributes (const_tree type1, const_tree type2)
5004 /* Check for mismatch of non-default calling convention. */
5005 if (TREE_CODE (type1) != FUNCTION_TYPE)
5006 return 1;
5008 /* Check for mismatched call attributes. */
5009 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5010 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5011 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5012 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5014 /* Only bother to check if an attribute is defined. */
5015 if (l1 | l2 | s1 | s2)
5017 /* If one type has an attribute, the other must have the same attribute. */
5018 if ((l1 != l2) || (s1 != s2))
5019 return 0;
5021 /* Disallow mixed attributes. */
5022 if ((l1 & s2) || (l2 & s1))
5023 return 0;
5026 /* Check for mismatched ISR attribute. */
5027 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5028 if (! l1)
5029 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5030 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5031 if (! l2)
5032 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5033 if (l1 != l2)
5034 return 0;
5036 return 1;
5039 /* Assigns default attributes to newly defined type. This is used to
5040 set short_call/long_call attributes for function types of
5041 functions defined inside corresponding #pragma scopes. */
5043 arm_set_default_type_attributes (tree type)
5045 /* Add __attribute__ ((long_call)) to all functions when inside
5046 #pragma long_calls, or __attribute__ ((short_call)) when inside
5047 #pragma no_long_calls. */
5048 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5050 tree type_attr_list, attr_name;
5051 type_attr_list = TYPE_ATTRIBUTES (type);
5053 if (arm_pragma_long_calls == LONG)
5054 attr_name = get_identifier ("long_call");
5055 else if (arm_pragma_long_calls == SHORT)
5056 attr_name = get_identifier ("short_call");
5058 else
5059 return;
5060 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5061 TYPE_ATTRIBUTES (type) = type_attr_list;
5065 /* Return true if DECL is known to be linked into section SECTION. */
5068 arm_function_in_section_p (tree decl, section *section)
5070 /* We can only be certain about functions defined in the same
5071 compilation unit. */
5072 if (!TREE_STATIC (decl))
5073 return false;
5075 /* Make sure that SYMBOL always binds to the definition in this
5076 compilation unit. */
5077 if (!targetm.binds_local_p (decl))
5078 return false;
5080 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5081 if (!DECL_SECTION_NAME (decl))
5083 /* Make sure that we will not create a unique section for DECL. */
5084 if (flag_function_sections || DECL_ONE_ONLY (decl))
5085 return false;
5088 return function_section (decl) == section;
5091 /* Return nonzero if a 32-bit "long_call" should be generated for
5092 a call from the current function to DECL. We generate a long_call
5093 if the function:
5095 a. has an __attribute__ ((long_call))
5096 or b. is within the scope of a #pragma long_calls
5097 or c. the -mlong-calls command line switch has been specified
5099 However we do not generate a long call if the function:
5101 d. has an __attribute__ ((short_call))
5102 or e. is inside the scope of a #pragma no_long_calls
5103 or f. is defined in the same section as the current function. */
5106 arm_is_long_call_p (tree decl)
5110 if (!decl)
5111 return TARGET_LONG_CALLS;
5113 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5114 if (lookup_attribute ("short_call", attrs))
5115 return false;
5117 /* For "f", be conservative, and only cater for cases in which the
5118 whole of the current function is placed in the same section. */
5119 if (!flag_reorder_blocks_and_partition
5120 && TREE_CODE (decl) == FUNCTION_DECL
5121 && arm_function_in_section_p (decl, current_function_section ()))
5122 return false;
5124 if (lookup_attribute ("long_call", attrs))
5125 return true;
5127 return TARGET_LONG_CALLS;
5130 /* Return nonzero if it is ok to make a tail-call to DECL. */
5132 arm_function_ok_for_sibcall (tree decl, tree exp)
5134 unsigned long func_type;
5136 if (cfun->machine->sibcall_blocked)
5137 return false;
5139 /* Never tailcall something for which we have no decl, or if we
5140 are generating code for Thumb-1. */
5141 if (decl == NULL || TARGET_THUMB1)
5142 return false;
5144 /* The PIC register is live on entry to VxWorks PLT entries, so we
5145 must make the call before restoring the PIC register. */
5146 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5147 return false;
5149 /* Cannot tail-call to long calls, since these are out of range of
5150 a branch instruction. */
5151 if (arm_is_long_call_p (decl))
5152 return false;
5154 /* If we are interworking and the function is not declared static
5155 then we can't tail-call it unless we know that it exists in this
5156 compilation unit (since it might be a Thumb routine). */
5157 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5158 return false;
5160 func_type = arm_current_func_type ();
5161 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5162 if (IS_INTERRUPT (func_type))
5163 return false;
5165 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5167 /* Check that the return value locations are the same. For
5168 example that we aren't returning a value from the sibling in
5169 a VFP register but then need to transfer it to a core
5170 register. */
5171 rtx a, b;
5173 a = arm_function_value (TREE_TYPE (exp), decl, false);
5174 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5175 cfun->decl, false);
5176 if (!rtx_equal_p (a, b))
5177 return false;
5180 /* Never tailcall if function may be called with a misaligned SP. */
5181 if (IS_STACKALIGN (func_type))
5182 return false;
5184 /* Everything else is ok. */
5185 return true;
5189 /* Addressing mode support functions. */
5191 /* Return nonzero if X is a legitimate immediate operand when compiling
5192 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5194 legitimate_pic_operand_p (rtx x)
5196 if (GET_CODE (x) == SYMBOL_REF
5197 || (GET_CODE (x) == CONST
5198 && GET_CODE (XEXP (x, 0)) == PLUS
5199 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5200 return 0;
5202 return 1;
5205 /* Record that the current function needs a PIC register. Initialize
5206 cfun->machine->pic_reg if we have not already done so. */
5209 require_pic_register (void)
5211 /* A lot of the logic here is made obscure by the fact that this
5212 routine gets called as part of the rtx cost estimation process.
5213 We don't want those calls to affect any assumptions about the real
5214 function; and further, we can't call entry_of_function() until we
5215 start the real expansion process. */
5216 if (!crtl->uses_pic_offset_table)
5218 gcc_assert (can_create_pseudo_p ());
5219 if (arm_pic_register != INVALID_REGNUM)
5221 if (!cfun->machine->pic_reg)
5222 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5224 /* Play games to avoid marking the function as needing pic
5225 if we are being called as part of the cost-estimation
5227 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5228 crtl->uses_pic_offset_table = 1;
5234 if (!cfun->machine->pic_reg)
5235 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5237 /* Play games to avoid marking the function as needing pic
5238 if we are being called as part of the cost-estimation
5240 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5242 crtl->uses_pic_offset_table = 1;
5245 arm_load_pic_register (0UL);
5250 for (insn = seq; insn; insn = NEXT_INSN (insn))
5251 if (INSN_P (insn))
5252 INSN_LOCATOR (insn) = prologue_locator;
5254 /* We can be called during expansion of PHI nodes, where
5255 we can't yet emit instructions directly in the final
5256 insn stream. Queue the insns on the entry edge, they will
5257 be committed after everything else is expanded. */
5258 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5265 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5267 if (GET_CODE (orig) == SYMBOL_REF
5268 || GET_CODE (orig) == LABEL_REF)
5272 if (reg == 0)
5274 gcc_assert (can_create_pseudo_p ());
5275 reg = gen_reg_rtx (Pmode);
5278 /* VxWorks does not impose a fixed gap between segments; the run-time
5279 gap can be different from the object-file gap. We therefore can't
5280 use GOTOFF unless we are absolutely sure that the symbol is in the
5281 same segment as the GOT. Unfortunately, the flexibility of linker
5282 scripts means that we can't be sure of that in general, so assume
5283 that GOTOFF is never valid on VxWorks. */
5284 if ((GET_CODE (orig) == LABEL_REF
5285 || (GET_CODE (orig) == SYMBOL_REF &&
5286 SYMBOL_REF_LOCAL_P (orig)))
5288 && !TARGET_VXWORKS_RTP)
5289 insn = arm_pic_static_addr (orig, reg);
5295 /* If this function doesn't have a pic register, create one now. */
5296 require_pic_register ();
5298 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5300 /* Make the MEM as close to a constant as possible. */
5301 mem = SET_SRC (pat);
5302 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5303 MEM_READONLY_P (mem) = 1;
5304 MEM_NOTRAP_P (mem) = 1;
5306 insn = emit_insn (pat);
5309 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5310 by loop. */
5311 set_unique_reg_note (insn, REG_EQUAL, orig);
5313 return reg;
5315 else if (GET_CODE (orig) == CONST)
5319 if (GET_CODE (XEXP (orig, 0)) == PLUS
5320 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5321 return orig;
5323 /* Handle the case where we have: const (UNSPEC_TLS). */
5324 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5325 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5326 return orig;
5328 /* Handle the case where we have:
5329 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5330 CONST_INT. */
5331 if (GET_CODE (XEXP (orig, 0)) == PLUS
5332 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5333 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5335 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5336 return orig;
5339 if (reg == 0)
5341 gcc_assert (can_create_pseudo_p ());
5342 reg = gen_reg_rtx (Pmode);
5345 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5347 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5348 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5349 base == reg ? 0 : reg);
5351 if (GET_CODE (offset) == CONST_INT)
5353 /* The base register doesn't really matter, we only want to
5354 test the index for the appropriate mode. */
5355 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5357 gcc_assert (can_create_pseudo_p ());
5358 offset = force_reg (Pmode, offset);
5361 if (GET_CODE (offset) == CONST_INT)
5362 return plus_constant (base, INTVAL (offset));
5365 if (GET_MODE_SIZE (mode) > 4
5366 && (GET_MODE_CLASS (mode) == MODE_INT
5367 || TARGET_SOFT_FLOAT))
5369 emit_insn (gen_addsi3 (reg, base, offset));
5370 return reg;
5373 return gen_rtx_PLUS (Pmode, base, offset);
5380 /* Find a spare register to use during the prolog of a function. */
5383 thumb_find_work_register (unsigned long pushed_regs_mask)
5387 /* Check the argument registers first as these are call-used. The
5388 register allocation order means that sometimes r3 might be used
5389 but earlier argument registers might not, so check them all. */
5390 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5391 if (!df_regs_ever_live_p (reg))
5392 return reg;
5394 /* Before going on to check the call-saved registers we can try a couple
5395 more ways of deducing that r3 is available. The first is when we are
5396 pushing anonymous arguments onto the stack and we have less than 4
5397 registers worth of fixed arguments (*). In this case r3 will be part of
5398 the variable argument list and so we can be sure that it will be
5399 pushed right at the start of the function. Hence it will be available
5400 for the rest of the prologue.
5401 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5402 if (cfun->machine->uses_anonymous_args
5403 && crtl->args.pretend_args_size > 0)
5404 return LAST_ARG_REGNUM;
5406 /* The other case is when we have fixed arguments but less than 4 registers
5407 worth. In this case r3 might be used in the body of the function, but
5408 it is not being used to convey an argument into the function. In theory
5409 we could just check crtl->args.size to see how many bytes are
5410 being passed in argument registers, but it seems that it is unreliable.
5411 Sometimes it will have the value 0 when in fact arguments are being
5412 passed. (See testcase execute/20021111-1.c for an example). So we also
5413 check the args_info.nregs field as well. The problem with this field is
5414 that it makes no allowances for arguments that are passed to the
5415 function but which are not used. Hence we could miss an opportunity
5416 when a function has an unused argument in r3. But it is better to be
5417 safe than to be sorry. */
5418 if (! cfun->machine->uses_anonymous_args
5419 && crtl->args.size >= 0
5420 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5421 && crtl->args.info.nregs < 4)
5422 return LAST_ARG_REGNUM;
5424 /* Otherwise look for a call-saved register that is going to be pushed. */
5425 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5426 if (pushed_regs_mask & (1 << reg))
5427 return reg;
5431 /* Thumb-2 can use high regs. */
5432 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5433 if (pushed_regs_mask & (1 << reg))
5434 return reg;
5436 /* Something went wrong - thumb_compute_save_reg_mask()
5437 should have arranged for a suitable register to be pushed. */
5438 gcc_unreachable ();
5441 static GTY(()) int pic_labelno;
5443 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5444 low register. */
5447 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5449 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5451 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5452 return;
5454 gcc_assert (flag_pic);
5456 pic_reg = cfun->machine->pic_reg;
5457 if (TARGET_VXWORKS_RTP)
5459 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5460 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5461 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5463 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5465 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5466 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5470 /* We use an UNSPEC rather than a LABEL_REF because this label
5471 never appears in the code stream. */
5473 labelno = GEN_INT (pic_labelno++);
5474 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5475 l1 = gen_rtx_CONST (VOIDmode, l1);
5477 /* On the ARM the PC register contains 'dot + 8' at the time of the
5478 addition, on the Thumb it is 'dot + 4'. */
5479 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5480 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5482 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5486 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5487 if (TARGET_ARM)
5488 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5489 else
5490 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5492 else /* TARGET_THUMB1 */
5494 if (arm_pic_register != INVALID_REGNUM
5495 && REGNO (pic_reg) > LAST_LO_REGNUM)
5497 /* We will have pushed the pic register, so we should always be
5498 able to find a work register. */
5499 pic_tmp = gen_rtx_REG (SImode,
5500 thumb_find_work_register (saved_regs));
5501 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5502 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5505 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5506 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5510 /* Need to emit this whether or not we obey regdecls,
5511 since setjmp/longjmp can cause life info to screw up. */
5512 emit_use (pic_reg);
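/* For reference, the classic 32-bit GOT setup emitted above looks
   schematically like this (our sketch, not literal compiler output):

       ldr     rPIC, .Loffset
     .LPIC0:
       add     rPIC, pc            @ pc reads as .LPIC0 + 8 (ARM), + 4 (Thumb)
       ...
     .Loffset:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
*/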
5515 /* Generate code to load the address of a static var when flag_pic is set. */
5517 arm_pic_static_addr (rtx orig, rtx reg)
5519 rtx l1, labelno, offset_rtx, insn;
5521 gcc_assert (flag_pic);
5523 /* We use an UNSPEC rather than a LABEL_REF because this label
5524 never appears in the code stream. */
5525 labelno = GEN_INT (pic_labelno++);
5526 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5527 l1 = gen_rtx_CONST (VOIDmode, l1);
5529 /* On the ARM the PC register contains 'dot + 8' at the time of the
5530 addition, on the Thumb it is 'dot + 4'. */
5531 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5532 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5533 UNSPEC_SYMBOL_OFFSET);
5534 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5538 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5539 if (TARGET_ARM)
5540 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5541 else
5542 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5544 else /* TARGET_THUMB1 */
5546 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5547 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5550 return insn;
5553 /* Return nonzero if X is valid as an ARM state addressing register. */
5555 arm_address_register_rtx_p (rtx x, int strict_p)
5559 if (GET_CODE (x) != REG)
5560 return 0;
5562 regno = REGNO (x);
5564 if (strict_p)
5565 return ARM_REGNO_OK_FOR_BASE_P (regno);
5567 return (regno <= LAST_ARM_REGNUM
5568 || regno >= FIRST_PSEUDO_REGISTER
5569 || regno == FRAME_POINTER_REGNUM
5570 || regno == ARG_POINTER_REGNUM);
5573 /* Return TRUE if this rtx is the difference of a symbol and a label,
5574 and will reduce to a PC-relative relocation in the object file.
5575 Expressions like this can be left alone when generating PIC, rather
5576 than forced through the GOT. */
5578 pcrel_constant_p (rtx x)
5580 if (GET_CODE (x) == MINUS)
5581 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5583 return FALSE;
5586 /* Return true if X will surely end up in an index register after next
5589 will_be_in_index_register (const_rtx x)
5591 /* arm.md: calculate_pic_address will split this into a register. */
5592 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5595 /* Return nonzero if X is a valid ARM state address operand. */
5597 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5601 enum rtx_code code = GET_CODE (x);
5603 if (arm_address_register_rtx_p (x, strict_p))
5604 return 1;
5606 use_ldrd = (TARGET_LDRD
5608 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5610 if (code == POST_INC || code == PRE_DEC
5611 || ((code == PRE_INC || code == POST_DEC)
5612 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5613 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5615 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5616 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5617 && GET_CODE (XEXP (x, 1)) == PLUS
5618 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5620 rtx addend = XEXP (XEXP (x, 1), 1);
5622 /* Don't allow ldrd post increment by register because it's hard
5623 to fixup invalid register choices. */
5625 && GET_CODE (x) == POST_MODIFY
5626 && GET_CODE (addend) == REG)
5629 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5630 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5633 /* After reload constants split into minipools will have addresses
5634 from a LABEL_REF. */
5635 else if (reload_completed
5636 && (code == LABEL_REF
5638 && GET_CODE (XEXP (x, 0)) == PLUS
5639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5640 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5641 return 1;
5643 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5644 return 0;
5646 else if (code == PLUS)
5648 rtx xop0 = XEXP (x, 0);
5649 rtx xop1 = XEXP (x, 1);
5651 return ((arm_address_register_rtx_p (xop0, strict_p)
5652 && ((GET_CODE(xop1) == CONST_INT
5653 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5654 || (!strict_p && will_be_in_index_register (xop1))))
5655 || (arm_address_register_rtx_p (xop1, strict_p)
5656 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5660 /* Reload currently can't handle MINUS, so disable this for now */
5661 else if (GET_CODE (x) == MINUS)
5663 rtx xop0 = XEXP (x, 0);
5664 rtx xop1 = XEXP (x, 1);
5666 return (arm_address_register_rtx_p (xop0, strict_p)
5667 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5671 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5672 && code == SYMBOL_REF
5673 && CONSTANT_POOL_ADDRESS_P (x)
5675 && symbol_mentioned_p (get_pool_constant (x))
5676 && ! pcrel_constant_p (get_pool_constant (x))))
5677 return 1;
5679 return 0;
5682 /* Return nonzero if X is a valid Thumb-2 address operand. */
5684 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5687 enum rtx_code code = GET_CODE (x);
5689 if (arm_address_register_rtx_p (x, strict_p))
5690 return 1;
5692 use_ldrd = (TARGET_LDRD
5694 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5696 if (code == POST_INC || code == PRE_DEC
5697 || ((code == PRE_INC || code == POST_DEC)
5698 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5699 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5701 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5702 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5703 && GET_CODE (XEXP (x, 1)) == PLUS
5704 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5706 /* Thumb-2 only has autoincrement by constant. */
5707 rtx addend = XEXP (XEXP (x, 1), 1);
5708 HOST_WIDE_INT offset;
5710 if (GET_CODE (addend) != CONST_INT)
5713 offset = INTVAL(addend);
5714 if (GET_MODE_SIZE (mode) <= 4)
5715 return (offset > -256 && offset < 256);
5717 return (use_ldrd && offset > -1024 && offset < 1024
5718 && (offset & 3) == 0);
5721 /* After reload constants split into minipools will have addresses
5722 from a LABEL_REF. */
5723 else if (reload_completed
5724 && (code == LABEL_REF
5726 && GET_CODE (XEXP (x, 0)) == PLUS
5727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5728 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5729 return 1;
5731 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5732 return 0;
5734 else if (code == PLUS)
5736 rtx xop0 = XEXP (x, 0);
5737 rtx xop1 = XEXP (x, 1);
5739 return ((arm_address_register_rtx_p (xop0, strict_p)
5740 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5741 || (!strict_p && will_be_in_index_register (xop1))))
5742 || (arm_address_register_rtx_p (xop1, strict_p)
5743 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5746 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5747 && code == SYMBOL_REF
5748 && CONSTANT_POOL_ADDRESS_P (x)
5750 && symbol_mentioned_p (get_pool_constant (x))
5751 && ! pcrel_constant_p (get_pool_constant (x))))
5752 return 1;
5754 return 0;
5757 /* Return nonzero if INDEX is valid for an address index operand in
5760 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5763 HOST_WIDE_INT range;
5764 enum rtx_code code = GET_CODE (index);
5766 /* Standard coprocessor addressing modes. */
5767 if (TARGET_HARD_FLOAT
5768 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5769 && (mode == SFmode || mode == DFmode
5770 || (TARGET_MAVERICK && mode == DImode)))
5771 return (code == CONST_INT && INTVAL (index) < 1024
5772 && INTVAL (index) > -1024
5773 && (INTVAL (index) & 3) == 0);
5775 /* For quad modes, we restrict the constant offset to be slightly less
5776 than what the instruction format permits. We do this because for
5777 quad mode moves, we will actually decompose them into two separate
5778 double-mode reads or writes. INDEX must therefore be a valid
5779 (double-mode) offset and so should INDEX+8. */
5780 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5781 return (code == CONST_INT
5782 && INTVAL (index) < 1016
5783 && INTVAL (index) > -1024
5784 && (INTVAL (index) & 3) == 0);
5786 /* We have no such constraint on double mode offsets, so we permit the
5787 full range of the instruction format. */
5788 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5789 return (code == CONST_INT
5790 && INTVAL (index) < 1024
5791 && INTVAL (index) > -1024
5792 && (INTVAL (index) & 3) == 0);
5794 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5795 return (code == CONST_INT
5796 && INTVAL (index) < 1024
5797 && INTVAL (index) > -1024
5798 && (INTVAL (index) & 3) == 0);
5800 if (arm_address_register_rtx_p (index, strict_p)
5801 && (GET_MODE_SIZE (mode) <= 4))
5802 return 1;
5804 if (mode == DImode || mode == DFmode)
5806 if (code == CONST_INT)
5808 HOST_WIDE_INT val = INTVAL (index);
5810 if (TARGET_LDRD)
5811 return val > -256 && val < 256;
5812 else
5813 return val > -4096 && val < 4092;
5816 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5819 if (GET_MODE_SIZE (mode) <= 4
5823 || (mode == QImode && outer == SIGN_EXTEND))))
5825 if (code == MULT)
5827 rtx xiop0 = XEXP (index, 0);
5828 rtx xiop1 = XEXP (index, 1);
5830 return ((arm_address_register_rtx_p (xiop0, strict_p)
5831 && power_of_two_operand (xiop1, SImode))
5832 || (arm_address_register_rtx_p (xiop1, strict_p)
5833 && power_of_two_operand (xiop0, SImode)));
5835 else if (code == LSHIFTRT || code == ASHIFTRT
5836 || code == ASHIFT || code == ROTATERT)
5838 rtx op = XEXP (index, 1);
5840 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5841 && GET_CODE (op) == CONST_INT
5842 && INTVAL (op) > 0
5843 && INTVAL (op) <= 31);
5847 /* For ARM v4 we may be doing a sign-extend operation during the
5848 load. */
5849 if (arm_arch4)
5851 if (mode == HImode
5852 || mode == HFmode
5853 || (outer == SIGN_EXTEND && mode == QImode))
5854 range = 256;
5855 else
5856 range = 4096;
5858 else
5859 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5861 return (code == CONST_INT
5862 && INTVAL (index) < range
5863 && INTVAL (index) > -range);
5866 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5867 index operand. i.e. 1, 2, 4 or 8. */
5869 thumb2_index_mul_operand (rtx op)
5873 if (GET_CODE(op) != CONST_INT)
5874 return false;
5876 val = INTVAL(op);
5877 return (val == 1 || val == 2 || val == 4 || val == 8);
5880 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5882 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5884 enum rtx_code code = GET_CODE (index);
5886 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5887 /* Standard coprocessor addressing modes. */
5888 if (TARGET_HARD_FLOAT
5889 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5890 && (mode == SFmode || mode == DFmode
5891 || (TARGET_MAVERICK && mode == DImode)))
5892 return (code == CONST_INT && INTVAL (index) < 1024
5893 /* Thumb-2 allows only > -256 index range for its core register
5894 load/stores. Since we allow SF/DF in core registers, we have
5895 to use the intersection of -256~4096 (core) and -1024~1024
5896 (coprocessor). */
5897 && INTVAL (index) > -256
5898 && (INTVAL (index) & 3) == 0);
5900 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5902 /* For DImode assume values will usually live in core regs
5903 and only allow LDRD addressing modes. */
5904 if (!TARGET_LDRD || mode != DImode)
5905 return (code == CONST_INT
5906 && INTVAL (index) < 1024
5907 && INTVAL (index) > -1024
5908 && (INTVAL (index) & 3) == 0);
5911 /* For quad modes, we restrict the constant offset to be slightly less
5912 than what the instruction format permits. We do this because for
5913 quad mode moves, we will actually decompose them into two separate
5914 double-mode reads or writes. INDEX must therefore be a valid
5915 (double-mode) offset and so should INDEX+8. */
5916 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5917 return (code == CONST_INT
5918 && INTVAL (index) < 1016
5919 && INTVAL (index) > -1024
5920 && (INTVAL (index) & 3) == 0);
5922 /* We have no such constraint on double mode offsets, so we permit the
5923 full range of the instruction format. */
5924 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5925 return (code == CONST_INT
5926 && INTVAL (index) < 1024
5927 && INTVAL (index) > -1024
5928 && (INTVAL (index) & 3) == 0);
5930 if (arm_address_register_rtx_p (index, strict_p)
5931 && (GET_MODE_SIZE (mode) <= 4))
5932 return 1;
5934 if (mode == DImode || mode == DFmode)
5936 if (code == CONST_INT)
5938 HOST_WIDE_INT val = INTVAL (index);
5939 /* ??? Can we assume ldrd for thumb2? */
5940 /* Thumb-2 ldrd only has reg+const addressing modes. */
5941 /* ldrd supports offsets of +-1020.
5942 However the ldr fallback does not. */
5943 return val > -256 && val < 256 && (val & 3) == 0;
5951 rtx xiop0 = XEXP (index, 0);
5952 rtx xiop1 = XEXP (index, 1);
5954 return ((arm_address_register_rtx_p (xiop0, strict_p)
5955 && thumb2_index_mul_operand (xiop1))
5956 || (arm_address_register_rtx_p (xiop1, strict_p)
5957 && thumb2_index_mul_operand (xiop0)));
5959 else if (code == ASHIFT)
5961 rtx op = XEXP (index, 1);
5963 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5964 && GET_CODE (op) == CONST_INT
5965 && INTVAL (op) > 0
5966 && INTVAL (op) <= 3);
5969 return (code == CONST_INT
5970 && INTVAL (index) < 4096
5971 && INTVAL (index) > -256);
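/* The bounds above reflect the Thumb-2 encodings: LDR/STR accept a
   positive 12-bit immediate offset (0..4095), while negative offsets use
   the 8-bit immediate form (down to -255); anything outside that range
   has to be synthesized with a separate add. */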
5974 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5976 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5980 if (GET_CODE (x) != REG)
5981 return 0;
5983 regno = REGNO (x);
5985 if (strict_p)
5986 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5988 return (regno <= LAST_LO_REGNUM
5989 || regno > LAST_VIRTUAL_REGISTER
5990 || regno == FRAME_POINTER_REGNUM
5991 || (GET_MODE_SIZE (mode) >= 4
5992 && (regno == STACK_POINTER_REGNUM
5993 || regno >= FIRST_PSEUDO_REGISTER
5994 || x == hard_frame_pointer_rtx
5995 || x == arg_pointer_rtx)));
5998 /* Return nonzero if x is a legitimate index register. This is the case
5999 for any base register that can access a QImode object. */
6001 thumb1_index_register_rtx_p (rtx x, int strict_p)
6003 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6006 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6008 The AP may be eliminated to either the SP or the FP, so we use the
6009 least common denominator, e.g. SImode, and offsets from 0 to 64.
6011 ??? Verify whether the above is the right approach.
6013 ??? Also, the FP may be eliminated to the SP, so perhaps that
6014 needs special handling also.
6016 ??? Look at how the mips16 port solves this problem. It probably uses
6017 better ways to solve some of these problems.
6019 Although it is not incorrect, we don't accept QImode and HImode
6020 addresses based on the frame pointer or arg pointer until the
6021 reload pass starts. This is so that eliminating such addresses
6022 into stack based ones won't produce impossible code. */
6024 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6026 /* ??? Not clear if this is right. Experiment. */
6027 if (GET_MODE_SIZE (mode) < 4
6028 && !(reload_in_progress || reload_completed)
6029 && (reg_mentioned_p (frame_pointer_rtx, x)
6030 || reg_mentioned_p (arg_pointer_rtx, x)
6031 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6032 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6033 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6034 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6035 return 0;
6037 /* Accept any base register. SP only in SImode or larger. */
6038 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6039 return 1;
6041 /* This is PC relative data before arm_reorg runs. */
6042 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6043 && GET_CODE (x) == SYMBOL_REF
6044 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6045 return 1;
6047 /* This is PC relative data after arm_reorg runs. */
6048 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6050 && (GET_CODE (x) == LABEL_REF
6051 || (GET_CODE (x) == CONST
6052 && GET_CODE (XEXP (x, 0)) == PLUS
6053 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6054 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6055 return 1;
6057 /* Post-inc indexing only supported for SImode and larger. */
6058 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6059 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6060 return 1;
6062 else if (GET_CODE (x) == PLUS)
6064 /* REG+REG address can be any two index registers. */
6065 /* We disallow FRAME+REG addressing since we know that FRAME
6066 will be replaced with STACK, and SP relative addressing only
6067 permits SP+OFFSET. */
6068 if (GET_MODE_SIZE (mode) <= 4
6069 && XEXP (x, 0) != frame_pointer_rtx
6070 && XEXP (x, 1) != frame_pointer_rtx
6071 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6072 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6073 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6074 return 1;
6076 /* REG+const has 5-7 bit offset for non-SP registers. */
6077 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6078 || XEXP (x, 0) == arg_pointer_rtx)
6079 && GET_CODE (XEXP (x, 1)) == CONST_INT
6080 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6081 return 1;
6083 /* REG+const has 10-bit offset for SP, but only SImode and
6084 larger are supported. */
6085 /* ??? Should probably check for DI/DFmode overflow here
6086 just like GO_IF_LEGITIMATE_OFFSET does. */
6087 else if (GET_CODE (XEXP (x, 0)) == REG
6088 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6089 && GET_MODE_SIZE (mode) >= 4
6090 && GET_CODE (XEXP (x, 1)) == CONST_INT
6091 && INTVAL (XEXP (x, 1)) >= 0
6092 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6093 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6094 return 1;
6096 else if (GET_CODE (XEXP (x, 0)) == REG
6097 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6098 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6099 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6100 && REGNO (XEXP (x, 0))
6101 <= LAST_VIRTUAL_POINTER_REGISTER))
6102 && GET_MODE_SIZE (mode) >= 4
6103 && GET_CODE (XEXP (x, 1)) == CONST_INT
6104 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6105 return 1;
6108 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6109 && GET_MODE_SIZE (mode) == 4
6110 && GET_CODE (x) == SYMBOL_REF
6111 && CONSTANT_POOL_ADDRESS_P (x)
6113 && symbol_mentioned_p (get_pool_constant (x))
6114 && ! pcrel_constant_p (get_pool_constant (x))))
6115 return 1;
6117 return 0;
6120 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6121 instruction of mode MODE. */
6123 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6125 switch (GET_MODE_SIZE (mode))
6128 return val >= 0 && val < 32;
6131 return val >= 0 && val < 64 && (val & 1) == 0;
6135 && (val + GET_MODE_SIZE (mode)) <= 128
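/* In other words (our summary): byte accesses take offsets 0..31,
   halfword accesses 0..62 in steps of 2, and word or larger accesses must
   be word-aligned and end within 128 bytes, e.g. 0..124 for SImode. */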
6141 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6144 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6145 else if (TARGET_THUMB2)
6146 return thumb2_legitimate_address_p (mode, x, strict_p);
6147 else /* if (TARGET_THUMB1) */
6148 return thumb1_legitimate_address_p (mode, x, strict_p);
6151 /* Build the SYMBOL_REF for __tls_get_addr. */
6153 static GTY(()) rtx tls_get_addr_libfunc;
6156 get_tls_get_addr (void)
6158 if (!tls_get_addr_libfunc)
6159 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6160 return tls_get_addr_libfunc;
6164 arm_load_tp (rtx target)
6166 if (!target)
6167 target = gen_reg_rtx (SImode);
6171 /* Can return in any reg. */
6172 emit_insn (gen_load_tp_hard (target));
6176 /* Always returned in r0. Immediately copy the result into a pseudo,
6177 otherwise other uses of r0 (e.g. setting up function arguments) may
6178 clobber the value. */
6182 emit_insn (gen_load_tp_soft ());
6184 tmp = gen_rtx_REG (SImode, 0);
6185 emit_move_insn (target, tmp);
6187 return target;
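/* Note (our description): gen_load_tp_hard reads the thread pointer
   directly from CP15 ("mrc p15, 0, <reg>, c13, c0, 3"), whereas
   gen_load_tp_soft calls the __aeabi_read_tp library helper. */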
6191 load_tls_operand (rtx x, rtx reg)
6195 if (reg == NULL_RTX)
6196 reg = gen_reg_rtx (SImode);
6198 tmp = gen_rtx_CONST (SImode, x);
6200 emit_move_insn (reg, tmp);
6202 return reg;
6206 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6208 rtx insns, label, labelno, sum;
6210 gcc_assert (reloc != TLS_DESCSEQ);
6211 start_sequence ();
6213 labelno = GEN_INT (pic_labelno++);
6214 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6215 label = gen_rtx_CONST (VOIDmode, label);
6217 sum = gen_rtx_UNSPEC (Pmode,
6218 gen_rtvec (4, x, GEN_INT (reloc), label,
6219 GEN_INT (TARGET_ARM ? 8 : 4)),
6221 reg = load_tls_operand (sum, reg);
6224 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6226 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6228 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6229 LCT_PURE, /* LCT_CONST? */
6230 Pmode, 1, reg, Pmode);
6232 insns = get_insns ();
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
6241 rtx labelno = GEN_INT (pic_labelno++);
6242 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6243 rtx sum = gen_rtx_UNSPEC (Pmode,
6244 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6245 gen_rtx_CONST (VOIDmode, label),
6246 GEN_INT (!TARGET_ARM)),
6248 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6250 emit_insn (gen_tlscall (x, labelno));
6252 reg = gen_reg_rtx (SImode);
6254 gcc_assert (REGNO (reg) != 0);
6256 emit_move_insn (reg, reg0);
rtx
legitimize_tls_address (rtx x, rtx reg)
{
6264 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
6270 if (TARGET_GNU2_TLS)
6272 reg = arm_tls_descseq_addr (x, reg);
6274 tp = arm_load_tp (NULL_RTX);
6276 dest = gen_rtx_PLUS (Pmode, tp, reg);
6280 /* Original scheme */
6281 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6282 dest = gen_reg_rtx (Pmode);
6283 emit_libcall_block (insns, dest, ret, x);
6287 case TLS_MODEL_LOCAL_DYNAMIC:
6288 if (TARGET_GNU2_TLS)
6290 reg = arm_tls_descseq_addr (x, reg);
6292 tp = arm_load_tp (NULL_RTX);
6294 dest = gen_rtx_PLUS (Pmode, tp, reg);
6298 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6300 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6301 share the LDM result with other LD model accesses. */
6302 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6304 dest = gen_reg_rtx (Pmode);
6305 emit_libcall_block (insns, dest, ret, eqv);
6307 /* Load the addend. */
6308 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6309 GEN_INT (TLS_LDO32)),
6311 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6312 dest = gen_rtx_PLUS (Pmode, dest, addend);
6316 case TLS_MODEL_INITIAL_EXEC:
6317 labelno = GEN_INT (pic_labelno++);
6318 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6319 label = gen_rtx_CONST (VOIDmode, label);
6320 sum = gen_rtx_UNSPEC (Pmode,
6321 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6322 GEN_INT (TARGET_ARM ? 8 : 4)),
6324 reg = load_tls_operand (sum, reg);
6327 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6328 else if (TARGET_THUMB2)
6329 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6332 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6333 emit_move_insn (reg, gen_const_mem (SImode, reg));
6336 tp = arm_load_tp (NULL_RTX);
6338 return gen_rtx_PLUS (Pmode, tp, reg);
6340 case TLS_MODEL_LOCAL_EXEC:
6341 tp = arm_load_tp (NULL_RTX);
6343 reg = gen_rtx_UNSPEC (Pmode,
6344 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6346 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6348 return gen_rtx_PLUS (Pmode, tp, reg);
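/* Illustrative sketch, not from this file: under the local-exec model the
   variable's address is simply the thread pointer plus a link-time
   constant offset, which is what the TLS_MODEL_LOCAL_EXEC case above
   builds as (PLUS tp (UNSPEC ... TLS_LE32)).  read_tp and T_TPOFF below
   are hypothetical placeholders for the thread-pointer read and the
   resolved offset, not real symbols.  */
#if 0
extern char *read_tp (void);   /* stands in for gen_load_tp_hard/soft */
#define T_TPOFF 8              /* assumed offset of `t' in the TLS block */

static int *
addr_of_t (void)
{
  return (int *) (read_tp () + T_TPOFF);
}
#endif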
6355 /* Try machine-dependent ways of modifying an illegitimate address
6356 to be legitimate. If we find one, return the new, valid address. */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }
6368 if (arm_tls_symbol_p (x))
6369 return legitimize_tls_address (x, NULL_RTX);
6371 if (GET_CODE (x) == PLUS)
6373 rtx xop0 = XEXP (x, 0);
6374 rtx xop1 = XEXP (x, 1);
6376 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6377 xop0 = force_reg (SImode, xop0);
6379 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6380 xop1 = force_reg (SImode, xop1);
6382 if (ARM_BASE_REGISTER_RTX_P (xop0)
6383 && GET_CODE (xop1) == CONST_INT)
6385 HOST_WIDE_INT n, low_n;
6389 /* VFP addressing modes actually allow greater offsets, but for
6390 now we just stick with the lowest common denominator. */
6392 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6404 low_n = ((mode) == TImode ? 0
6405 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6409 base_reg = gen_reg_rtx (SImode);
6410 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6411 emit_move_insn (base_reg, val);
6412 x = plus_constant (base_reg, low_n);
6414 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6415 x = gen_rtx_PLUS (SImode, xop0, xop1);
6418 /* XXX We don't allow MINUS any more -- see comment in
6419 arm_legitimate_address_outer_p (). */
6420 else if (GET_CODE (x) == MINUS)
6422 rtx xop0 = XEXP (x, 0);
6423 rtx xop1 = XEXP (x, 1);
6425 if (CONSTANT_P (xop0))
6426 xop0 = force_reg (SImode, xop0);
6428 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6429 xop1 = force_reg (SImode, xop1);
6431 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6432 x = gen_rtx_MINUS (SImode, xop0, xop1);
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
6439 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6442 HOST_WIDE_INT mask, base, index;
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
6448 bits = (mode == SImode) ? 12 : 8;
6449 mask = (1 << bits) - 1;
6450 base = INTVAL (x) & ~mask;
6451 index = INTVAL (x) & mask;
6452 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6454 /* It'll most probably be more efficient to generate the base
6455 with more bits set and use a negative index instead. */
6459 base_reg = force_reg (SImode, GEN_INT (base));
6460 x = plus_constant (base_reg, index);
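      /* Illustrative sketch, not part of the compiler: the base/index
	 split performed just above, restated as a standalone program.
	 The address 0x12345 is an arbitrary example value.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long x = 0x12345;             /* absolute address to legitimize */
  int bits = 12;                /* SImode ldr: 12-bit index */
  long mask = (1L << bits) - 1;
  long base = x & ~mask;        /* 0x12000, materialized in a register */
  long index = x & mask;        /* 0x345, folded into the load */
  printf ("base=%#lx index=%#lx\n", base, index);
  return 0;
}
#endif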
6465 /* We need to find and carefully transform any SYMBOL and LABEL
6466 references; so go back to the original address expression. */
6467 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6469 if (new_x != orig_x)
6477 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6478 to be legitimate. If we find one, return the new, valid address. */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
6482 if (arm_tls_symbol_p (x))
6483 return legitimize_tls_address (x, NULL_RTX);
6485 if (GET_CODE (x) == PLUS
6486 && GET_CODE (XEXP (x, 1)) == CONST_INT
6487 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6488 || INTVAL (XEXP (x, 1)) < 0))
6490 rtx xop0 = XEXP (x, 0);
6491 rtx xop1 = XEXP (x, 1);
6492 HOST_WIDE_INT offset = INTVAL (xop1);
6494 /* Try and fold the offset into a biasing of the base register and
6495 then offsetting that. Don't do this when optimizing for space
6496 since it can cause too many CSEs. */
6497 if (optimize_size && offset >= 0
6498 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6500 HOST_WIDE_INT delta;
6503 delta = offset - (256 - GET_MODE_SIZE (mode));
6504 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6505 delta = 31 * GET_MODE_SIZE (mode);
6507 delta = offset & (~31 * GET_MODE_SIZE (mode));
6509 xop0 = force_operand (plus_constant (xop0, offset - delta),
6511 x = plus_constant (xop0, delta);
6513 else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
6517 x = force_operand (x, NULL_RTX);
6520 /* For the remaining cases, force the constant into a register. */
6521 xop1 = force_reg (SImode, xop1);
6522 x = gen_rtx_PLUS (SImode, xop0, xop1);
6525 else if (GET_CODE (x) == PLUS
6526 && s_register_operand (XEXP (x, 1), SImode)
6527 && !s_register_operand (XEXP (x, 0), SImode))
6529 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6531 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6536 /* We need to find and carefully transform any SYMBOL and LABEL
6537 references; so go back to the original address expression. */
6538 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6540 if (new_x != orig_x)
bool
arm_legitimize_reload_address (rtx *p,
			       enum machine_mode mode,
			       int opnum, int type,
			       int ind_levels ATTRIBUTE_UNUSED)
{
6553 if (GET_CODE (*p) == PLUS
6554 && GET_CODE (XEXP (*p, 0)) == REG
6555 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6556 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6558 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6559 HOST_WIDE_INT low, high;
6561 /* Detect coprocessor load/stores. */
6562 bool coproc_p = ((TARGET_HARD_FLOAT
6563 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6564 && (mode == SFmode || mode == DFmode
6565 || (mode == DImode && TARGET_MAVERICK)))
6566 || (TARGET_REALLY_IWMMXT
6567 && VALID_IWMMXT_REG_MODE (mode))
6569 && (VALID_NEON_DREG_MODE (mode)
6570 || VALID_NEON_QREG_MODE (mode))));
      /* For some conditions, bail out when the lower two bits are
	 unaligned.  */
6573 if ((val & 0x3) != 0
6574 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6576 /* For DI, and DF under soft-float: */
6577 || ((mode == DImode || mode == DFmode)
		 /* Without ldrd, we use stm/ldm, which does not
		    fare well with unaligned bits.  */
6581 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6582 || TARGET_THUMB2))))
6585 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6586 of which the (reg+high) gets turned into a reload add insn,
6587 we try to decompose the index into high/low values that can often
	 also lead to better reload CSE.
	 For example:
6590 ldr r0, [r2, #4100] // Offset too large
6591 ldr r1, [r2, #4104] // Offset too large
       is best reloaded as:

	 add	t1, r2, #4096
	 ldr	r0, [t1, #4]
	 add	t2, r2, #4096
	 ldr	r1, [t2, #8]

       which post-reload CSE can simplify in most cases to eliminate the
       second add instruction:

	 add	t1, r2, #4096
	 ldr	r0, [t1, #4]
	 ldr	r1, [t1, #8]
6605 The idea here is that we want to split out the bits of the constant
6606 as a mask, rather than as subtracting the maximum offset that the
6607 respective type of load/store used can handle.
       When encountering negative offsets, we can still utilize them even if
       the overall offset is positive; sometimes this may lead to an immediate
       that can be constructed with fewer instructions.
       For example:

	 ldr	r0, [r2, #0x3FFFFC]

       This is best reloaded as:

	 add	t1, r2, #0x400000
	 ldr	r0, [t1, #-4]
6619 The trick for spotting this for a load insn with N bits of offset
       (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6621 negative offset that is going to make bit N and all the bits below
6622 it become zero in the remainder part.
6624 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6625 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6626 used in most cases of ARM load/store instructions. */
#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)
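/* Standalone check of the macro above on the comment's own 0x3FFFFC
   example; illustrative only, not part of the compiler.  */
#if 0
#include <stdio.h>

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)

int
main (void)
{
  long val = 0x3FFFFC;
  long low = SIGN_MAG_LOW_ADDR_BITS (val, 12);  /* bit 12 set -> -4 */
  long high = val - low;                        /* 0x400000 */
  printf ("low=%ld high=%#lx\n", low, high);
  return 0;
}
#endif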
6635 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
      /* NEON quad-word load/stores are made of two double-word accesses,
	 so the valid index range is reduced by 8.  Treat as 9-bit range if
	 we go over it.  */
6640 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6641 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6643 else if (GET_MODE_SIZE (mode) == 8)
6646 low = (TARGET_THUMB2
6647 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6648 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6650 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6651 to access doublewords. The supported load/store offsets are
6652 -8, -4, and 4, which we try to produce here. */
6653 low = ((val & 0xf) ^ 0x8) - 0x8;
6655 else if (GET_MODE_SIZE (mode) < 8)
6657 /* NEON element load/stores do not have an offset. */
6658 if (TARGET_NEON_FP16 && mode == HFmode)
6663 /* Thumb-2 has an asymmetrical index range of (-256,4096).
	     Try the wider 12-bit range first, and re-try if the result
	     is out of range.  */
6666 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6668 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6672 if (mode == HImode || mode == HFmode)
6675 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6678 /* The storehi/movhi_bytes fallbacks can use only
6679 [-4094,+4094] of the full ldrb/strb index range. */
6680 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6681 if (low == 4095 || low == -4095)
6686 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6692 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6693 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6694 - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero.  */
6696 if (low == 0 || high == 0 || (high + low != val))
      /* Reload the high part into a base reg; leave the low part
	 in the mem.  */
6701 *p = gen_rtx_PLUS (GET_MODE (*p),
6702 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6705 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6706 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6707 VOIDmode, 0, 0, opnum, (enum reload_type) type);
rtx
thumb_legitimize_reload_address (rtx *x_p,
				 enum machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;
6722 if (GET_CODE (x) == PLUS
6723 && GET_MODE_SIZE (mode) < 4
6724 && REG_P (XEXP (x, 0))
6725 && XEXP (x, 0) == stack_pointer_rtx
6726 && GET_CODE (XEXP (x, 1)) == CONST_INT
6727 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6732 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6733 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6737 /* If both registers are hi-regs, then it's better to reload the
6738 entire expression rather than each register individually. That
6739 only requires one reload register rather than two. */
6740 if (GET_CODE (x) == PLUS
6741 && REG_P (XEXP (x, 0))
6742 && REG_P (XEXP (x, 1))
6743 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6744 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6749 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6750 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6757 /* Test for various thread-local symbols. */
6759 /* Return TRUE if X is a thread-local symbol. */
static bool
arm_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
6773 /* Helper for arm_tls_referenced_p. */
static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}
6790 /* Return TRUE if X contains any TLS symbol references. */
bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}
6801 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6803 On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */
static bool
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
{
6812 /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
6815 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6818 return flag_pic || !label_mentioned_p (x);
static bool
thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
6824 return (GET_CODE (x) == CONST_INT
6825 || GET_CODE (x) == CONST_DOUBLE
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}
static bool
arm_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
6839 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
static bool
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
6856 #define REG_OR_SUBREG_REG(X) \
6857 (GET_CODE (X) == REG \
6858 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6860 #define REG_OR_SUBREG_RTX(X) \
6861 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
6866 enum machine_mode mode = GET_MODE (x);
6880 return COSTS_N_INSNS (1);
6883 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6886 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6893 return COSTS_N_INSNS (2) + cycles;
6895 return COSTS_N_INSNS (1) + 16;
6898 return (COSTS_N_INSNS (1)
6899 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6900 + GET_CODE (SET_DEST (x)) == MEM));
6905 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6907 if (thumb_shiftable_const (INTVAL (x)))
6908 return COSTS_N_INSNS (2);
6909 return COSTS_N_INSNS (3);
6911 else if ((outer == PLUS || outer == COMPARE)
6912 && INTVAL (x) < 256 && INTVAL (x) > -256)
6914 else if ((outer == IOR || outer == XOR || outer == AND)
6915 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6916 return COSTS_N_INSNS (1);
6917 else if (outer == AND)
6920 /* This duplicates the tests in the andsi3 expander. */
6921 for (i = 9; i <= 31; i++)
6922 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6923 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6924 return COSTS_N_INSNS (2);
6926 else if (outer == ASHIFT || outer == ASHIFTRT
6927 || outer == LSHIFTRT)
6929 return COSTS_N_INSNS (2);
6935 return COSTS_N_INSNS (3);
6953 /* XXX another guess. */
6954 /* Memory costs quite a lot for the first word, but subsequent words
6955 load at the equivalent of a single insn each. */
6956 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6957 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6962 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6968 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6969 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6975 return total + COSTS_N_INSNS (1);
6977 /* Assume a two-shift sequence. Increase the cost slightly so
6978 we prefer actual shifts over an extend operation. */
6979 return total + 1 + COSTS_N_INSNS (2);
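/* Worked instance of the MEM heuristic in thumb1_rtx_costs above,
   assuming UNITS_PER_WORD == 4; illustrative only, not part of the
   compiler.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int size = 8;                  /* e.g. DImode */
  int units_per_word = 4;
  /* First word costs 10, each further word 4 more (plus 4 if the
     address is a constant-pool reference).  */
  int cost = 10 + 4 * ((size - 1) / units_per_word);
  printf ("cost = %d\n", cost);  /* prints: cost = 14 */
  return 0;
}
#endif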
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
6989 enum machine_mode mode = GET_MODE (x);
6990 enum rtx_code subcode;
6992 enum rtx_code code = GET_CODE (x);
6998 /* Memory costs quite a lot for the first word, but subsequent words
6999 load at the equivalent of a single insn each. */
7000 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7007 if (TARGET_HARD_FLOAT && mode == SFmode)
7008 *total = COSTS_N_INSNS (2);
7009 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7010 *total = COSTS_N_INSNS (4);
7012 *total = COSTS_N_INSNS (20);
7016 if (GET_CODE (XEXP (x, 1)) == REG)
7017 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7018 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7019 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7025 *total += COSTS_N_INSNS (4);
7030 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7031 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7034 *total += COSTS_N_INSNS (3);
7038 *total += COSTS_N_INSNS (1);
7039 /* Increase the cost of complex shifts because they aren't any faster,
7040 and reduce dual issue opportunities. */
7041 if (arm_tune_cortex_a9
7042 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7051 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7052 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7054 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7058 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7059 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7061 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7068 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7070 if (TARGET_HARD_FLOAT
7072 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7074 *total = COSTS_N_INSNS (1);
7075 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7076 && arm_const_double_rtx (XEXP (x, 0)))
7078 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7082 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7083 && arm_const_double_rtx (XEXP (x, 1)))
7085 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7091 *total = COSTS_N_INSNS (20);
7095 *total = COSTS_N_INSNS (1);
7096 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7097 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7099 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7103 subcode = GET_CODE (XEXP (x, 1));
7104 if (subcode == ASHIFT || subcode == ASHIFTRT
7105 || subcode == LSHIFTRT
7106 || subcode == ROTATE || subcode == ROTATERT)
7108 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7109 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7113 /* A shift as a part of RSB costs no more than RSB itself. */
7114 if (GET_CODE (XEXP (x, 0)) == MULT
7115 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7117 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7118 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7123 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7125 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7126 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7130 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7131 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7133 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7134 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7135 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7136 *total += COSTS_N_INSNS (1);
7144 if (code == PLUS && arm_arch6 && mode == SImode
7145 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7146 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7148 *total = COSTS_N_INSNS (1);
7149 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7151 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7155 /* MLA: All arguments must be registers. We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
7158 if (GET_CODE (XEXP (x, 0)) == MULT
7159 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7161 /* The cost comes from the cost of the multiply. */
7165 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7167 if (TARGET_HARD_FLOAT
7169 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7171 *total = COSTS_N_INSNS (1);
7172 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7173 && arm_const_double_rtx (XEXP (x, 1)))
7175 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7182 *total = COSTS_N_INSNS (20);
7186 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7187 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7189 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7190 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7191 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7192 *total += COSTS_N_INSNS (1);
7198 case AND: case XOR: case IOR:
      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
7205 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7206 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7207 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7208 *total = COSTS_N_INSNS (1);
7212 *total += COSTS_N_INSNS (2);
7213 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7214 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7223 *total += COSTS_N_INSNS (1);
7224 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7225 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7227 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7230 subcode = GET_CODE (XEXP (x, 0));
7231 if (subcode == ASHIFT || subcode == ASHIFTRT
7232 || subcode == LSHIFTRT
7233 || subcode == ROTATE || subcode == ROTATERT)
7235 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7236 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7241 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7243 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7244 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7248 if (subcode == UMIN || subcode == UMAX
7249 || subcode == SMIN || subcode == SMAX)
7251 *total = COSTS_N_INSNS (3);
7258 /* This should have been handled by the CPU specific routines. */
7262 if (arm_arch3m && mode == SImode
7263 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7264 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7265 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7266 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7267 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7268 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7270 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7273 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7277 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7279 if (TARGET_HARD_FLOAT
7281 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7283 *total = COSTS_N_INSNS (1);
7286 *total = COSTS_N_INSNS (2);
7292 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7293 if (mode == SImode && code == NOT)
7295 subcode = GET_CODE (XEXP (x, 0));
7296 if (subcode == ASHIFT || subcode == ASHIFTRT
7297 || subcode == LSHIFTRT
7298 || subcode == ROTATE || subcode == ROTATERT
7300 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7302 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7303 /* Register shifts cost an extra cycle. */
7304 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7305 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7314 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7316 *total = COSTS_N_INSNS (4);
7320 operand = XEXP (x, 0);
7322 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7323 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7324 && GET_CODE (XEXP (operand, 0)) == REG
7325 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7326 *total += COSTS_N_INSNS (1);
7327 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7328 + rtx_cost (XEXP (x, 2), code, 2, speed));
7332 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7334 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7340 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7341 && mode == SImode && XEXP (x, 1) == const0_rtx)
7343 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7349 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7350 && mode == SImode && XEXP (x, 1) == const0_rtx)
7352 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      /* SCC insns.  In the case where the comparison has already been
	 performed, they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
7375 *total = COSTS_N_INSNS (2);
7376 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7383 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7389 *total += COSTS_N_INSNS (1);
7390 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7391 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7393 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7397 subcode = GET_CODE (XEXP (x, 0));
7398 if (subcode == ASHIFT || subcode == ASHIFTRT
7399 || subcode == LSHIFTRT
7400 || subcode == ROTATE || subcode == ROTATERT)
7402 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7408 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7410 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7421 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7422 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7423 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7424 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7430 if (TARGET_HARD_FLOAT
7432 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7434 *total = COSTS_N_INSNS (1);
7437 *total = COSTS_N_INSNS (20);
7440 *total = COSTS_N_INSNS (1);
7442 *total += COSTS_N_INSNS (3);
7448 if (GET_MODE_CLASS (mode) == MODE_INT)
7450 rtx op = XEXP (x, 0);
7451 enum machine_mode opmode = GET_MODE (op);
7454 *total += COSTS_N_INSNS (1);
7456 if (opmode != SImode)
7460 /* If !arm_arch4, we use one of the extendhisi2_mem
7461 or movhi_bytes patterns for HImode. For a QImode
7462 sign extension, we first zero-extend from memory
7463 and then perform a shift sequence. */
7464 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7465 *total += COSTS_N_INSNS (2);
7468 *total += COSTS_N_INSNS (1);
	  /* We don't have the necessary insn, so we need to perform some
	     other operation.  */
7472 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7473 /* An and with constant 255. */
7474 *total += COSTS_N_INSNS (1);
7476 /* A shift sequence. Increase costs slightly to avoid
7477 combining two shifts into an extend operation. */
7478 *total += COSTS_N_INSNS (2) + 1;
7484 switch (GET_MODE (XEXP (x, 0)))
7491 *total = COSTS_N_INSNS (1);
7501 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7505 if (const_ok_for_arm (INTVAL (x))
7506 || const_ok_for_arm (~INTVAL (x)))
7507 *total = COSTS_N_INSNS (1);
7509 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7510 INTVAL (x), NULL_RTX,
7517 *total = COSTS_N_INSNS (3);
7521 *total = COSTS_N_INSNS (1);
7525 *total = COSTS_N_INSNS (1);
7526 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7530 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7531 && (mode == SFmode || !TARGET_VFP_SINGLE))
7532 *total = COSTS_N_INSNS (1);
7534 *total = COSTS_N_INSNS (4);
7541 *total = COSTS_N_INSNS (4);
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */
static int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
7552 enum machine_mode mode = GET_MODE (x);
7565 return COSTS_N_INSNS (1);
7568 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
7572 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7573 return COSTS_N_INSNS (1) + const_size;
7575 return COSTS_N_INSNS (1);
7578 return (COSTS_N_INSNS (1)
7579 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7580 + GET_CODE (SET_DEST (x)) == MEM));
7585 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7586 return COSTS_N_INSNS (1);
7587 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7588 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7589 return COSTS_N_INSNS (2);
7590 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7591 if (thumb_shiftable_const (INTVAL (x)))
7592 return COSTS_N_INSNS (2);
7593 return COSTS_N_INSNS (3);
7595 else if ((outer == PLUS || outer == COMPARE)
7596 && INTVAL (x) < 256 && INTVAL (x) > -256)
7598 else if ((outer == IOR || outer == XOR || outer == AND)
7599 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7600 return COSTS_N_INSNS (1);
7601 else if (outer == AND)
7604 /* This duplicates the tests in the andsi3 expander. */
7605 for (i = 9; i <= 31; i++)
7606 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7607 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7608 return COSTS_N_INSNS (2);
7610 else if (outer == ASHIFT || outer == ASHIFTRT
7611 || outer == LSHIFTRT)
7613 return COSTS_N_INSNS (2);
7619 return COSTS_N_INSNS (3);
7637 /* XXX another guess. */
7638 /* Memory costs quite a lot for the first word, but subsequent words
7639 load at the equivalent of a single insn each. */
7640 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7641 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7646 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7651 /* XXX still guessing. */
7652 switch (GET_MODE (XEXP (x, 0)))
7655 return (1 + (mode == DImode ? 4 : 0)
7656 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7659 return (4 + (mode == DImode ? 4 : 0)
7660 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7663 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7674 /* RTX costs when optimizing for size. */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
7679 enum machine_mode mode = GET_MODE (x);
7682 *total = thumb1_size_rtx_costs (x, code, outer_code);
7686 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7690 /* A memory access costs 1 insn if the mode is small, or the address is
7691 a single register, otherwise it costs one insn per word. */
7692 if (REG_P (XEXP (x, 0)))
7693 *total = COSTS_N_INSNS (1);
7695 && GET_CODE (XEXP (x, 0)) == PLUS
7696 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7697 /* This will be split into two instructions.
7698 See arm.md:calculate_pic_address. */
7699 *total = COSTS_N_INSNS (2);
7701 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7708 /* Needs a libcall, so it costs about this. */
7709 *total = COSTS_N_INSNS (2);
7713 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7715 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7723 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7725 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7728 else if (mode == SImode)
7730 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7731 /* Slightly disparage register shifts, but not by much. */
7732 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7733 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7737 /* Needs a libcall. */
7738 *total = COSTS_N_INSNS (2);
7742 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7743 && (mode == SFmode || !TARGET_VFP_SINGLE))
7745 *total = COSTS_N_INSNS (1);
7751 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7752 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7754 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7755 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7756 || subcode1 == ROTATE || subcode1 == ROTATERT
7757 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7758 || subcode1 == ASHIFTRT)
7760 /* It's just the cost of the two operands. */
7765 *total = COSTS_N_INSNS (1);
7769 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7773 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7774 && (mode == SFmode || !TARGET_VFP_SINGLE))
7776 *total = COSTS_N_INSNS (1);
7780 /* A shift as a part of ADD costs nothing. */
7781 if (GET_CODE (XEXP (x, 0)) == MULT
7782 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7784 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7785 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7786 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7791 case AND: case XOR: case IOR:
7794 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7796 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7797 || subcode == LSHIFTRT || subcode == ASHIFTRT
7798 || (code == AND && subcode == NOT))
7800 /* It's just the cost of the two operands. */
7806 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7810 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7814 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7815 && (mode == SFmode || !TARGET_VFP_SINGLE))
7817 *total = COSTS_N_INSNS (1);
7823 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7832 if (cc_register (XEXP (x, 0), VOIDmode))
7835 *total = COSTS_N_INSNS (1);
7839 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7840 && (mode == SFmode || !TARGET_VFP_SINGLE))
7841 *total = COSTS_N_INSNS (1);
7843 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7848 return arm_rtx_costs_1 (x, outer_code, total, 0);
7851 if (const_ok_for_arm (INTVAL (x)))
7852 /* A multiplication by a constant requires another instruction
7853 to load the constant to a register. */
7854 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7856 else if (const_ok_for_arm (~INTVAL (x)))
7857 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7858 else if (const_ok_for_arm (-INTVAL (x)))
7860 if (outer_code == COMPARE || outer_code == PLUS
7861 || outer_code == MINUS)
7864 *total = COSTS_N_INSNS (1);
7867 *total = COSTS_N_INSNS (2);
7873 *total = COSTS_N_INSNS (2);
7877 *total = COSTS_N_INSNS (4);
7882 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7883 cost of these slightly. */
7884 *total = COSTS_N_INSNS (1) + 1;
7891 if (mode != VOIDmode)
7892 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
    *total = COSTS_N_INSNS (4); /* Who knows?  */
/* RTX costs.  Dispatch to the size costs when optimizing for size,
   otherwise to the per-core speed costs.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
	       int *total, bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
			       (enum rtx_code) outer_code, total);
  else
7908 return current_tune->rtx_costs (x, (enum rtx_code) code,
7909 (enum rtx_code) outer_code,
7913 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7914 supported on any "slowmul" cores, so it can be ignored. */
static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
7920 enum machine_mode mode = GET_MODE (x);
7924 *total = thumb1_rtx_costs (x, code, outer_code);
7931 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7934 *total = COSTS_N_INSNS (20);
7938 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7940 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7941 & (unsigned HOST_WIDE_INT) 0xffffffff);
7942 int cost, const_ok = const_ok_for_arm (i);
7943 int j, booth_unit_size;
7945 /* Tune as appropriate. */
7946 cost = const_ok ? 4 : 8;
7947 booth_unit_size = 2;
7948 for (j = 0; i && j < 32; j += booth_unit_size)
7950 i >>= booth_unit_size;
7954 *total = COSTS_N_INSNS (cost);
7955 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7959 *total = COSTS_N_INSNS (20);
  return arm_rtx_costs_1 (x, outer_code, total, speed);
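/* Illustrative sketch of the Booth-step counting used in the MULT case
   above: the loop shifts the constant multiplier two bits at a time
   until nothing is left, charging one unit per step.  Standalone, with
   an arbitrary example multiplier.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long i = 0x15;   /* example multiplier, binary 10101 */
  int steps = 0;
  for (int j = 0; i && j < 32; j += 2)
    {
      i >>= 2;
      steps++;
    }
  printf ("booth steps = %d\n", steps);   /* 3 for 0x15 */
  return 0;
}
#endif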
7968 /* RTX cost for cores with a fast multiply unit (M variants). */
static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
{
7974 enum machine_mode mode = GET_MODE (x);
7978 *total = thumb1_rtx_costs (x, code, outer_code);
7982 /* ??? should thumb2 use different costs? */
7986 /* There is no point basing this on the tuning, since it is always the
7987 fast variant if it exists at all. */
7989 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7990 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7991 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
      *total = COSTS_N_INSNS (2);
8000 *total = COSTS_N_INSNS (5);
8004 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8006 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8007 & (unsigned HOST_WIDE_INT) 0xffffffff);
8008 int cost, const_ok = const_ok_for_arm (i);
8009 int j, booth_unit_size;
8011 /* Tune as appropriate. */
8012 cost = const_ok ? 4 : 8;
8013 booth_unit_size = 8;
8014 for (j = 0; i && j < 32; j += booth_unit_size)
8016 i >>= booth_unit_size;
	  *total = COSTS_N_INSNS (cost);
8026 *total = COSTS_N_INSNS (4);
8030 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8032 if (TARGET_HARD_FLOAT
8034 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8036 *total = COSTS_N_INSNS (1);
8041 /* Requires a lib call */
8042 *total = COSTS_N_INSNS (20);
8046 return arm_rtx_costs_1 (x, outer_code, total, speed);
8051 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8052 so it can be ignored. */
static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
{
8058 enum machine_mode mode = GET_MODE (x);
8062 *total = thumb1_rtx_costs (x, code, outer_code);
8069 if (GET_CODE (XEXP (x, 0)) != MULT)
8070 return arm_rtx_costs_1 (x, outer_code, total, speed);
8072 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8073 will stall until the multiplication is complete. */
8074 *total = COSTS_N_INSNS (3);
8078 /* There is no point basing this on the tuning, since it is always the
8079 fast variant if it exists at all. */
8081 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8082 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8083 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8085 *total = COSTS_N_INSNS (2);
8092 *total = COSTS_N_INSNS (5);
8096 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8098 /* If operand 1 is a constant we can more accurately
8099 calculate the cost of the multiply. The multiplier can
8100 retire 15 bits on the first cycle and a further 12 on the
8101 second. We do, of course, have to load the constant into
8102 a register first. */
8103 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8104 /* There's a general overhead of one cycle. */
8106 unsigned HOST_WIDE_INT masked_const;
8111 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8113 masked_const = i & 0xffff8000;
8114 if (masked_const != 0)
8117 masked_const = i & 0xf8000000;
8118 if (masked_const != 0)
8121 *total = COSTS_N_INSNS (cost);
8127 *total = COSTS_N_INSNS (3);
8131 /* Requires a lib call */
8132 *total = COSTS_N_INSNS (20);
8136 return arm_rtx_costs_1 (x, outer_code, total, speed);
8141 /* RTX costs for 9e (and later) cores. */
static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
{
8147 enum machine_mode mode = GET_MODE (x);
8154 *total = COSTS_N_INSNS (3);
8158 *total = thumb1_rtx_costs (x, code, outer_code);
8166 /* There is no point basing this on the tuning, since it is always the
8167 fast variant if it exists at all. */
8169 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8170 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8171 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8173 *total = COSTS_N_INSNS (2);
8180 *total = COSTS_N_INSNS (5);
8186 *total = COSTS_N_INSNS (2);
8190 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8192 if (TARGET_HARD_FLOAT
8194 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8196 *total = COSTS_N_INSNS (1);
8201 *total = COSTS_N_INSNS (20);
8205 return arm_rtx_costs_1 (x, outer_code, total, speed);
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most preferred first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static int
arm_arm_address_cost (rtx x)
{
8215 enum rtx_code c = GET_CODE (x);
8217 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8219 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8224 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8227 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
static int
arm_thumb_address_cost (rtx x)
{
8239 enum rtx_code c = GET_CODE (x);
8244 && GET_CODE (XEXP (x, 0)) == REG
8245 && GET_CODE (XEXP (x, 1)) == CONST_INT)
static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
8254 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8257 /* Adjust cost hook for XScale. */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
8261 /* Some true dependencies can have a higher cost depending
8262 on precisely how certain input operands are used. */
8263 if (REG_NOTE_KIND(link) == 0
8264 && recog_memoized (insn) >= 0
8265 && recog_memoized (dep) >= 0)
8267 int shift_opnum = get_attr_shift (insn);
8268 enum attr_type attr_type = get_attr_type (dep);
8270 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8271 operand for INSN. If we have a shifted input operand and the
8272 instruction we depend on is another ALU instruction, then we may
8273 have to account for an additional stall. */
8274 if (shift_opnum != 0
8275 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8277 rtx shifted_operand;
8280 /* Get the shifted operand. */
8281 extract_insn (insn);
8282 shifted_operand = recog_data.operand[shift_opnum];
	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
8288 preprocess_constraints ();
8289 for (opno = 0; opno < recog_data.n_operands; opno++)
8291 /* We can ignore strict inputs. */
8292 if (recog_data.operand_type[opno] == OP_IN)
8295 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8307 /* Adjust cost hook for Cortex A9. */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
8311 switch (REG_NOTE_KIND (link))
8318 case REG_DEP_OUTPUT:
8319 if (recog_memoized (insn) >= 0
8320 && recog_memoized (dep) >= 0)
8322 if (GET_CODE (PATTERN (insn)) == SET)
8325 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8327 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8329 enum attr_type attr_type_insn = get_attr_type (insn);
8330 enum attr_type attr_type_dep = get_attr_type (dep);
		/* By default all dependencies of the form
		   s0 = s0 <op> s1
		   s0 = s0 <op> s2
		   have an extra latency of 1 cycle because
		   of the input and output dependency in this
		   case.  However this gets modeled as a true
		   dependency and hence all these checks.  */
8339 if (REG_P (SET_DEST (PATTERN (insn)))
8340 && REG_P (SET_DEST (PATTERN (dep)))
8341 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8342 SET_DEST (PATTERN (dep))))
		    /* FMACS is a special case where the dependent
		       instruction can be issued 3 cycles before
		       the normal latency in case of an output
		       dependency.  */
8348 if ((attr_type_insn == TYPE_FMACS
8349 || attr_type_insn == TYPE_FMACD)
8350 && (attr_type_dep == TYPE_FMACS
8351 || attr_type_dep == TYPE_FMACD))
8353 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8354 *cost = insn_default_latency (dep) - 3;
8356 *cost = insn_default_latency (dep);
8361 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8362 *cost = insn_default_latency (dep) + 1;
8364 *cost = insn_default_latency (dep);
8380 /* Adjust cost hook for FA726TE. */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
8386 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8387 && recog_memoized (insn) >= 0
8388 && recog_memoized (dep) >= 0
8389 && get_attr_conds (dep) == CONDS_SET)
8391 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8392 if (get_attr_conds (insn) == CONDS_USE
8393 && get_attr_type (insn) != TYPE_BRANCH)
8399 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8400 || get_attr_conds (insn) == CONDS_USE)
8410 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8411 It corrects the value of COST based on the relationship between
8412 INSN and DEP through the dependence LINK. It returns the new
8413 value. There is a per-core adjust_cost hook to adjust scheduler costs
8414 and the per-core hook can choose to completely override the generic
8415 adjust_cost function. Only put bits of code into arm_adjust_cost that
8416 are common across all cores. */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;
8422 /* When generating Thumb-1 code, we want to place flag-setting operations
8423 close to a conditional branch which depends on them, so that we can
8424 omit the comparison. */
8426 && REG_NOTE_KIND (link) == 0
8427 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8428 && recog_memoized (dep) >= 0
8429 && get_attr_conds (dep) == CONDS_SET)
8432 if (current_tune->sched_adjust_cost != NULL)
8434 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8438 /* XXX This is not strictly true for the FPA. */
8439 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8440 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8443 /* Call insns don't incur a stall, even if they follow a load. */
8444 if (REG_NOTE_KIND (link) == 0
8445 && GET_CODE (insn) == CALL_INSN)
8448 if ((i_pat = single_set (insn)) != NULL
8449 && GET_CODE (SET_SRC (i_pat)) == MEM
8450 && (d_pat = single_set (dep)) != NULL
8451 && GET_CODE (SET_DEST (d_pat)) == MEM)
8453 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8454 /* This is a load after a store, there is no conflict if the load reads
8455 from a cached area. Assume that loads from the stack, and from the
	     constant pool are cached, and that others will miss.  This is a
	     worst case assumption.  */
8459 if ((GET_CODE (src_mem) == SYMBOL_REF
8460 && CONSTANT_POOL_ADDRESS_P (src_mem))
8461 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8462 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8463 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
8474 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8476 return (optimize > 0) ? 2 : 0;
static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
8482 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8485 static int fp_consts_inited = 0;
8487 /* Only zero is valid for VFP. Other values are also valid for FPA. */
static const char * const strings_fp[8] =
{
  "0", "1", "2", "3",
  "4", "5", "0.5", "10"
};
8494 static REAL_VALUE_TYPE values_fp[8];
static void
init_fp_table (void)
{
  int i;
  REAL_VALUE_TYPE r;

  if (TARGET_VFP)
    fp_consts_inited = 1;
  else
    fp_consts_inited = 8;
  for (i = 0; i < fp_consts_inited; i++)
    {
      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
      values_fp[i] = r;
    }
}
8514 /* Return TRUE if rtx X is a valid immediate FP constant. */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();
8524 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;
8528 for (i = 0; i < fp_consts_inited; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
8535 /* Return TRUE if rtx X is a valid immediate FPA constant. */
int
neg_const_double_rtx_ok_for_fpa (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();
8545 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8546 r = real_value_negate (&r);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;
8550 for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
8558 /* VFPv3 has a fairly wide range of representable immediates, formed from
8559 "quarter-precision" floating-point values. These can be evaluated using this
   formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.
8567 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8569 - A (most-significant) is the sign bit.
8570 - BCD are the exponent (encoded as r XOR 3).
8571 - EFGH are the mantissa (encoded as n - 16).
8574 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8575 fconst[sd] instruction, or -1 if X isn't suitable. */
8577 vfp3_const_double_index (rtx x)
8579 REAL_VALUE_TYPE r, m;
8581 unsigned HOST_WIDE_INT mantissa, mant_hi;
8582 unsigned HOST_WIDE_INT mask;
8583 HOST_WIDE_INT m1, m2;
8584 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8586 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8589 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8591 /* We can't represent these things, so detect them first. */
8592 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8595 /* Extract sign, exponent and mantissa. */
8596 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8597 r = real_value_abs (&r);
8598 exponent = REAL_EXP (&r);
8599 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8600 highest (sign) bit, with a fixed binary point at bit point_pos.
8601 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8602 bits for the mantissa, this may fail (low bits would be lost). */
8603 real_ldexp (&m, &r, point_pos - exponent);
8604 REAL_VALUE_TO_INT (&m1, &m2, m);
8608 /* If there are bits set in the low part of the mantissa, we can't
8609 represent this value. */
  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
8616 point_pos -= HOST_BITS_PER_WIDE_INT;
8619 /* We can permit four significant bits of mantissa only, plus a high bit
8620 which is always 1. */
8621 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8622 if ((mantissa & mask) != 0)
8625 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8626 mantissa >>= point_pos - 5;
8628 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8629 floating-point immediate zero with Neon using an integer-zero load, but
8630 that case is handled elsewhere.) */
8634 gcc_assert (mantissa >= 16 && mantissa <= 31);
8636 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8637 normalized significands are in the range [1, 2). (Our mantissa is shifted
8638 left 4 places at this point relative to normalized IEEE754 values). GCC
8639 internally uses [0.5, 1) (see real.c), so the exponent returned from
8640 REAL_EXP must be altered. */
8641 exponent = 5 - exponent;
8643 if (exponent < 0 || exponent > 7)
8646 /* Sign, mantissa and exponent are now in the correct form to plug into the
8647 formula described in the comment above. */
8648 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
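/* Standalone check of the encoding above for the value 1.0, which is
   -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4; illustrative only, not
   part of the compiler.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int sign = 0, n = 16, r = 4;
  int index = (sign << 7) | ((r ^ 3) << 4) | (n - 16);
  printf ("fconst index for 1.0 = %#x\n", index);  /* prints 0x70 */
  return 0;
}
#endif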
8651 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
bool
vfp3_const_double_rtx (rtx x)
{
  return vfp3_const_double_index (x) != -1;
}
8661 /* Recognize immediates which can be used in various Neon instructions. Legal
8662 immediates are described by the following table (for VMVN variants, the
8663 bitwise inverse of the constant shown is recognized. In either case, VMOV
8664 is output and the correct instruction to use for a given constant is chosen
8665 by the assembler). The constant shown is replicated across all elements of
8666 the destination vector.
8668 insn elems variant constant (binary)
8669 ---- ----- ------- -----------------
8670 vmov i32 0 00000000 00000000 00000000 abcdefgh
8671 vmov i32 1 00000000 00000000 abcdefgh 00000000
8672 vmov i32 2 00000000 abcdefgh 00000000 00000000
8673 vmov i32 3 abcdefgh 00000000 00000000 00000000
8674 vmov i16 4 00000000 abcdefgh
8675 vmov i16 5 abcdefgh 00000000
8676 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8677 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8678 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8679 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8680 vmvn i16 10 00000000 abcdefgh
8681 vmvn i16 11 abcdefgh 00000000
8682 vmov i32 12 00000000 00000000 abcdefgh 11111111
8683 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8684 vmov i32 14 00000000 abcdefgh 11111111 11111111
8685 vmvn i32 15 00000000 abcdefgh 11111111 11111111
     vmov  i8   16    abcdefgh
     vmov  i64  17    aaaaaaaa bbbbbbbb cccccccc dddddddd
8688 eeeeeeee ffffffff gggggggg hhhhhhhh
8689 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.
8695 Variants 0-5 (inclusive) may also be used as immediates for the second
8696 operand of VORR/VBIC instructions.
8698 The INVERSE argument causes the bitwise inverse of the given operand to be
8699 recognized instead (used for recognizing legal immediates for the VAND/VORN
8700 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8701 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8702 output, rather than the real insns vbic/vorr).
8704 INVERSE makes no difference to the recognition of float vectors.
8706 The return value is the variant of immediate as shown in the above table, or
8707 -1 if the given value doesn't match any of the listed patterns.
8710 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8711 rtx *modconst, int *elementwidth)
8713 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8715 for (i = 0; i < idx; i += (STRIDE)) \
8720 immtype = (CLASS); \
8721 elsize = (ELSIZE); \
8725 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8726 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8727 unsigned char bytes[16];
8728 int immtype = -1, matches;
8729 unsigned int invmask = inverse ? 0xff : 0;
8731 /* Vectors of float constants. */
8732 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8734 rtx el0 = CONST_VECTOR_ELT (op, 0);
8737 if (!vfp3_const_double_rtx (el0))
8740 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8742 for (i = 1; i < n_elts; i++)
8744 rtx elt = CONST_VECTOR_ELT (op, i);
8747 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8749 if (!REAL_VALUES_EQUAL (r0, re))
8754 *modconst = CONST_VECTOR_ELT (op, 0);
8762 /* Splat vector constant out into a byte vector. */
8763 for (i = 0; i < n_elts; i++)
8765 rtx el = CONST_VECTOR_ELT (op, i);
8766 unsigned HOST_WIDE_INT elpart;
8767 unsigned int part, parts;
8769 if (GET_CODE (el) == CONST_INT)
8771 elpart = INTVAL (el);
8774 else if (GET_CODE (el) == CONST_DOUBLE)
8776 elpart = CONST_DOUBLE_LOW (el);
8782 for (part = 0; part < parts; part++)
8785 for (byte = 0; byte < innersize; byte++)
8787 bytes[idx++] = (elpart & 0xff) ^ invmask;
8788 elpart >>= BITS_PER_UNIT;
8790 if (GET_CODE (el) == CONST_DOUBLE)
8791 elpart = CONST_DOUBLE_HIGH (el);
8796 gcc_assert (idx == GET_MODE_SIZE (mode));
8800 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8801 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8803 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8804 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8806 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8807 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8809 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8810 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8812 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8814 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8816 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8817 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8819 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8820 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8822 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8823 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8825 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8826 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8828 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8830 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8832 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8833 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8835 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8836 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8838 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8839 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8841 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8842 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8844 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8846 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8847 && bytes[i] == bytes[(i + 8) % idx]);
8855 *elementwidth = elsize;
8859 unsigned HOST_WIDE_INT imm = 0;
8861 /* Un-invert bytes of recognized vector, if necessary. */
8863 for (i = 0; i < idx; i++)
8864 bytes[i] ^= invmask;
8868 /* FIXME: Broken on 32-bit H_W_I hosts. */
8869 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8871 for (i = 0; i < 8; i++)
8872 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8873 << (i * BITS_PER_UNIT);
8875 *modconst = GEN_INT (imm);
8879 unsigned HOST_WIDE_INT imm = 0;
8881 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8882 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8884 *modconst = GEN_INT (imm);
8892 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
8893 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8894 float elements), and a modified constant (whatever should be output for a
8895 VMOV) in *MODCONST. */
8898 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8899 rtx *modconst, int *elementwidth)
8903 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8909 *modconst = tmpconst;
8912 *elementwidth = tmpwidth;
8917 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
8918 the immediate is valid, write a constant suitable for using as an operand
8919 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8920 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8923 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8924 rtx *modconst, int *elementwidth)
8928 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8930 if (retval < 0 || retval > 5)
8934 *modconst = tmpconst;
8937 *elementwidth = tmpwidth;
8942 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
8943 the immediate is valid, write a constant suitable for using as an operand
8944 to VSHR/VSHL to *MODCONST and the corresponding element width to
8945 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
8946 which have different immediate ranges.
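For example (illustrative): with 8-bit elements, a valid VSHL immediate
lies in [0, 7], while a valid VSHR immediate lies in [1, 8].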
8949 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
8950 rtx *modconst, int *elementwidth,
8953 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8954 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
8955 unsigned HOST_WIDE_INT last_elt = 0;
8956 unsigned HOST_WIDE_INT maxshift;
8958 /* All elements of the vector must hold the same constant; extract it. */
8959 for (i = 0; i < n_elts; i++)
8961 rtx el = CONST_VECTOR_ELT (op, i);
8962 unsigned HOST_WIDE_INT elpart;
8964 if (GET_CODE (el) == CONST_INT)
8965 elpart = INTVAL (el);
8966 else if (GET_CODE (el) == CONST_DOUBLE)
8971 if (i != 0 && elpart != last_elt)
8977 /* Shift less than element size. */
8978 maxshift = innersize * 8;
8982 /* Left shift immediate value can be from 0 to <size>-1. */
8983 if (last_elt >= maxshift)
8988 /* Right shift immediate value can be from 1 to <size>. */
8989 if (last_elt == 0 || last_elt > maxshift)
8994 *elementwidth = innersize * 8;
8997 *modconst = CONST_VECTOR_ELT (op, 0);
9002 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
9006 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9007 int inverse, int quad)
9009 int width, is_valid;
9010 static char templ[40];
9012 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9014 gcc_assert (is_valid != 0);
9017 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9019 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
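/* For example (a sketch of the output shape, mirroring the sprintf calls
above): a quad-register VBIC on 16-bit elements yields the template
"vbic.i16\t%q0, %2". */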
9024 /* Return a string suitable for output of Neon immediate shift operation
9025 (VSHR or VSHL) MNEM. */
9028 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9029 enum machine_mode mode, int quad,
9032 int width, is_valid;
9033 static char templ[40];
9035 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9036 gcc_assert (is_valid != 0);
9039 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9041 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9046 /* Output a sequence of pairwise operations to implement a reduction.
9047 NOTE: We do "too much work" here, because pairwise operations work on two
9048 registers' worth of operands in one go. Unfortunately, I don't think we can
9049 exploit those extra calculations to do the full operation in fewer steps.
9050 Although all vector elements of the result but the first are ignored, we
9051 actually calculate the same result in each of the elements. An alternative
9052 such as initially loading a vector with zero to use as each of the second
9053 operands would use up an additional register and take an extra instruction,
9054 for no particular gain. */
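For example (assuming the reduction operation is addition): a four-element
vector {a,b,c,d} is reduced in two pairwise steps, first to {a+b, c+d, ...}
and then to {a+b+c+d, ...}, leaving the sum duplicated in every element.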
9057 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9058 rtx (*reduc) (rtx, rtx, rtx))
9060 enum machine_mode inner = GET_MODE_INNER (mode);
9061 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9064 for (i = parts / 2; i >= 1; i /= 2)
9066 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9067 emit_insn (reduc (dest, tmpsum, tmpsum));
9072 /* If VALS is a vector constant that can be loaded into a register
9073 using VDUP, generate instructions to do so and return an RTX to
9074 assign to the register. Otherwise return NULL_RTX. */
9077 neon_vdup_constant (rtx vals)
9079 enum machine_mode mode = GET_MODE (vals);
9080 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9081 int n_elts = GET_MODE_NUNITS (mode);
9082 bool all_same = true;
9086 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9089 for (i = 0; i < n_elts; ++i)
9091 x = XVECEXP (vals, 0, i);
9092 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9097 /* The elements are not all the same. We could handle repeating
9098 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9099 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
9103 /* We can load this constant by using VDUP and a constant in a
9104 single ARM register. This will be cheaper than a vector load. */
9107 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9108 return gen_rtx_VEC_DUPLICATE (mode, x);
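/* For example (illustrative): a V2SImode vector {5, 5} becomes a move of #5
into a core register followed by a vdup.32 from that register. */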
9111 /* Generate code to load VALS, which is a PARALLEL containing only
9112 constants (for vec_init) or CONST_VECTOR, efficiently into a
9113 register. Returns an RTX to copy into the register, or NULL_RTX
9114 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9117 neon_make_constant (rtx vals)
9119 enum machine_mode mode = GET_MODE (vals);
9121 rtx const_vec = NULL_RTX;
9122 int n_elts = GET_MODE_NUNITS (mode);
9126 if (GET_CODE (vals) == CONST_VECTOR)
9128 else if (GET_CODE (vals) == PARALLEL)
9130 /* A CONST_VECTOR must contain only CONST_INTs and
9131 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9132 Only store valid constants in a CONST_VECTOR. */
9133 for (i = 0; i < n_elts; ++i)
9135 rtx x = XVECEXP (vals, 0, i);
9136 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9139 if (n_const == n_elts)
9140 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9145 if (const_vec != NULL
9146 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9147 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9149 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9150 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9151 pipeline cycle; creating the constant takes one or two ARM
9154 else if (const_vec != NULL_RTX)
9155 /* Load from constant pool. On Cortex-A8 this takes two cycles
9156 (for either double or quad vectors). We can not take advantage
9157 of single-cycle VLD1 because we need a PC-relative addressing mode. */
9161 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9162 We can not construct an initializer. */
9166 /* Initialize vector TARGET to VALS. */
9169 neon_expand_vector_init (rtx target, rtx vals)
9171 enum machine_mode mode = GET_MODE (target);
9172 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9173 int n_elts = GET_MODE_NUNITS (mode);
9174 int n_var = 0, one_var = -1;
9175 bool all_same = true;
9179 for (i = 0; i < n_elts; ++i)
9181 x = XVECEXP (vals, 0, i);
9182 if (!CONSTANT_P (x))
9183 ++n_var, one_var = i;
9185 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9191 rtx constant = neon_make_constant (vals);
9192 if (constant != NULL_RTX)
9194 emit_move_insn (target, constant);
9199 /* Splat a single non-constant element if we can. */
9200 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9202 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9203 emit_insn (gen_rtx_SET (VOIDmode, target,
9204 gen_rtx_VEC_DUPLICATE (mode, x)));
9208 /* One field is non-constant. Load constant then overwrite varying
9209 field. This is more efficient than using the stack. */
9212 rtx copy = copy_rtx (vals);
9213 rtx index = GEN_INT (one_var);
9215 /* Load constant part of vector, substitute neighboring value for varying element. */
9217 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9218 neon_expand_vector_init (target, copy);
9220 /* Insert variable. */
9221 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9225 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9228 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9231 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9234 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9237 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9240 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9243 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9246 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9249 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9257 /* Construct the vector in memory one field at a time
9258 and load the whole vector. */
9259 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9260 for (i = 0; i < n_elts; i++)
9261 emit_move_insn (adjust_address_nv (mem, inner_mode,
9262 i * GET_MODE_SIZE (inner_mode)),
9263 XVECEXP (vals, 0, i));
9264 emit_move_insn (target, mem);
9267 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9268 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9269 reported source locations are bogus. */
9272 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9277 gcc_assert (GET_CODE (operand) == CONST_INT);
9279 lane = INTVAL (operand);
9281 if (lane < low || lane >= high)
9285 /* Bounds-check lanes. */
9288 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9290 bounds_check (operand, low, high, "lane out of range");
9293 /* Bounds-check constants. */
9296 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9298 bounds_check (operand, low, high, "constant out of range");
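/* For example (illustrative): neon_lane_bounds (operand, 0, 4) accepts lanes
0-3 only, since HIGH is exclusive in bounds_check above. */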
9302 neon_element_bits (enum machine_mode mode)
9305 return GET_MODE_BITSIZE (mode);
9307 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9311 /* Predicates for `match_operand' and `match_operator'. */
9313 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9315 cirrus_memory_offset (rtx op)
9317 /* Reject eliminable registers. */
9318 if (! (reload_in_progress || reload_completed)
9319 && ( reg_mentioned_p (frame_pointer_rtx, op)
9320 || reg_mentioned_p (arg_pointer_rtx, op)
9321 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9322 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9323 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9324 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9327 if (GET_CODE (op) == MEM)
9333 /* Match: (mem (reg)). */
9334 if (GET_CODE (ind) == REG)
9340 if (GET_CODE (ind) == PLUS
9341 && GET_CODE (XEXP (ind, 0)) == REG
9342 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9343 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9350 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9351 WB is true if full writeback address modes are allowed and is false
9352 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
9356 arm_coproc_mem_operand (rtx op, bool wb)
9360 /* Reject eliminable registers. */
9361 if (! (reload_in_progress || reload_completed)
9362 && ( reg_mentioned_p (frame_pointer_rtx, op)
9363 || reg_mentioned_p (arg_pointer_rtx, op)
9364 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9365 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9366 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9367 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9370 /* Constants are converted into offsets from labels. */
9371 if (GET_CODE (op) != MEM)
9376 if (reload_completed
9377 && (GET_CODE (ind) == LABEL_REF
9378 || (GET_CODE (ind) == CONST
9379 && GET_CODE (XEXP (ind, 0)) == PLUS
9380 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9381 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9384 /* Match: (mem (reg)). */
9385 if (GET_CODE (ind) == REG)
9386 return arm_address_register_rtx_p (ind, 0);
9388 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9389 acceptable in any case (subject to verification by
9390 arm_address_register_rtx_p). We need WB to be true to accept
9391 PRE_INC and POST_DEC. */
9392 if (GET_CODE (ind) == POST_INC
9393 || GET_CODE (ind) == PRE_DEC
9395 && (GET_CODE (ind) == PRE_INC
9396 || GET_CODE (ind) == POST_DEC)))
9397 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9400 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9401 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9402 && GET_CODE (XEXP (ind, 1)) == PLUS
9403 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9404 ind = XEXP (ind, 1);
9409 if (GET_CODE (ind) == PLUS
9410 && GET_CODE (XEXP (ind, 0)) == REG
9411 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9412 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9413 && INTVAL (XEXP (ind, 1)) > -1024
9414 && INTVAL (XEXP (ind, 1)) < 1024
9415 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9421 /* Return TRUE if OP is a memory operand which we can load or store a vector
9422 to/from. TYPE is one of the following values:
9423 0 - Vector load/store (vldr)
9424 1 - Core registers (ldm)
9425 2 - Element/structure loads (vld1)
9428 neon_vector_mem_operand (rtx op, int type)
9432 /* Reject eliminable registers. */
9433 if (! (reload_in_progress || reload_completed)
9434 && ( reg_mentioned_p (frame_pointer_rtx, op)
9435 || reg_mentioned_p (arg_pointer_rtx, op)
9436 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9437 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9438 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9439 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9442 /* Constants are converted into offsets from labels. */
9443 if (GET_CODE (op) != MEM)
9448 if (reload_completed
9449 && (GET_CODE (ind) == LABEL_REF
9450 || (GET_CODE (ind) == CONST
9451 && GET_CODE (XEXP (ind, 0)) == PLUS
9452 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9453 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9456 /* Match: (mem (reg)). */
9457 if (GET_CODE (ind) == REG)
9458 return arm_address_register_rtx_p (ind, 0);
9460 /* Allow post-increment with Neon registers. */
9461 if ((type != 1 && GET_CODE (ind) == POST_INC)
9462 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9463 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9465 /* FIXME: vld1 allows register post-modify. */
9471 && GET_CODE (ind) == PLUS
9472 && GET_CODE (XEXP (ind, 0)) == REG
9473 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9474 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9475 && INTVAL (XEXP (ind, 1)) > -1024
9476 && INTVAL (XEXP (ind, 1)) < 1016
9477 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9483 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
9486 neon_struct_mem_operand (rtx op)
9490 /* Reject eliminable registers. */
9491 if (! (reload_in_progress || reload_completed)
9492 && ( reg_mentioned_p (frame_pointer_rtx, op)
9493 || reg_mentioned_p (arg_pointer_rtx, op)
9494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9500 /* Constants are converted into offsets from labels. */
9501 if (GET_CODE (op) != MEM)
9506 if (reload_completed
9507 && (GET_CODE (ind) == LABEL_REF
9508 || (GET_CODE (ind) == CONST
9509 && GET_CODE (XEXP (ind, 0)) == PLUS
9510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9511 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9514 /* Match: (mem (reg)). */
9515 if (GET_CODE (ind) == REG)
9516 return arm_address_register_rtx_p (ind, 0);
9518 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9519 if (GET_CODE (ind) == POST_INC
9520 || GET_CODE (ind) == PRE_DEC)
9521 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9526 /* Return true if X is a register that will be eliminated later on. */
9528 arm_eliminable_register (rtx x)
9530 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9531 || REGNO (x) == ARG_POINTER_REGNUM
9532 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9533 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9536 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9537 coprocessor registers. Otherwise return NO_REGS. */
9540 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9544 if (!TARGET_NEON_FP16)
9545 return GENERAL_REGS;
9546 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9548 return GENERAL_REGS;
9551 /* The neon move patterns handle all legitimate vector and struct
9552 addresses. */
9553 if (TARGET_NEON
9554 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9555 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9556 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9557 || VALID_NEON_STRUCT_MODE (mode)))
9560 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9563 return GENERAL_REGS;
9566 /* Values which must be returned in the most-significant end of the return
9567 register. */
9570 arm_return_in_msb (const_tree valtype)
9572 return (TARGET_AAPCS_BASED
9574 && (AGGREGATE_TYPE_P (valtype)
9575 || TREE_CODE (valtype) == COMPLEX_TYPE
9576 || FIXED_POINT_TYPE_P (valtype)));
9579 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9580 Used by the Cirrus Maverick code, which has to work around
9581 a hardware bug triggered by such instructions. */
9583 arm_memory_load_p (rtx insn)
9585 rtx body, lhs, rhs;
9587 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9590 body = PATTERN (insn);
9592 if (GET_CODE (body) != SET)
9595 lhs = XEXP (body, 0);
9596 rhs = XEXP (body, 1);
9598 lhs = REG_OR_SUBREG_RTX (lhs);
9600 /* If the destination is not a general purpose
9601 register we do not have to worry. */
9602 if (GET_CODE (lhs) != REG
9603 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9606 /* As well as loads from memory we also have to react
9607 to loads of invalid constants which will be turned
9608 into loads from the minipool. */
9609 return (GET_CODE (rhs) == MEM
9610 || GET_CODE (rhs) == SYMBOL_REF
9611 || note_invalid_constants (insn, -1, false));
9614 /* Return TRUE if INSN is a Cirrus instruction. */
9616 arm_cirrus_insn_p (rtx insn)
9618 enum attr_cirrus attr;
9620 /* get_attr cannot accept USE or CLOBBER. */
9622 || GET_CODE (insn) != INSN
9623 || GET_CODE (PATTERN (insn)) == USE
9624 || GET_CODE (PATTERN (insn)) == CLOBBER)
9627 attr = get_attr_cirrus (insn);
9629 return attr != CIRRUS_NOT;
9632 /* Cirrus reorg for invalid instruction combinations. */
9634 cirrus_reorg (rtx first)
9636 enum attr_cirrus attr;
9637 rtx body = PATTERN (first);
9641 /* Any branch must be followed by 2 non Cirrus instructions. */
9642 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9645 t = next_nonnote_insn (first);
9647 if (arm_cirrus_insn_p (t))
9650 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9654 emit_insn_after (gen_nop (), first);
9659 /* (float (blah)) is in parallel with a clobber. */
9660 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9661 body = XVECEXP (body, 0, 0);
9663 if (GET_CODE (body) == SET)
9665 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9667 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9668 be followed by a non Cirrus insn. */
9669 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9671 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9672 emit_insn_after (gen_nop (), first);
9676 else if (arm_memory_load_p (first))
9678 unsigned int arm_regno;
9680 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9681 ldr/cfmv64hr combination where the Rd field is the same
9682 in both instructions must be split with a non Cirrus
9683 insn. Example:
9685 ldr r0, blah
9686 nop
9687 cfmvsr mvf0, r0. */
9689 /* Get Arm register number for ldr insn. */
9690 if (GET_CODE (lhs) == REG)
9691 arm_regno = REGNO (lhs);
9694 gcc_assert (GET_CODE (rhs) == REG);
9695 arm_regno = REGNO (rhs);
9699 first = next_nonnote_insn (first);
9701 if (! arm_cirrus_insn_p (first))
9704 body = PATTERN (first);
9706 /* (float (blah)) is in parallel with a clobber. */
9707 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9708 body = XVECEXP (body, 0, 0);
9710 if (GET_CODE (body) == FLOAT)
9711 body = XEXP (body, 0);
9713 if (get_attr_cirrus (first) == CIRRUS_MOVE
9714 && GET_CODE (XEXP (body, 1)) == REG
9715 && arm_regno == REGNO (XEXP (body, 1)))
9716 emit_insn_after (gen_nop (), first);
9722 /* get_attr cannot accept USE or CLOBBER. */
9724 || GET_CODE (first) != INSN
9725 || GET_CODE (PATTERN (first)) == USE
9726 || GET_CODE (PATTERN (first)) == CLOBBER)
9729 attr = get_attr_cirrus (first);
9731 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9732 must be followed by a non-coprocessor instruction. */
9733 if (attr == CIRRUS_COMPARE)
9737 t = next_nonnote_insn (first);
9739 if (arm_cirrus_insn_p (t))
9742 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9746 emit_insn_after (gen_nop (), first);
9752 /* Return TRUE if X references a SYMBOL_REF. */
9754 symbol_mentioned_p (rtx x)
9759 if (GET_CODE (x) == SYMBOL_REF)
9762 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9763 are constant offsets, not symbols. */
9764 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9767 fmt = GET_RTX_FORMAT (GET_CODE (x));
9769 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9775 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9776 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9779 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9786 /* Return TRUE if X references a LABEL_REF. */
9788 label_mentioned_p (rtx x)
9793 if (GET_CODE (x) == LABEL_REF)
9796 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9797 instruction, but they are constant offsets, not symbols. */
9798 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9801 fmt = GET_RTX_FORMAT (GET_CODE (x));
9802 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9808 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9809 if (label_mentioned_p (XVECEXP (x, i, j)))
9812 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9820 tls_mentioned_p (rtx x)
9822 switch (GET_CODE (x))
9825 return tls_mentioned_p (XEXP (x, 0));
9828 if (XINT (x, 1) == UNSPEC_TLS)
9836 /* Must not copy any rtx that uses a pc-relative address. */
9839 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9841 if (GET_CODE (*x) == UNSPEC
9842 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9848 arm_cannot_copy_insn_p (rtx insn)
9850 /* The tls call insn cannot be copied, as it is paired with a data word. */
9852 if (recog_memoized (insn) == CODE_FOR_tlscall)
9855 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9861 enum rtx_code code = GET_CODE (x);
9878 /* Return 1 if memory locations are adjacent. */
9880 adjacent_mem_locations (rtx a, rtx b)
9882 /* We don't guarantee to preserve the order of these memory refs. */
9883 if (volatile_refs_p (a) || volatile_refs_p (b))
9886 if ((GET_CODE (XEXP (a, 0)) == REG
9887 || (GET_CODE (XEXP (a, 0)) == PLUS
9888 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9889 && (GET_CODE (XEXP (b, 0)) == REG
9890 || (GET_CODE (XEXP (b, 0)) == PLUS
9891 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9893 HOST_WIDE_INT val0 = 0, val1 = 0;
9897 if (GET_CODE (XEXP (a, 0)) == PLUS)
9899 reg0 = XEXP (XEXP (a, 0), 0);
9900 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9905 if (GET_CODE (XEXP (b, 0)) == PLUS)
9907 reg1 = XEXP (XEXP (b, 0), 0);
9908 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9913 /* Don't accept any offset that will require multiple
9914 instructions to handle, since this would cause the
9915 arith_adjacentmem pattern to output an overlong sequence. */
9916 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9919 /* Don't allow an eliminable register: register elimination can make
9920 the offset too large. */
9921 if (arm_eliminable_register (reg0))
9924 val_diff = val1 - val0;
9928 /* If the target has load delay slots, then there's no benefit
9929 to using an ldm instruction unless the offset is zero and
9930 we are optimizing for size. */
9931 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9932 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9933 && (val_diff == 4 || val_diff == -4));
9936 return ((REGNO (reg0) == REGNO (reg1))
9937 && (val_diff == 4 || val_diff == -4));
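/* For example (illustrative): the addresses [r4, #4] and [r4, #8] give
val_diff == 4 and so are considered adjacent, provided r4 is not an
eliminable register. */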
9943 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9944 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9945 instruction. ADD_OFFSET is nonzero if the base address register needs
9946 to be modified with an add instruction before we can use it. */
9949 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9950 int nops, HOST_WIDE_INT add_offset)
9952 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9953 if the offset isn't small enough. The reason 2 ldrs are faster
9954 is because these ARMs are able to do more than one cache access
9955 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9956 whilst the ARM8 has a double bandwidth cache. This means that
9957 these cores can do both an instruction fetch and a data fetch in
9958 a single cycle, so the trick of calculating the address into a
9959 scratch register (one of the result regs) and then doing a load
9960 multiple actually becomes slower (and no smaller in code size).
9961 That is the transformation
9963 ldr rd1, [rbase + offset]
9964 ldr rd2, [rbase + offset + 4]
9968 add rd1, rbase, offset
9969 ldmia rd1, {rd1, rd2}
9971 produces worse code -- '3 cycles + any stalls on rd2' instead of
9972 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9973 access per cycle, the first sequence could never complete in less
9974 than 6 cycles, whereas the ldm sequence would only take 5 and
9975 would make better use of sequential accesses if not hitting the
9978 We cheat here and test 'arm_ld_sched' which we currently know to
9979 only be true for the ARM8, ARM9 and StrongARM. If this ever
9980 changes, then the test below needs to be reworked. */
9981 if (nops == 2 && arm_ld_sched && add_offset != 0)
9984 /* XScale has load-store double instructions, but they have stricter
9985 alignment requirements than load-store multiple, so we cannot
9988 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9989 the pipeline until completion.
9997 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
10006 Best case ldr will always win. However, the more ldr instructions
10007 we issue, the less likely we are to be able to schedule them well.
10008 Using ldr instructions also increases code size.
10010 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10011 for counts of 3 or 4 regs. */
10012 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10017 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10018 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10019 an array ORDER which describes the sequence to use when accessing the
10020 offsets that produces an ascending order. In this sequence, each
10021 offset must be larger by exactly 4 than the previous one. ORDER[0]
10022 must have been filled in with the lowest offset by the caller.
10023 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10024 we use to verify that ORDER produces an ascending order of registers.
10025 Return true if it was possible to construct such an order, false if not. */
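/* For example (illustrative): with UNSORTED_OFFSETS of {4, 12, 0, 8} and
ORDER[0] = 2 (the slot holding the lowest offset, 0), the loop below fills
ORDER with {2, 0, 3, 1}, visiting offsets 0, 4, 8, 12 in ascending order. */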
10029 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10030 int *unsorted_regs)
10033 for (i = 1; i < nops; i++)
10037 order[i] = order[i - 1];
10038 for (j = 0; j < nops; j++)
10039 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10041 /* We must find exactly one offset that is higher than the
10042 previous one by 4. */
10043 if (order[i] != order[i - 1])
10047 if (order[i] == order[i - 1])
10049 /* The register numbers must be ascending. */
10050 if (unsorted_regs != NULL
10051 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10057 /* Used to determine in a peephole whether a sequence of load
10058 instructions can be changed into a load-multiple instruction.
10059 NOPS is the number of separate load instructions we are examining. The
10060 first NOPS entries in OPERANDS are the destination registers, the
10061 next NOPS entries are memory operands. If this function is
10062 successful, *BASE is set to the common base register of the memory
10063 accesses; *LOAD_OFFSET is set to the first memory location's offset
10064 from that base register.
10065 REGS is an array filled in with the destination register numbers.
10066 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10067 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10068 the sequence of registers in REGS matches the loads from ascending memory
10069 locations, and the function verifies that the register numbers are
10070 themselves ascending. If CHECK_REGS is false, the register numbers
10071 are stored in the order they are found in the operands. */
10073 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10074 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10076 int unsorted_regs[MAX_LDM_STM_OPS];
10077 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10078 int order[MAX_LDM_STM_OPS];
10079 rtx base_reg_rtx = NULL;
10083 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10084 easily extended if required. */
10085 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10087 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10089 /* Loop over the operands and check that the memory references are
10090 suitable (i.e. immediate offsets from the same base register). At
10091 the same time, extract the target register, and the memory
10093 for (i = 0; i < nops; i++)
10098 /* Convert a subreg of a mem into the mem itself. */
10099 if (GET_CODE (operands[nops + i]) == SUBREG)
10100 operands[nops + i] = alter_subreg (operands + (nops + i));
10102 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10104 /* Don't reorder volatile memory references; it doesn't seem worth
10105 looking for the case where the order is ok anyway. */
10106 if (MEM_VOLATILE_P (operands[nops + i]))
10109 offset = const0_rtx;
10111 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10112 || (GET_CODE (reg) == SUBREG
10113 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10114 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10115 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10117 || (GET_CODE (reg) == SUBREG
10118 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10119 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10124 base_reg = REGNO (reg);
10125 base_reg_rtx = reg;
10126 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10129 else if (base_reg != (int) REGNO (reg))
10130 /* Not addressed from the same base register. */
10133 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10134 ? REGNO (operands[i])
10135 : REGNO (SUBREG_REG (operands[i])));
10137 /* If it isn't an integer register, or if it overwrites the
10138 base register but isn't the last insn in the list, then
10139 we can't do this. */
10140 if (unsorted_regs[i] < 0
10141 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10142 || unsorted_regs[i] > 14
10143 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10146 unsorted_offsets[i] = INTVAL (offset);
10147 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10151 /* Not a suitable memory address. */
10155 /* All the useful information has now been extracted from the
10156 operands into unsorted_regs and unsorted_offsets; additionally,
10157 order[0] has been set to the lowest offset in the list. Sort
10158 the offsets into order, verifying that they are adjacent, and
10159 check that the register numbers are ascending. */
10160 if (!compute_offset_order (nops, unsorted_offsets, order,
10161 check_regs ? unsorted_regs : NULL))
10165 memcpy (saved_order, order, sizeof order);
10171 for (i = 0; i < nops; i++)
10172 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10174 *load_offset = unsorted_offsets[order[0]];
10178 && !peep2_reg_dead_p (nops, base_reg_rtx))
10181 if (unsorted_offsets[order[0]] == 0)
10182 ldm_case = 1; /* ldmia */
10183 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10184 ldm_case = 2; /* ldmib */
10185 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10186 ldm_case = 3; /* ldmda */
10187 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10188 ldm_case = 4; /* ldmdb */
10189 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10190 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10195 if (!multiple_operation_profitable_p (false, nops,
10197 ? unsorted_offsets[order[0]] : 0))
10203 /* Used to determine in a peephole whether a sequence of store instructions can
10204 be changed into a store-multiple instruction.
10205 NOPS is the number of separate store instructions we are examining.
10206 NOPS_TOTAL is the total number of instructions recognized by the peephole
10208 The first NOPS entries in OPERANDS are the source registers, the next
10209 NOPS entries are memory operands. If this function is successful, *BASE is
10210 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10211 to the first memory location's offset from that base register. REGS is an
10212 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10213 likewise filled with the corresponding rtx's.
10214 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10215 numbers to an ascending order of stores.
10216 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10217 from ascending memory locations, and the function verifies that the register
10218 numbers are themselves ascending. If CHECK_REGS is false, the register
10219 numbers are stored in the order they are found in the operands. */
10221 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10222 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10223 HOST_WIDE_INT *load_offset, bool check_regs)
10225 int unsorted_regs[MAX_LDM_STM_OPS];
10226 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10227 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10228 int order[MAX_LDM_STM_OPS];
10230 rtx base_reg_rtx = NULL;
10233 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10234 easily extended if required. */
10235 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10237 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10239 /* Loop over the operands and check that the memory references are
10240 suitable (i.e. immediate offsets from the same base register). At
10241 the same time, extract the target register, and the memory
10243 for (i = 0; i < nops; i++)
10248 /* Convert a subreg of a mem into the mem itself. */
10249 if (GET_CODE (operands[nops + i]) == SUBREG)
10250 operands[nops + i] = alter_subreg (operands + (nops + i));
10252 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10254 /* Don't reorder volatile memory references; it doesn't seem worth
10255 looking for the case where the order is ok anyway. */
10256 if (MEM_VOLATILE_P (operands[nops + i]))
10259 offset = const0_rtx;
10261 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10262 || (GET_CODE (reg) == SUBREG
10263 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10264 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10265 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10267 || (GET_CODE (reg) == SUBREG
10268 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10269 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10272 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10273 ? operands[i] : SUBREG_REG (operands[i]));
10274 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10278 base_reg = REGNO (reg);
10279 base_reg_rtx = reg;
10280 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10283 else if (base_reg != (int) REGNO (reg))
10284 /* Not addressed from the same base register. */
10287 /* If it isn't an integer register, then we can't do this. */
10288 if (unsorted_regs[i] < 0
10289 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10290 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
10291 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10292 || unsorted_regs[i] > 14)
10295 unsorted_offsets[i] = INTVAL (offset);
10296 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10300 /* Not a suitable memory address. */
10304 /* All the useful information has now been extracted from the
10305 operands into unsorted_regs and unsorted_offsets; additionally,
10306 order[0] has been set to the lowest offset in the list. Sort
10307 the offsets into order, verifying that they are adjacent, and
10308 check that the register numbers are ascending. */
10309 if (!compute_offset_order (nops, unsorted_offsets, order,
10310 check_regs ? unsorted_regs : NULL))
10314 memcpy (saved_order, order, sizeof order);
10320 for (i = 0; i < nops; i++)
10322 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10324 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10327 *load_offset = unsorted_offsets[order[0]];
10331 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10334 if (unsorted_offsets[order[0]] == 0)
10335 stm_case = 1; /* stmia */
10336 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10337 stm_case = 2; /* stmib */
10338 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10339 stm_case = 3; /* stmda */
10340 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10341 stm_case = 4; /* stmdb */
10345 if (!multiple_operation_profitable_p (false, nops, 0))
10351 /* Routines for use in generating RTL. */
10353 /* Generate a load-multiple instruction. COUNT is the number of loads in
10354 the instruction; REGS and MEMS are arrays containing the operands.
10355 BASEREG is the base register to be used in addressing the memory operands.
10356 WBACK_OFFSET is nonzero if the instruction should update the base register. */
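/* For example (a sketch): COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8
produce (parallel [(set basereg (plus basereg (const_int 8)))
(set (reg:SI 4) mem0) (set (reg:SI 5) mem1)]). */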
10360 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10361 HOST_WIDE_INT wback_offset)
10366 if (!multiple_operation_profitable_p (false, count, 0))
10372 for (i = 0; i < count; i++)
10373 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10375 if (wback_offset != 0)
10376 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10378 seq = get_insns ();
10384 result = gen_rtx_PARALLEL (VOIDmode,
10385 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10386 if (wback_offset != 0)
10388 XVECEXP (result, 0, 0)
10389 = gen_rtx_SET (VOIDmode, basereg,
10390 plus_constant (basereg, wback_offset));
10395 for (j = 0; i < count; i++, j++)
10396 XVECEXP (result, 0, i)
10397 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10402 /* Generate a store-multiple instruction. COUNT is the number of stores in
10403 the instruction; REGS and MEMS are arrays containing the operands.
10404 BASEREG is the base register to be used in addressing the memory operands.
10405 WBACK_OFFSET is nonzero if the instruction should update the base register. */
10409 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10410 HOST_WIDE_INT wback_offset)
10415 if (GET_CODE (basereg) == PLUS)
10416 basereg = XEXP (basereg, 0);
10418 if (!multiple_operation_profitable_p (false, count, 0))
10424 for (i = 0; i < count; i++)
10425 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10427 if (wback_offset != 0)
10428 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10430 seq = get_insns ();
10436 result = gen_rtx_PARALLEL (VOIDmode,
10437 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10438 if (wback_offset != 0)
10440 XVECEXP (result, 0, 0)
10441 = gen_rtx_SET (VOIDmode, basereg,
10442 plus_constant (basereg, wback_offset));
10447 for (j = 0; i < count; i++, j++)
10448 XVECEXP (result, 0, i)
10449 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10454 /* Generate either a load-multiple or a store-multiple instruction. This
10455 function can be used in situations where we can start with a single MEM
10456 rtx and adjust its address upwards.
10457 COUNT is the number of operations in the instruction, not counting a
10458 possible update of the base register. REGS is an array containing the
10460 BASEREG is the base register to be used in addressing the memory operands,
10461 which are constructed from BASEMEM.
10462 WRITE_BACK specifies whether the generated instruction should include an
10463 update of the base register.
10464 OFFSETP is used to pass an offset to and from this function; this offset
10465 is not used when constructing the address (instead BASEMEM should have an
10466 appropriate offset in its address), it is used only for setting
10467 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
10470 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10471 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10473 rtx mems[MAX_LDM_STM_OPS];
10474 HOST_WIDE_INT offset = *offsetp;
10477 gcc_assert (count <= MAX_LDM_STM_OPS);
10479 if (GET_CODE (basereg) == PLUS)
10480 basereg = XEXP (basereg, 0);
10482 for (i = 0; i < count; i++)
10484 rtx addr = plus_constant (basereg, i * 4);
10485 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10493 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10494 write_back ? 4 * count : 0);
10496 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10497 write_back ? 4 * count : 0);
10501 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10502 rtx basemem, HOST_WIDE_INT *offsetp)
10504 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10509 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10510 rtx basemem, HOST_WIDE_INT *offsetp)
10512 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10516 /* Called from a peephole2 expander to turn a sequence of loads into an
10517 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10518 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10519 is true if we can reorder the registers because they are used commutatively
10521 Returns true iff we could generate a new instruction. */
10524 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10526 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10527 rtx mems[MAX_LDM_STM_OPS];
10528 int i, j, base_reg;
10530 HOST_WIDE_INT offset;
10531 int write_back = FALSE;
10535 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10536 &base_reg, &offset, !sort_regs);
10542 for (i = 0; i < nops - 1; i++)
10543 for (j = i + 1; j < nops; j++)
10544 if (regs[i] > regs[j])
10550 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10554 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10555 gcc_assert (ldm_case == 1 || ldm_case == 5);
10561 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10562 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10564 if (!TARGET_THUMB1)
10566 base_reg = regs[0];
10567 base_reg_rtx = newbase;
10571 for (i = 0; i < nops; i++)
10573 addr = plus_constant (base_reg_rtx, offset + i * 4);
10574 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10577 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10578 write_back ? offset + i * 4 : 0));
10582 /* Called from a peephole2 expander to turn a sequence of stores into an
10583 STM instruction. OPERANDS are the operands found by the peephole matcher;
10584 NOPS indicates how many separate stores we are trying to combine.
10585 Returns true iff we could generate a new instruction. */
10588 gen_stm_seq (rtx *operands, int nops)
10591 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10592 rtx mems[MAX_LDM_STM_OPS];
10595 HOST_WIDE_INT offset;
10596 int write_back = FALSE;
10599 bool base_reg_dies;
10601 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10602 mem_order, &base_reg, &offset, true);
10607 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10609 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10612 gcc_assert (base_reg_dies);
10618 gcc_assert (base_reg_dies);
10619 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10623 addr = plus_constant (base_reg_rtx, offset);
10625 for (i = 0; i < nops; i++)
10627 addr = plus_constant (base_reg_rtx, offset + i * 4);
10628 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10631 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10632 write_back ? offset + i * 4 : 0));
10636 /* Called from a peephole2 expander to turn a sequence of stores that are
10637 preceded by constant loads into an STM instruction. OPERANDS are the
10638 operands found by the peephole matcher; NOPS indicates how many
10639 separate stores we are trying to combine; there are 2 * NOPS
10640 instructions in the peephole.
10641 Returns true iff we could generate a new instruction. */
10644 gen_const_stm_seq (rtx *operands, int nops)
10646 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10647 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10648 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10649 rtx mems[MAX_LDM_STM_OPS];
10652 HOST_WIDE_INT offset;
10653 int write_back = FALSE;
10656 bool base_reg_dies;
10658 HARD_REG_SET allocated;
10660 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10661 mem_order, &base_reg, &offset, false);
10666 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10668 /* If the same register is used more than once, try to find a free register. */
10670 CLEAR_HARD_REG_SET (allocated);
10671 for (i = 0; i < nops; i++)
10673 for (j = i + 1; j < nops; j++)
10674 if (regs[i] == regs[j])
10676 rtx t = peep2_find_free_register (0, nops * 2,
10677 TARGET_THUMB1 ? "l" : "r",
10678 SImode, &allocated);
10682 regs[i] = REGNO (t);
10686 /* Compute an ordering that maps the register numbers to an ascending sequence. */
10689 for (i = 0; i < nops; i++)
10690 if (regs[i] < regs[reg_order[0]])
10693 for (i = 1; i < nops; i++)
10695 int this_order = reg_order[i - 1];
10696 for (j = 0; j < nops; j++)
10697 if (regs[j] > regs[reg_order[i - 1]]
10698 && (this_order == reg_order[i - 1]
10699 || regs[j] < regs[this_order]))
10701 reg_order[i] = this_order;
10704 /* Ensure that registers that must be live after the instruction end
10705 up with the correct value. */
10706 for (i = 0; i < nops; i++)
10708 int this_order = reg_order[i];
10709 if ((this_order != mem_order[i]
10710 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10711 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10715 /* Load the constants. */
10716 for (i = 0; i < nops; i++)
10718 rtx op = operands[2 * nops + mem_order[i]];
10719 sorted_regs[i] = regs[reg_order[i]];
10720 emit_move_insn (reg_rtxs[reg_order[i]], op);
10723 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10725 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10728 gcc_assert (base_reg_dies);
10734 gcc_assert (base_reg_dies);
10735 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10739 addr = plus_constant (base_reg_rtx, offset);
10741 for (i = 0; i < nops; i++)
10743 addr = plus_constant (base_reg_rtx, offset + i * 4);
10744 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10747 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10748 write_back ? offset + i * 4 : 0));
10753 arm_gen_movmemqi (rtx *operands)
10755 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10756 HOST_WIDE_INT srcoffset, dstoffset;
10758 rtx src, dst, srcbase, dstbase;
10759 rtx part_bytes_reg = NULL;
10762 if (GET_CODE (operands[2]) != CONST_INT
10763 || GET_CODE (operands[3]) != CONST_INT
10764 || INTVAL (operands[2]) > 64
10765 || INTVAL (operands[3]) & 3)
10768 dstbase = operands[0];
10769 srcbase = operands[1];
10771 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10772 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10774 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10775 out_words_to_go = INTVAL (operands[2]) / 4;
10776 last_bytes = INTVAL (operands[2]) & 3;
10777 dstoffset = srcoffset = 0;
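/* For example (illustrative): a 14-byte copy gives in_words_to_go == 4,
out_words_to_go == 3 and last_bytes == 2: three whole words are stored, and
the fourth loaded word supplies the trailing two bytes. */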
10779 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10780 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10782 for (i = 0; in_words_to_go >= 2; i+=4)
10784 if (in_words_to_go > 4)
10785 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10786 TRUE, srcbase, &srcoffset));
10788 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10789 src, FALSE, srcbase,
10792 if (out_words_to_go)
10794 if (out_words_to_go > 4)
10795 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10796 TRUE, dstbase, &dstoffset));
10797 else if (out_words_to_go != 1)
10798 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10799 out_words_to_go, dst,
10802 dstbase, &dstoffset));
10805 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10806 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10807 if (last_bytes != 0)
10809 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10815 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10816 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10819 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10820 if (out_words_to_go)
10824 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10825 sreg = copy_to_reg (mem);
10827 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10828 emit_move_insn (mem, sreg);
10831 gcc_assert (!in_words_to_go); /* Sanity check */
10834 if (in_words_to_go)
10836 gcc_assert (in_words_to_go > 0);
10838 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10839 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10842 gcc_assert (!last_bytes || part_bytes_reg);
10844 if (BYTES_BIG_ENDIAN && last_bytes)
10846 rtx tmp = gen_reg_rtx (SImode);
10848 /* The bytes we want are in the top end of the word. */
10849 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10850 GEN_INT (8 * (4 - last_bytes))));
10851 part_bytes_reg = tmp;
10855 mem = adjust_automodify_address (dstbase, QImode,
10856 plus_constant (dst, last_bytes - 1),
10857 dstoffset + last_bytes - 1);
10858 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10862 tmp = gen_reg_rtx (SImode);
10863 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10864 part_bytes_reg = tmp;
10871 if (last_bytes > 1)
10873 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10874 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10878 rtx tmp = gen_reg_rtx (SImode);
10879 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10880 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10881 part_bytes_reg = tmp;
10888 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10889 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10896 /* Select a dominance comparison mode if possible for a test of the general
10897 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10898 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10899 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10900 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10901 In all cases OP will be either EQ or NE, but we don't need to know which
10902 here. If we are unable to support a dominance comparison we return
10903 CC mode. This will then fail to match for the RTL expressions that
10904 generate this call. */
10906 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10908 enum rtx_code cond1, cond2;
10911 /* Currently we will probably get the wrong result if the individual
10912 comparisons are not simple. This also ensures that it is safe to
10913 reverse a comparison if necessary. */
10914 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10916 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10920 /* The if_then_else variant of this tests the second condition if the
10921 first passes, but is true if the first fails. Reverse the first
10922 condition to get a true "inclusive-or" expression. */
10923 if (cond_or == DOM_CC_NX_OR_Y)
10924 cond1 = reverse_condition (cond1);
10926 /* If the comparisons are not equal, and one doesn't dominate the other,
10927 then we can't do this. */
10929 && !comparison_dominates_p (cond1, cond2)
10930 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10935 enum rtx_code temp = cond1;
10943 if (cond_or == DOM_CC_X_AND_Y)
10948 case EQ: return CC_DEQmode;
10949 case LE: return CC_DLEmode;
10950 case LEU: return CC_DLEUmode;
10951 case GE: return CC_DGEmode;
10952 case GEU: return CC_DGEUmode;
10953 default: gcc_unreachable ();
10957 if (cond_or == DOM_CC_X_AND_Y)
10969 gcc_unreachable ();
10973 if (cond_or == DOM_CC_X_AND_Y)
10985 gcc_unreachable ();
10989 if (cond_or == DOM_CC_X_AND_Y)
10990 return CC_DLTUmode;
10995 return CC_DLTUmode;
10997 return CC_DLEUmode;
11001 gcc_unreachable ();
11005 if (cond_or == DOM_CC_X_AND_Y)
11006 return CC_DGTUmode;
11011 return CC_DGTUmode;
11013 return CC_DGEUmode;
11017 gcc_unreachable ();
11020 /* The remaining cases only occur when both comparisons are the same. */
11023 gcc_assert (cond1 == cond2);
11027 gcc_assert (cond1 == cond2);
11031 gcc_assert (cond1 == cond2);
11035 gcc_assert (cond1 == cond2);
11036 return CC_DLEUmode;
11039 gcc_assert (cond1 == cond2);
11040 return CC_DGEUmode;
11043 gcc_unreachable ();
11048 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11050 /* All floating point compares return CCFP if it is an equality
11051 comparison, and CCFPE otherwise. */
11052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11072 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11077 gcc_unreachable ();
11081 /* A compare with a shifted operand. Because of canonicalization, the
11082 comparison will have to be swapped when we emit the assembler. */
11083 if (GET_MODE (y) == SImode
11084 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11085 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11086 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11087 || GET_CODE (x) == ROTATERT))
11090 /* This operation is performed swapped, but since we only rely on the Z
11091 flag we don't need an additional mode. */
11092 if (GET_MODE (y) == SImode
11093 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11094 && GET_CODE (x) == NEG
11095 && (op == EQ || op == NE))
11098 /* This is a special case that is used by combine to allow a
11099 comparison of a shifted byte load to be split into a zero-extend
11100 followed by a comparison of the shifted integer (only valid for
11101 equalities and unsigned inequalities). */
11102 if (GET_MODE (x) == SImode
11103 && GET_CODE (x) == ASHIFT
11104 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11105 && GET_CODE (XEXP (x, 0)) == SUBREG
11106 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11107 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11108 && (op == EQ || op == NE
11109 || op == GEU || op == GTU || op == LTU || op == LEU)
11110 && GET_CODE (y) == CONST_INT)
11113 /* A construct for a conditional compare: if the false arm contains
11114 0, then both conditions must be true; otherwise either condition
11115 must be true. Not all conditions are possible, so CCmode is
11116 returned if it can't be done. */
11117 if (GET_CODE (x) == IF_THEN_ELSE
11118 && (XEXP (x, 2) == const0_rtx
11119 || XEXP (x, 2) == const1_rtx)
11120 && COMPARISON_P (XEXP (x, 0))
11121 && COMPARISON_P (XEXP (x, 1)))
11122 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11123 INTVAL (XEXP (x, 2)));
11125 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11126 if (GET_CODE (x) == AND
11127 && (op == EQ || op == NE)
11128 && COMPARISON_P (XEXP (x, 0))
11129 && COMPARISON_P (XEXP (x, 1)))
11130 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11133 if (GET_CODE (x) == IOR
11134 && (op == EQ || op == NE)
11135 && COMPARISON_P (XEXP (x, 0))
11136 && COMPARISON_P (XEXP (x, 1)))
11137 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11140 /* An operation (on Thumb) where we want to test for a single bit.
11141 This is done by shifting that bit up into the top bit of a
11142 scratch register; we can then branch on the sign bit. */
11144 && GET_MODE (x) == SImode
11145 && (op == EQ || op == NE)
11146 && GET_CODE (x) == ZERO_EXTRACT
11147 && XEXP (x, 1) == const1_rtx)
11150 /* For an operation that sets the condition codes as a side-effect, the
11151 V flag is not set correctly, so we can only use comparisons where
11152 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
11154 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11155 if (GET_MODE (x) == SImode
11157 && (op == EQ || op == NE || op == LT || op == GE)
11158 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11159 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11160 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11161 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11162 || GET_CODE (x) == LSHIFTRT
11163 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11164 || GET_CODE (x) == ROTATERT
11165 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11166 return CC_NOOVmode;
11168 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11171 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11172 && GET_CODE (x) == PLUS
11173 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11176 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11178 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
11180 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11187 /* A DImode comparison against zero can be implemented by
11188 or'ing the two halves together. */
11189 if (y == const0_rtx)
11192 /* We can do an equality test in three Thumb instructions. */
11202 /* DImode unsigned comparisons can be implemented by cmp +
11203 cmpeq without a scratch register. Not worth doing in Thumb-2. */
11214 /* DImode signed and unsigned comparisons can be implemented
11215 by cmp + sbcs with a scratch register, but that does not
11216 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11217 gcc_assert (op != EQ && op != NE);
11221 gcc_unreachable ();
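/* Editorial sketch (not part of the original source): the "or the two
   halves together" trick mentioned above for a DImode test against
   zero, in plain C.  Names are illustrative only.  */
#if 0
static int di_is_zero (unsigned int lo, unsigned int hi)
{
  /* One ORR that sets the flags, then a single conditional branch.  */
  return (lo | hi) == 0;
}
#endif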
11228 /* X and Y are two things to compare using CODE. Emit the compare insn and
11229 return the rtx for register 0 in the proper mode. FP means this is a
11230 floating point compare: I don't think that it is needed on the arm. */
11232 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
11234 enum machine_mode mode;
11236 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11238 /* We might have X as a constant, Y as a register because of the predicates
11239 used for cmpdi. If so, force X to a register here. */
11240 if (dimode_comparison && !REG_P (x))
11241 x = force_reg (DImode, x);
11243 mode = SELECT_CC_MODE (code, x, y);
11244 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11246 if (dimode_comparison
11247 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11248 && mode != CC_CZmode)
11252 /* To compare two non-zero values for equality, XOR them and
11253 then compare against zero. Not used for ARM mode; there
11254 CC_CZmode is cheaper. */
11255 if (mode == CC_Zmode && y != const0_rtx)
11257 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11260 /* A scratch register is required. */
11261 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11262 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11266 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
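/* Editorial sketch (not part of the original source): the XOR rewrite
   used above -- two values are equal exactly when their XOR is zero,
   so a DImode equality becomes an XOR followed by a compare against
   zero.  */
#if 0
static int di_equal (unsigned long long x, unsigned long long y)
{
  return (x ^ y) == 0;
}
#endif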
11271 /* Generate a sequence of insns that will generate the correct return
11272 address mask depending on the physical architecture that the program is running on. */
11275 arm_gen_return_addr_mask (void)
11277 rtx reg = gen_reg_rtx (Pmode);
11279 emit_insn (gen_return_addr_mask (reg));
11284 arm_reload_in_hi (rtx *operands)
11286 rtx ref = operands[1];
11288 HOST_WIDE_INT offset = 0;
11290 if (GET_CODE (ref) == SUBREG)
11292 offset = SUBREG_BYTE (ref);
11293 ref = SUBREG_REG (ref);
11296 if (GET_CODE (ref) == REG)
11298 /* We have a pseudo which has been spilt onto the stack; there
11299 are two cases here: the first where there is a simple
11300 stack-slot replacement and a second where the stack-slot is
11301 out of range, or is used as a subreg. */
11302 if (reg_equiv_mem (REGNO (ref)))
11304 ref = reg_equiv_mem (REGNO (ref));
11305 base = find_replacement (&XEXP (ref, 0));
11308 /* The slot is out of range, or was dressed up in a SUBREG. */
11309 base = reg_equiv_address (REGNO (ref));
11312 base = find_replacement (&XEXP (ref, 0));
11314 /* Handle the case where the address is too complex to be offset by 1. */
11315 if (GET_CODE (base) == MINUS
11316 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11318 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11320 emit_set_insn (base_plus, base);
11323 else if (GET_CODE (base) == PLUS)
11325 /* The addend must be CONST_INT, or we would have dealt with it above. */
11326 HOST_WIDE_INT hi, lo;
11328 offset += INTVAL (XEXP (base, 1));
11329 base = XEXP (base, 0);
11331 /* Rework the address into a legal sequence of insns. */
11332 /* Valid range for lo is -4095 -> 4095 */
11335 : -((-offset) & 0xfff));
11337 /* Corner case, if lo is the max offset then we would be out of range
11338 once we have added the additional 1 below, so bump the msb into the
11339 pre-loading insn(s). */
11343 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11344 ^ (HOST_WIDE_INT) 0x80000000)
11345 - (HOST_WIDE_INT) 0x80000000);
11347 gcc_assert (hi + lo == offset);
11351 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11353 /* Get the base address; addsi3 knows how to handle constants
11354 that require more than one insn. */
11355 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11361 /* Operands[2] may overlap operands[0] (though it won't overlap
11362 operands[1]); that's why we asked for a DImode reg -- so we can
11363 use the bit that does not overlap. */
11364 if (REGNO (operands[2]) == REGNO (operands[0]))
11365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11367 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11369 emit_insn (gen_zero_extendqisi2 (scratch,
11370 gen_rtx_MEM (QImode,
11371 plus_constant (base,
11373 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11374 gen_rtx_MEM (QImode,
11375 plus_constant (base,
11377 if (!BYTES_BIG_ENDIAN)
11378 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11379 gen_rtx_IOR (SImode,
11382 gen_rtx_SUBREG (SImode, operands[0], 0),
11386 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11387 gen_rtx_IOR (SImode,
11388 gen_rtx_ASHIFT (SImode, scratch,
11390 gen_rtx_SUBREG (SImode, operands[0], 0)));
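/* Editorial sketch (not part of the original source): the hi/lo split
   performed above.  LO keeps the sign-adjusted low 12 bits (the range
   a byte load can add directly); HI takes the rest, sign-extended to
   32 bits, so that HI + LO == OFFSET.  For example, offset 0x1235
   splits into hi = 0x1000 and lo = 0x235.  */
#if 0
static void split_reload_offset (long long offset, long long *hi, long long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  *hi = (((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL;
}
#endif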
11393 /* Handle storing a half-word to memory during reload by synthesizing as two
11394 byte stores. Take care not to clobber the input values until after we
11395 have moved them somewhere safe. This code assumes that if the DImode
11396 scratch in operands[2] overlaps either the input value or output address
11397 in some way, then that value must die in this insn (we absolutely need
11398 two scratch registers for some corner cases). */
11400 arm_reload_out_hi (rtx *operands)
11402 rtx ref = operands[0];
11403 rtx outval = operands[1];
11405 HOST_WIDE_INT offset = 0;
11407 if (GET_CODE (ref) == SUBREG)
11409 offset = SUBREG_BYTE (ref);
11410 ref = SUBREG_REG (ref);
11413 if (GET_CODE (ref) == REG)
11415 /* We have a pseudo which has been spilt onto the stack; there
11416 are two cases here: the first where there is a simple
11417 stack-slot replacement and a second where the stack-slot is
11418 out of range, or is used as a subreg. */
11419 if (reg_equiv_mem (REGNO (ref)))
11421 ref = reg_equiv_mem (REGNO (ref));
11422 base = find_replacement (&XEXP (ref, 0));
11425 /* The slot is out of range, or was dressed up in a SUBREG. */
11426 base = reg_equiv_address (REGNO (ref));
11429 base = find_replacement (&XEXP (ref, 0));
11431 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11433 /* Handle the case where the address is too complex to be offset by 1. */
11434 if (GET_CODE (base) == MINUS
11435 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11437 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11439 /* Be careful not to destroy OUTVAL. */
11440 if (reg_overlap_mentioned_p (base_plus, outval))
11442 /* Updating base_plus might destroy outval; see if we can
11443 swap the scratch and base_plus. */
11444 if (!reg_overlap_mentioned_p (scratch, outval))
11447 scratch = base_plus;
11452 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11454 /* Be conservative and copy OUTVAL into the scratch now;
11455 this should only be necessary if outval is a subreg
11456 of something larger than a word. */
11457 /* XXX Might this clobber base? I can't see how it can,
11458 since scratch is known to overlap with OUTVAL, and
11459 must be wider than a word. */
11460 emit_insn (gen_movhi (scratch_hi, outval));
11461 outval = scratch_hi;
11465 emit_set_insn (base_plus, base);
11468 else if (GET_CODE (base) == PLUS)
11470 /* The addend must be CONST_INT, or we would have dealt with it above. */
11471 HOST_WIDE_INT hi, lo;
11473 offset += INTVAL (XEXP (base, 1));
11474 base = XEXP (base, 0);
11476 /* Rework the address into a legal sequence of insns. */
11477 /* Valid range for lo is -4095 -> 4095 */
11480 : -((-offset) & 0xfff));
11482 /* Corner case, if lo is the max offset then we would be out of range
11483 once we have added the additional 1 below, so bump the msb into the
11484 pre-loading insn(s). */
11488 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11489 ^ (HOST_WIDE_INT) 0x80000000)
11490 - (HOST_WIDE_INT) 0x80000000);
11492 gcc_assert (hi + lo == offset);
11496 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11498 /* Be careful not to destroy OUTVAL. */
11499 if (reg_overlap_mentioned_p (base_plus, outval))
11501 /* Updating base_plus might destroy outval; see if we
11502 can swap the scratch and base_plus. */
11503 if (!reg_overlap_mentioned_p (scratch, outval))
11506 scratch = base_plus;
11511 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11513 /* Be conservative and copy outval into scratch now;
11514 this should only be necessary if outval is a
11515 subreg of something larger than a word. */
11516 /* XXX Might this clobber base? I can't see how it
11517 can, since scratch is known to overlap with outval. */
11519 emit_insn (gen_movhi (scratch_hi, outval));
11520 outval = scratch_hi;
11524 /* Get the base address; addsi3 knows how to handle constants
11525 that require more than one insn. */
11526 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11532 if (BYTES_BIG_ENDIAN)
11534 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11535 plus_constant (base, offset + 1)),
11536 gen_lowpart (QImode, outval)));
11537 emit_insn (gen_lshrsi3 (scratch,
11538 gen_rtx_SUBREG (SImode, outval, 0),
11540 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11541 gen_lowpart (QImode, scratch)));
11545 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11546 gen_lowpart (QImode, outval)));
11547 emit_insn (gen_lshrsi3 (scratch,
11548 gen_rtx_SUBREG (SImode, outval, 0),
11550 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11551 plus_constant (base, offset + 1)),
11552 gen_lowpart (QImode, scratch)));
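/* Editorial sketch (not part of the original source): what the
   little-endian arm of the code above does, expressed directly -- a
   halfword store becomes a byte store of the low part at OFFSET and a
   byte store of the value shifted right by 8 at OFFSET + 1.  */
#if 0
static void store_hi_as_bytes (unsigned char *p, unsigned int val)
{
  p[0] = val & 0xff;          /* strb of the low byte.   */
  p[1] = (val >> 8) & 0xff;   /* strb of the high byte.  */
}
#endif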
11556 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11557 (padded to the size of a word) should be passed in a register. */
11560 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11562 if (TARGET_AAPCS_BASED)
11563 return must_pass_in_stack_var_size (mode, type);
11565 return must_pass_in_stack_var_size_or_pad (mode, type);
11569 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11570 Return true if an argument passed on the stack should be padded upwards,
11571 i.e. if the least-significant byte has useful data.
11572 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11573 aggregate types are placed in the lowest memory address. */
11576 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11578 if (!TARGET_AAPCS_BASED)
11579 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11581 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11584 /* Half-float values are only passed to libcalls, not regular functions.
11585 They should be passed and returned as "short"s (see RTABI). To achieve
11586 that effect in big-endian mode, pad downwards so the value is passed in
11587 the least-significant end of the register. ??? This needs to be here
11588 rather than in arm_pad_reg_upward due to peculiarity in the handling of
11589 libcall arguments. */
11590 if (BYTES_BIG_ENDIAN && mode == HFmode)
11597 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11598 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11599 byte of the register has useful data, and return the opposite if the
11600 most significant byte does.
11601 For AAPCS, small aggregates and small complex types are always padded upwards. */
11605 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11606 tree type, int first ATTRIBUTE_UNUSED)
11608 if (TARGET_AAPCS_BASED
11609 && BYTES_BIG_ENDIAN
11610 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE
11611 || FIXED_POINT_TYPE_P (type))
11612 && int_size_in_bytes (type) <= 4)
11615 /* Otherwise, use default padding. */
11616 return !BYTES_BIG_ENDIAN;
11620 /* Print a symbolic form of X to the debug file, F. */
11622 arm_print_value (FILE *f, rtx x)
11624 switch (GET_CODE (x))
11627 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11631 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11639 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11641 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11642 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11650 fprintf (f, "\"%s\"", XSTR (x, 0));
11654 fprintf (f, "`%s'", XSTR (x, 0));
11658 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11662 arm_print_value (f, XEXP (x, 0));
11666 arm_print_value (f, XEXP (x, 0));
11668 arm_print_value (f, XEXP (x, 1));
11676 fprintf (f, "????");
11681 /* Routines for manipulation of the constant pool. */
11683 /* Arm instructions cannot load a large constant directly into a
11684 register; they have to come from a pc relative load. The constant
11685 must therefore be placed in the addressable range of the pc
11686 relative load. Depending on the precise pc relative load
11687 instruction the range is somewhere between 256 bytes and 4k. This
11688 means that we often have to dump a constant inside a function, and
11689 generate code to branch around it.
11691 It is important to minimize this, since the branches will slow
11692 things down and make the code larger.
11694 Normally we can hide the table after an existing unconditional
11695 branch so that there is no interruption of the flow, but in the
11696 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...
11714 We fix this by performing a scan after scheduling, which notices
11715 which instructions need to have their operands fetched from the
11716 constant table and builds the table.
11718 The algorithm starts by building a table of all the constants that
11719 need fixing up and all the natural barriers in the function (places
11720 where a constant table can be dropped without breaking the flow).
11721 For each fixup we note how far the pc-relative replacement will be
11722 able to reach and the offset of the instruction into the function.
11724 Having built the table we then group the fixes together to form
11725 tables that are as large as possible (subject to addressing
11726 constraints) and emit each table of constants after the last
11727 barrier that is within range of all the instructions in the group.
11728 If a group does not contain a barrier, then we forcibly create one
11729 by inserting a jump instruction into the flow. Once the table has
11730 been inserted, the insns are then modified to reference the
11731 relevant entry in the pool.
11733 Possible enhancements to the algorithm (not implemented) are:
11735 1) For some processors and object formats, there may be benefit in
11736 aligning the pools to the start of cache lines; this alignment
11737 would need to be taken into account when calculating addressability of a label. */
11740 /* These typedefs are located at the start of this file, so that
11741 they can be used in the prototypes there. This comment is to
11742 remind readers of that fact so that the following structures
11743 can be understood more easily.
11745 typedef struct minipool_node Mnode;
11746 typedef struct minipool_fixup Mfix; */
11748 struct minipool_node
11750 /* Doubly linked chain of entries. */
11753 /* The maximum offset into the code that this entry can be placed. While
11754 pushing fixes for forward references, all entries are sorted in order
11755 of increasing max_address. */
11756 HOST_WIDE_INT max_address;
11757 /* Similarly for an entry inserted for a backwards ref. */
11758 HOST_WIDE_INT min_address;
11759 /* The number of fixes referencing this entry. This can become zero
11760 if we "unpush" an entry. In this case we ignore the entry when we
11761 come to emit the code. */
11763 /* The offset from the start of the minipool. */
11764 HOST_WIDE_INT offset;
11765 /* The value in the table. */
11767 /* The mode of value. */
11768 enum machine_mode mode;
11769 /* The size of the value. With iWMMXt enabled,
11770 sizes > 4 also imply an alignment of 8 bytes. */
11774 struct minipool_fixup
11778 HOST_WIDE_INT address;
11780 enum machine_mode mode;
11784 HOST_WIDE_INT forwards;
11785 HOST_WIDE_INT backwards;
11788 /* Fixes less than a word need padding out to a word boundary. */
11789 #define MINIPOOL_FIX_SIZE(mode) \
11790 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
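/* Editorial sketch (not part of the original source): the padding rule
   above in plain C.  QImode (1 byte) and HImode (2 bytes) entries both
   occupy 4 bytes in the pool; SImode stays at 4, DImode at 8.  */
#if 0
static int minipool_fix_size (int mode_size)
{
  return mode_size >= 4 ? mode_size : 4;
}
#endif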
11792 static Mnode * minipool_vector_head;
11793 static Mnode * minipool_vector_tail;
11794 static rtx minipool_vector_label;
11795 static int minipool_pad;
11797 /* The linked list of all minipool fixes required for this function. */
11798 Mfix * minipool_fix_head;
11799 Mfix * minipool_fix_tail;
11800 /* The fix entry for the current minipool, once it has been placed. */
11801 Mfix * minipool_barrier;
11803 /* Determines if INSN is the start of a jump table. Returns the end
11804 of the TABLE or NULL_RTX. */
11806 is_jump_table (rtx insn)
11810 if (jump_to_label_p (insn)
11811 && ((table = next_real_insn (JUMP_LABEL (insn)))
11812 == next_real_insn (insn))
11814 && GET_CODE (table) == JUMP_INSN
11815 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11816 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11822 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11823 #define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
11826 static HOST_WIDE_INT
11827 get_jump_table_size (rtx insn)
11829 /* ADDR_VECs only take room if read-only data goes into the text section. */
11831 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11833 rtx body = PATTERN (insn);
11834 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11835 HOST_WIDE_INT size;
11836 HOST_WIDE_INT modesize;
11838 modesize = GET_MODE_SIZE (GET_MODE (body));
11839 size = modesize * XVECLEN (body, elt);
11843 /* Round up size of TBB table to a halfword boundary. */
11844 size = (size + 1) & ~(HOST_WIDE_INT)1;
11847 /* No padding necessary for TBH. */
11850 /* Add two bytes for alignment on Thumb. */
11855 gcc_unreachable ();
11863 /* Return the maximum amount of padding that will be inserted before label LABEL. */
11866 static HOST_WIDE_INT
11867 get_label_padding (rtx label)
11869 HOST_WIDE_INT align, min_insn_size;
11871 align = 1 << label_to_alignment (label);
11872 min_insn_size = TARGET_THUMB ? 2 : 4;
11873 return align > min_insn_size ? align - min_insn_size : 0;
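/* Editorial sketch (not part of the original source): a worked example
   of the bound above, with illustrative values.  */
#if 0
static void label_padding_example (void)
{
  long align = 8, min_insn_size = 2;   /* Thumb insns, 8-byte alignment.  */
  long pad = align > min_insn_size ? align - min_insn_size : 0;
  /* pad == 6: any larger gap would have held another instruction.  */
}
#endif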
11876 /* Move a minipool fix MP from its current location to before MAX_MP.
11877 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11878 constraints may need updating. */
11880 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11881 HOST_WIDE_INT max_address)
11883 /* The code below assumes these are different. */
11884 gcc_assert (mp != max_mp);
11886 if (max_mp == NULL)
11888 if (max_address < mp->max_address)
11889 mp->max_address = max_address;
11893 if (max_address > max_mp->max_address - mp->fix_size)
11894 mp->max_address = max_mp->max_address - mp->fix_size;
11896 mp->max_address = max_address;
11898 /* Unlink MP from its current position. Since max_mp is non-null,
11899 mp->prev must be non-null. */
11900 mp->prev->next = mp->next;
11901 if (mp->next != NULL)
11902 mp->next->prev = mp->prev;
11904 minipool_vector_tail = mp->prev;
11906 /* Re-insert it before MAX_MP. */
11908 mp->prev = max_mp->prev;
11911 if (mp->prev != NULL)
11912 mp->prev->next = mp;
11914 minipool_vector_head = mp;
11917 /* Save the new entry. */
11920 /* Scan over the preceding entries and adjust their addresses as required. */
11922 while (mp->prev != NULL
11923 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11925 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11932 /* Add a constant to the minipool for a forward reference. Returns the
11933 node added or NULL if the constant will not fit in this pool. */
11935 add_minipool_forward_ref (Mfix *fix)
11937 /* If set, max_mp is the first pool_entry that has a lower
11938 constraint than the one we are trying to add. */
11939 Mnode * max_mp = NULL;
11940 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11943 /* If the minipool starts before the end of FIX->INSN then this FIX
11944 cannot be placed into the current pool. Furthermore, adding the
11945 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
11947 if (minipool_vector_head &&
11948 (fix->address + get_attr_length (fix->insn)
11949 >= minipool_vector_head->max_address - fix->fix_size))
11952 /* Scan the pool to see if a constant with the same value has
11953 already been added. While we are doing this, also note the
11954 location where we must insert the constant if it doesn't already exist. */
11956 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11958 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11959 && fix->mode == mp->mode
11960 && (GET_CODE (fix->value) != CODE_LABEL
11961 || (CODE_LABEL_NUMBER (fix->value)
11962 == CODE_LABEL_NUMBER (mp->value)))
11963 && rtx_equal_p (fix->value, mp->value))
11965 /* More than one fix references this entry. */
11967 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11970 /* Note the insertion point if necessary. */
11972 && mp->max_address > max_address)
11975 /* If we are inserting an 8-byte aligned quantity and
11976 we have not already found an insertion point, then
11977 make sure that all such 8-byte aligned quantities are
11978 placed at the start of the pool. */
11979 if (ARM_DOUBLEWORD_ALIGN
11981 && fix->fix_size >= 8
11982 && mp->fix_size < 8)
11985 max_address = mp->max_address;
11989 /* The value is not currently in the minipool, so we need to create
11990 a new entry for it. If MAX_MP is NULL, the entry will be put on
11991 the end of the list since the placement is less constrained than
11992 any existing entry. Otherwise, we insert the new fix before
11993 MAX_MP and, if necessary, adjust the constraints on the other entries. */
11996 mp->fix_size = fix->fix_size;
11997 mp->mode = fix->mode;
11998 mp->value = fix->value;
12000 /* Not yet required for a backwards ref. */
12001 mp->min_address = -65536;
12003 if (max_mp == NULL)
12005 mp->max_address = max_address;
12007 mp->prev = minipool_vector_tail;
12009 if (mp->prev == NULL)
12011 minipool_vector_head = mp;
12012 minipool_vector_label = gen_label_rtx ();
12015 mp->prev->next = mp;
12017 minipool_vector_tail = mp;
12021 if (max_address > max_mp->max_address - mp->fix_size)
12022 mp->max_address = max_mp->max_address - mp->fix_size;
12024 mp->max_address = max_address;
12027 mp->prev = max_mp->prev;
12029 if (mp->prev != NULL)
12030 mp->prev->next = mp;
12032 minipool_vector_head = mp;
12035 /* Save the new entry. */
12038 /* Scan over the preceding entries and adjust their addresses as required. */
12040 while (mp->prev != NULL
12041 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12043 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12051 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12052 HOST_WIDE_INT min_address)
12054 HOST_WIDE_INT offset;
12056 /* The code below assumes these are different. */
12057 gcc_assert (mp != min_mp);
12059 if (min_mp == NULL)
12061 if (min_address > mp->min_address)
12062 mp->min_address = min_address;
12066 /* We will adjust this below if it is too loose. */
12067 mp->min_address = min_address;
12069 /* Unlink MP from its current position. Since min_mp is non-null,
12070 mp->next must be non-null. */
12071 mp->next->prev = mp->prev;
12072 if (mp->prev != NULL)
12073 mp->prev->next = mp->next;
12075 minipool_vector_head = mp->next;
12077 /* Reinsert it after MIN_MP. */
12079 mp->next = min_mp->next;
12081 if (mp->next != NULL)
12082 mp->next->prev = mp;
12084 minipool_vector_tail = mp;
12090 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12092 mp->offset = offset;
12093 if (mp->refcount > 0)
12094 offset += mp->fix_size;
12096 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12097 mp->next->min_address = mp->min_address + mp->fix_size;
12103 /* Add a constant to the minipool for a backward reference. Returns the
12104 node added or NULL if the constant will not fit in this pool.
12106 Note that the code for insertion for a backwards reference can be
12107 somewhat confusing because the calculated offsets for each fix do
12108 not take into account the size of the pool (which is still under construction). */
12111 add_minipool_backward_ref (Mfix *fix)
12113 /* If set, min_mp is the last pool_entry that has a lower constraint
12114 than the one we are trying to add. */
12115 Mnode *min_mp = NULL;
12116 /* This can be negative, since it is only a constraint. */
12117 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12120 /* If we can't reach the current pool from this insn, or if we can't
12121 insert this entry at the end of the pool without pushing other
12122 fixes out of range, then we don't try. This ensures that we
12123 can't fail later on. */
12124 if (min_address >= minipool_barrier->address
12125 || (minipool_vector_tail->min_address + fix->fix_size
12126 >= minipool_barrier->address))
12129 /* Scan the pool to see if a constant with the same value has
12130 already been added. While we are doing this, also note the
12131 location where we must insert the constant if it doesn't already exist. */
12133 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12135 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12136 && fix->mode == mp->mode
12137 && (GET_CODE (fix->value) != CODE_LABEL
12138 || (CODE_LABEL_NUMBER (fix->value)
12139 == CODE_LABEL_NUMBER (mp->value)))
12140 && rtx_equal_p (fix->value, mp->value)
12141 /* Check that there is enough slack to move this entry to the
12142 end of the table (this is conservative). */
12143 && (mp->max_address
12144 > (minipool_barrier->address
12145 + minipool_vector_tail->offset
12146 + minipool_vector_tail->fix_size)))
12149 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12152 if (min_mp != NULL)
12153 mp->min_address += fix->fix_size;
12156 /* Note the insertion point if necessary. */
12157 if (mp->min_address < min_address)
12159 /* For now, we do not allow the insertion of 8-byte alignment
12160 requiring nodes anywhere but at the start of the pool. */
12161 if (ARM_DOUBLEWORD_ALIGN
12162 && fix->fix_size >= 8 && mp->fix_size < 8)
12167 else if (mp->max_address
12168 < minipool_barrier->address + mp->offset + fix->fix_size)
12170 /* Inserting before this entry would push the fix beyond
12171 its maximum address (which can happen if we have
12172 re-located a forwards fix); force the new fix to come after it. */
12174 if (ARM_DOUBLEWORD_ALIGN
12175 && fix->fix_size >= 8 && mp->fix_size < 8)
12180 min_address = mp->min_address + fix->fix_size;
12183 /* Do not insert a non-8-byte aligned quantity before 8-byte
12184 aligned quantities. */
12185 else if (ARM_DOUBLEWORD_ALIGN
12186 && fix->fix_size < 8
12187 && mp->fix_size >= 8)
12190 min_address = mp->min_address + fix->fix_size;
12195 /* We need to create a new entry. */
12197 mp->fix_size = fix->fix_size;
12198 mp->mode = fix->mode;
12199 mp->value = fix->value;
12201 mp->max_address = minipool_barrier->address + 65536;
12203 mp->min_address = min_address;
12205 if (min_mp == NULL)
12208 mp->next = minipool_vector_head;
12210 if (mp->next == NULL)
12212 minipool_vector_tail = mp;
12213 minipool_vector_label = gen_label_rtx ();
12216 mp->next->prev = mp;
12218 minipool_vector_head = mp;
12222 mp->next = min_mp->next;
12226 if (mp->next != NULL)
12227 mp->next->prev = mp;
12229 minipool_vector_tail = mp;
12232 /* Save the new entry. */
12240 /* Scan over the following entries and adjust their offsets. */
12241 while (mp->next != NULL)
12243 if (mp->next->min_address < mp->min_address + mp->fix_size)
12244 mp->next->min_address = mp->min_address + mp->fix_size;
12247 mp->next->offset = mp->offset + mp->fix_size;
12249 mp->next->offset = mp->offset;
12258 assign_minipool_offsets (Mfix *barrier)
12260 HOST_WIDE_INT offset = 0;
12263 minipool_barrier = barrier;
12265 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12267 mp->offset = offset;
12269 if (mp->refcount > 0)
12270 offset += mp->fix_size;
12274 /* Output the literal table */
12276 dump_minipool (rtx scan)
12282 if (ARM_DOUBLEWORD_ALIGN)
12283 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12284 if (mp->refcount > 0 && mp->fix_size >= 8)
12291 fprintf (dump_file,
12292 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12293 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12295 scan = emit_label_after (gen_label_rtx (), scan);
12296 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12297 scan = emit_label_after (minipool_vector_label, scan);
12299 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12301 if (mp->refcount > 0)
12305 fprintf (dump_file,
12306 ";; Offset %u, min %ld, max %ld ",
12307 (unsigned) mp->offset, (unsigned long) mp->min_address,
12308 (unsigned long) mp->max_address);
12309 arm_print_value (dump_file, mp->value);
12310 fputc ('\n', dump_file);
12313 switch (mp->fix_size)
12315 #ifdef HAVE_consttable_1
12317 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12321 #ifdef HAVE_consttable_2
12323 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12327 #ifdef HAVE_consttable_4
12329 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12333 #ifdef HAVE_consttable_8
12335 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12339 #ifdef HAVE_consttable_16
12341 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12346 gcc_unreachable ();
12354 minipool_vector_head = minipool_vector_tail = NULL;
12355 scan = emit_insn_after (gen_consttable_end (), scan);
12356 scan = emit_barrier_after (scan);
12359 /* Return the cost of forcibly inserting a barrier after INSN. */
12361 arm_barrier_cost (rtx insn)
12363 /* Basing the location of the pool on the loop depth is preferable,
12364 but at the moment, the basic block information seems to be
12365 corrupt by this stage of the compilation. */
12366 int base_cost = 50;
12367 rtx next = next_nonnote_insn (insn);
12369 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12372 switch (GET_CODE (insn))
12375 /* It will always be better to place the table before the label, rather than after it. */
12384 return base_cost - 10;
12387 return base_cost + 10;
12391 /* Find the best place in the insn stream in the range
12392 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12393 Create the barrier by inserting a jump and add a new fix entry for it. */
12396 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12398 HOST_WIDE_INT count = 0;
12400 rtx from = fix->insn;
12401 /* The instruction after which we will insert the jump. */
12402 rtx selected = NULL;
12404 /* The address at which the jump instruction will be placed. */
12405 HOST_WIDE_INT selected_address;
12407 HOST_WIDE_INT max_count = max_address - fix->address;
12408 rtx label = gen_label_rtx ();
12410 selected_cost = arm_barrier_cost (from);
12411 selected_address = fix->address;
12413 while (from && count < max_count)
12418 /* This code shouldn't have been called if there was a natural barrier within range. */
12420 gcc_assert (GET_CODE (from) != BARRIER);
12422 /* Count the length of this insn. This must stay in sync with the
12423 code that pushes minipool fixes. */
12424 if (LABEL_P (from))
12425 count += get_label_padding (from);
12427 count += get_attr_length (from);
12429 /* If there is a jump table, add its length. */
12430 tmp = is_jump_table (from);
12433 count += get_jump_table_size (tmp);
12435 /* Jump tables aren't in a basic block, so base the cost on
12436 the dispatch insn. If we select this location, we will
12437 still put the pool after the table. */
12438 new_cost = arm_barrier_cost (from);
12440 if (count < max_count
12441 && (!selected || new_cost <= selected_cost))
12444 selected_cost = new_cost;
12445 selected_address = fix->address + count;
12448 /* Continue after the dispatch table. */
12449 from = NEXT_INSN (tmp);
12453 new_cost = arm_barrier_cost (from);
12455 if (count < max_count
12456 && (!selected || new_cost <= selected_cost))
12459 selected_cost = new_cost;
12460 selected_address = fix->address + count;
12463 from = NEXT_INSN (from);
12466 /* Make sure that we found a place to insert the jump. */
12467 gcc_assert (selected);
12469 /* Make sure we do not split a call and its corresponding
12470 CALL_ARG_LOCATION note. */
12471 if (CALL_P (selected))
12473 rtx next = NEXT_INSN (selected);
12474 if (next && NOTE_P (next)
12475 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12479 /* Create a new JUMP_INSN that branches around a barrier. */
12480 from = emit_jump_insn_after (gen_jump (label), selected);
12481 JUMP_LABEL (from) = label;
12482 barrier = emit_barrier_after (from);
12483 emit_label_after (label, barrier);
12485 /* Create a minipool barrier entry for the new barrier. */
12486 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12487 new_fix->insn = barrier;
12488 new_fix->address = selected_address;
12489 new_fix->next = fix->next;
12490 fix->next = new_fix;
12495 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
12498 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12500 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12503 fix->address = address;
12506 if (minipool_fix_head != NULL)
12507 minipool_fix_tail->next = fix;
12509 minipool_fix_head = fix;
12511 minipool_fix_tail = fix;
12514 /* Record INSN, which will need fixing up to load a value from the
12515 minipool. ADDRESS is the offset of the insn since the start of the
12516 function; LOC is a pointer to the part of the insn which requires
12517 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
12520 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12521 enum machine_mode mode, rtx value)
12523 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12526 fix->address = address;
12529 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12530 fix->value = value;
12531 fix->forwards = get_attr_pool_range (insn);
12532 fix->backwards = get_attr_neg_pool_range (insn);
12533 fix->minipool = NULL;
12535 /* If an insn doesn't have a range defined for it, then it isn't
12536 expecting to be reworked by this code. Better to stop now than
12537 to generate duff assembly code. */
12538 gcc_assert (fix->forwards || fix->backwards);
12540 /* If an entry requires 8-byte alignment then assume all constant pools
12541 require 4 bytes of padding. Trying to do this later on a per-pool
12542 basis is awkward because existing pool entries have to be modified. */
12543 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12548 fprintf (dump_file,
12549 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12550 GET_MODE_NAME (mode),
12551 INSN_UID (insn), (unsigned long) address,
12552 -1 * (long)fix->backwards, (long)fix->forwards);
12553 arm_print_value (dump_file, fix->value);
12554 fprintf (dump_file, "\n");
12557 /* Add it to the chain of fixes. */
12560 if (minipool_fix_head != NULL)
12561 minipool_fix_tail->next = fix;
12563 minipool_fix_head = fix;
12565 minipool_fix_tail = fix;
12568 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12569 Returns the number of insns needed, or 99 if we don't know how to generate it inline. */
12572 arm_const_double_inline_cost (rtx val)
12574 rtx lowpart, highpart;
12575 enum machine_mode mode;
12577 mode = GET_MODE (val);
12579 if (mode == VOIDmode)
12582 gcc_assert (GET_MODE_SIZE (mode) == 8);
12584 lowpart = gen_lowpart (SImode, val);
12585 highpart = gen_highpart_mode (SImode, mode, val);
12587 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12588 gcc_assert (GET_CODE (highpart) == CONST_INT);
12590 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12591 NULL_RTX, NULL_RTX, 0, 0)
12592 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12593 NULL_RTX, NULL_RTX, 0, 0));
12596 /* Return true if it is worthwhile to split a 64-bit constant into two
12597 32-bit operations. This is the case if optimizing for size, or
12598 if we have load delay slots, or if one 32-bit part can be done with
12599 a single data operation. */
12601 arm_const_double_by_parts (rtx val)
12603 enum machine_mode mode = GET_MODE (val);
12606 if (optimize_size || arm_ld_sched)
12609 if (mode == VOIDmode)
12612 part = gen_highpart_mode (SImode, mode, val);
12614 gcc_assert (GET_CODE (part) == CONST_INT);
12616 if (const_ok_for_arm (INTVAL (part))
12617 || const_ok_for_arm (~INTVAL (part)))
12620 part = gen_lowpart (SImode, val);
12622 gcc_assert (GET_CODE (part) == CONST_INT);
12624 if (const_ok_for_arm (INTVAL (part))
12625 || const_ok_for_arm (~INTVAL (part)))
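/* Editorial sketch (not part of the original source): the shape of the
   const_ok_for_arm () test used above, under the usual ARM rule that a
   data-processing immediate is an 8-bit value rotated right by an even
   amount.  This stand-alone version is illustrative only.  */
#if 0
static int arm_immediate_p (unsigned int x)
{
  int r;
  for (r = 0; r < 32; r += 2)
    {
      /* Rotate X left by R bits and see if the result fits in 8 bits.  */
      unsigned int v = (x << r) | (x >> ((32 - r) & 31));
      if (v <= 0xff)
        return 1;
    }
  return 0;
}
#endif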
12631 /* Return true if it is possible to inline both the high and low parts
12632 of a 64-bit constant into 32-bit data processing instructions. */
12634 arm_const_double_by_immediates (rtx val)
12636 enum machine_mode mode = GET_MODE (val);
12639 if (mode == VOIDmode)
12642 part = gen_highpart_mode (SImode, mode, val);
12644 gcc_assert (GET_CODE (part) == CONST_INT);
12646 if (!const_ok_for_arm (INTVAL (part)))
12649 part = gen_lowpart (SImode, val);
12651 gcc_assert (GET_CODE (part) == CONST_INT);
12653 if (!const_ok_for_arm (INTVAL (part)))
12659 /* Scan INSN and note any of its operands that need fixing.
12660 If DO_PUSHES is false we do not actually push any of the fixups
12661 needed. The function returns TRUE if any fixups were needed/pushed.
12662 This is used by arm_memory_load_p() which needs to know about loads
12663 of constants that will be converted into minipool loads. */
12665 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12667 bool result = false;
12670 extract_insn (insn);
12672 if (!constrain_operands (1))
12673 fatal_insn_not_found (insn);
12675 if (recog_data.n_alternatives == 0)
12678 /* Fill in recog_op_alt with information about the constraints of this insn. */
12680 preprocess_constraints ();
12682 for (opno = 0; opno < recog_data.n_operands; opno++)
12684 /* Things we need to fix can only occur in inputs. */
12685 if (recog_data.operand_type[opno] != OP_IN)
12688 /* If this alternative is a memory reference, then any mention
12689 of constants in this alternative is really to fool reload
12690 into allowing us to accept one there. We need to fix them up
12691 now so that we output the right code. */
12692 if (recog_op_alt[opno][which_alternative].memory_ok)
12694 rtx op = recog_data.operand[opno];
12696 if (CONSTANT_P (op))
12699 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12700 recog_data.operand_mode[opno], op);
12703 else if (GET_CODE (op) == MEM
12704 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12705 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12709 rtx cop = avoid_constant_pool_reference (op);
12711 /* Casting the address of something to a mode narrower
12712 than a word can cause avoid_constant_pool_reference()
12713 to return the pool reference itself. That's no good to
12714 us here. Let's just hope that we can use the
12715 constant pool value directly. */
12717 cop = get_pool_constant (XEXP (op, 0));
12719 push_minipool_fix (insn, address,
12720 recog_data.operand_loc[opno],
12721 recog_data.operand_mode[opno], cop);
12732 /* Convert instructions to their cc-clobbering variant if possible, since
12733 that allows us to use smaller encodings. */
12736 thumb2_reorg (void)
12741 INIT_REG_SET (&live);
12743 /* We are freeing block_for_insn in the toplev to keep compatibility
12744 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12745 compute_bb_for_insn ();
12752 COPY_REG_SET (&live, DF_LR_OUT (bb));
12753 df_simulate_initialize_backwards (bb, &live);
12754 FOR_BB_INSNS_REVERSE (bb, insn)
12756 if (NONJUMP_INSN_P (insn)
12757 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12759 rtx pat = PATTERN (insn);
12760 if (GET_CODE (pat) == SET
12761 && low_register_operand (XEXP (pat, 0), SImode)
12762 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12763 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12764 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12766 rtx dst = XEXP (pat, 0);
12767 rtx src = XEXP (pat, 1);
12768 rtx op0 = XEXP (src, 0);
12769 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12770 ? XEXP (src, 1) : NULL);
12772 if (rtx_equal_p (dst, op0)
12773 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12775 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12776 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12777 rtvec vec = gen_rtvec (2, pat, clobber);
12779 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12780 INSN_CODE (insn) = -1;
12782 /* We can also handle a commutative operation where the
12783 second operand matches the destination. */
12784 else if (op1 && rtx_equal_p (dst, op1))
12786 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12787 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12790 src = copy_rtx (src);
12791 XEXP (src, 0) = op1;
12792 XEXP (src, 1) = op0;
12793 pat = gen_rtx_SET (VOIDmode, dst, src);
12794 vec = gen_rtvec (2, pat, clobber);
12795 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12796 INSN_CODE (insn) = -1;
12801 if (NONDEBUG_INSN_P (insn))
12802 df_simulate_one_insn_backwards (bb, insn, &live);
12806 CLEAR_REG_SET (&live);
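/* Editorial sketch (not part of the original source): the effect of the
   transformation above on the emitted code.  Outside an IT block,

       add  r0, r0, r1      @ 32-bit encoding, flags untouched

   can become the flag-setting form

       adds r0, r0, r1      @ 16-bit encoding

   which is only valid because the backwards scan proved that the
   condition codes are dead at this point.  Registers are illustrative.  */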
12809 /* Gcc puts the pool in the wrong place for ARM, since we can only
12810 load addresses a limited distance around the pc. We do some
12811 special munging to move the constant pool values to the correct
12812 point in the code. */
12817 HOST_WIDE_INT address = 0;
12823 minipool_fix_head = minipool_fix_tail = NULL;
12825 /* The first insn must always be a note, or the code below won't
12826 scan it properly. */
12827 insn = get_insns ();
12828 gcc_assert (GET_CODE (insn) == NOTE);
12831 /* Scan all the insns and record the operands that will need fixing. */
12832 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12834 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12835 && (arm_cirrus_insn_p (insn)
12836 || GET_CODE (insn) == JUMP_INSN
12837 || arm_memory_load_p (insn)))
12838 cirrus_reorg (insn);
12840 if (GET_CODE (insn) == BARRIER)
12841 push_minipool_barrier (insn, address);
12842 else if (INSN_P (insn))
12846 note_invalid_constants (insn, address, true);
12847 address += get_attr_length (insn);
12849 /* If the insn is a vector jump, add the size of the table
12850 and skip the table. */
12851 if ((table = is_jump_table (insn)) != NULL)
12853 address += get_jump_table_size (table);
12857 else if (LABEL_P (insn))
12858 /* Add the worst-case padding due to alignment. We don't add
12859 the _current_ padding because the minipool insertions
12860 themselves might change it. */
12861 address += get_label_padding (insn);
12864 fix = minipool_fix_head;
12866 /* Now scan the fixups and perform the required changes. */
12871 Mfix * last_added_fix;
12872 Mfix * last_barrier = NULL;
12875 /* Skip any further barriers before the next fix. */
12876 while (fix && GET_CODE (fix->insn) == BARRIER)
12879 /* No more fixes. */
12883 last_added_fix = NULL;
12885 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12887 if (GET_CODE (ftmp->insn) == BARRIER)
12889 if (ftmp->address >= minipool_vector_head->max_address)
12892 last_barrier = ftmp;
12894 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12897 last_added_fix = ftmp; /* Keep track of the last fix added. */
12900 /* If we found a barrier, drop back to that; any fixes that we
12901 could have reached but come after the barrier will now go in
12902 the next mini-pool. */
12903 if (last_barrier != NULL)
12905 /* Reduce the refcount for those fixes that won't go into this pool after all. */
12907 for (fdel = last_barrier->next;
12908 fdel && fdel != ftmp;
12911 fdel->minipool->refcount--;
12912 fdel->minipool = NULL;
12915 ftmp = last_barrier;
12919 /* ftmp is the first fix that we can't fit into this pool and
12920 there are no natural barriers that we could use. Insert a
12921 new barrier in the code somewhere between the previous
12922 fix and this one, and arrange to jump around it. */
12923 HOST_WIDE_INT max_address;
12925 /* The last item on the list of fixes must be a barrier, so
12926 we can never run off the end of the list of fixes without
12927 last_barrier being set. */
12930 max_address = minipool_vector_head->max_address;
12931 /* Check that there isn't another fix that is in range that
12932 we couldn't fit into this pool because the pool was
12933 already too large: we need to put the pool before such an
12934 instruction. The pool itself may come just after the
12935 fix because create_fix_barrier also allows space for a
12936 jump instruction. */
12937 if (ftmp->address < max_address)
12938 max_address = ftmp->address + 1;
12940 last_barrier = create_fix_barrier (last_added_fix, max_address);
12943 assign_minipool_offsets (last_barrier);
12947 if (GET_CODE (ftmp->insn) != BARRIER
12948 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12955 /* Scan over the fixes we have identified for this pool, fixing them
12956 up and adding the constants to the pool itself. */
12957 for (this_fix = fix; this_fix && ftmp != this_fix;
12958 this_fix = this_fix->next)
12959 if (GET_CODE (this_fix->insn) != BARRIER)
12962 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12963 minipool_vector_label),
12964 this_fix->minipool->offset);
12965 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12968 dump_minipool (last_barrier->insn);
12972 /* From now on we must synthesize any constants that we can't handle
12973 directly. This can happen if the RTL gets split during final
12974 instruction generation. */
12975 after_arm_reorg = 1;
12977 /* Free the minipool memory. */
12978 obstack_free (&minipool_obstack, minipool_startobj);
12981 /* Routines to output assembly language. */
12983 /* If the rtx is the correct value then return the string of the number.
12984 In this way we can ensure that valid double constants are generated even
12985 when cross compiling. */
12987 fp_immediate_constant (rtx x)
12992 if (!fp_consts_inited)
12995 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12996 for (i = 0; i < 8; i++)
12997 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12998 return strings_fp[i];
13000 gcc_unreachable ();
13003 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13004 static const char *
13005 fp_const_from_val (REAL_VALUE_TYPE *r)
13009 if (!fp_consts_inited)
13012 for (i = 0; i < 8; i++)
13013 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13014 return strings_fp[i];
13016 gcc_unreachable ();
13019 /* Output the operands of a LDM/STM instruction to STREAM.
13020 MASK is the ARM register set mask of which only bits 0-15 are important.
13021 REG is the base register, either the frame pointer or the stack pointer,
13022 INSTR is the possibly suffixed load or store instruction.
13023 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13026 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13027 unsigned long mask, int rfe)
13030 bool not_first = FALSE;
13032 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13033 fputc ('\t', stream);
13034 asm_fprintf (stream, instr, reg);
13035 fputc ('{', stream);
13037 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13038 if (mask & (1 << i))
13041 fprintf (stream, ", ");
13043 asm_fprintf (stream, "%r", i);
13048 fprintf (stream, "}^\n");
13050 fprintf (stream, "}\n");
13054 /* Output a FLDMD instruction to STREAM.
13055 BASE is the register containing the address.
13056 REG and COUNT specify the register range.
13057 Extra registers may be added to avoid hardware bugs.
13059 We output FLDMD even for ARMv5 VFP implementations. Although
13060 FLDMD is technically not supported until ARMv6, it is believed
13061 that all VFP implementations support its use in this context. */
13064 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13068 /* Workaround ARM10 VFPr1 bug. */
13069 if (count == 2 && !arm_arch6)
13076 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13077 load into multiple parts if we have to handle more than 16 registers. */
13080 vfp_output_fldmd (stream, base, reg, 16);
13081 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13085 fputc ('\t', stream);
13086 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13088 for (i = reg; i < reg + count; i++)
13091 fputs (", ", stream);
13092 asm_fprintf (stream, "d%d", i);
13094 fputs ("}\n", stream);
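/* Editorial sketch (not part of the original source): sample output of
   the routine above for base = SP, reg = 8, count = 2 on an arm_arch6
   target:

       fldmfdd sp!, {d8, d9}

   The register numbers are illustrative only.  */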
13099 /* Output the assembly for a store multiple. */
13102 vfp_output_fstmd (rtx * operands)
13109 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13110 p = strlen (pattern);
13112 gcc_assert (GET_CODE (operands[1]) == REG);
13114 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13115 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13117 p += sprintf (&pattern[p], ", d%d", base + i);
13119 strcpy (&pattern[p], "}");
13121 output_asm_insn (pattern, operands);
13126 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
13127 number of bytes pushed. */
13130 vfp_emit_fstmd (int base_reg, int count)
13137 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13138 register pairs are stored by a store multiple insn. We avoid this
13139 by pushing an extra pair. */
13140 if (count == 2 && !arm_arch6)
13142 if (base_reg == LAST_VFP_REGNUM - 3)
13147 /* FSTMD may not store more than 16 doubleword registers at once. Split
13148 larger stores into multiple parts (up to a maximum of two, in practice). */
13153 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
13155 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13156 saved += vfp_emit_fstmd (base_reg, 16);
13160 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13161 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13163 reg = gen_rtx_REG (DFmode, base_reg);
13166 XVECEXP (par, 0, 0)
13167 = gen_rtx_SET (VOIDmode,
13170 gen_rtx_PRE_MODIFY (Pmode,
13173 (stack_pointer_rtx,
13176 gen_rtx_UNSPEC (BLKmode,
13177 gen_rtvec (1, reg),
13178 UNSPEC_PUSH_MULT));
13180 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13181 plus_constant (stack_pointer_rtx, -(count * 8)));
13182 RTX_FRAME_RELATED_P (tmp) = 1;
13183 XVECEXP (dwarf, 0, 0) = tmp;
13185 tmp = gen_rtx_SET (VOIDmode,
13186 gen_frame_mem (DFmode, stack_pointer_rtx),
13188 RTX_FRAME_RELATED_P (tmp) = 1;
13189 XVECEXP (dwarf, 0, 1) = tmp;
13191 for (i = 1; i < count; i++)
13193 reg = gen_rtx_REG (DFmode, base_reg);
13195 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13197 tmp = gen_rtx_SET (VOIDmode,
13198 gen_frame_mem (DFmode,
13199 plus_constant (stack_pointer_rtx,
13202 RTX_FRAME_RELATED_P (tmp) = 1;
13203 XVECEXP (dwarf, 0, i + 1) = tmp;
13206 par = emit_insn (par);
13207 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13208 RTX_FRAME_RELATED_P (par) = 1;
13213 /* Emit a call instruction with pattern PAT. ADDR is the address of
13214 the call target. */
13217 arm_emit_call_insn (rtx pat, rtx addr)
13221 insn = emit_call_insn (pat);
13223 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13224 If the call might use such an entry, add a use of the PIC register
13225 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13226 if (TARGET_VXWORKS_RTP
13228 && GET_CODE (addr) == SYMBOL_REF
13229 && (SYMBOL_REF_DECL (addr)
13230 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13231 : !SYMBOL_REF_LOCAL_P (addr)))
13233 require_pic_register ();
13234 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13238 /* Output a 'call' insn. */
13240 output_call (rtx *operands)
13242 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13244 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13245 if (REGNO (operands[0]) == LR_REGNUM)
13247 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13248 output_asm_insn ("mov%?\t%0, %|lr", operands);
13251 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13253 if (TARGET_INTERWORK || arm_arch4t)
13254 output_asm_insn ("bx%?\t%0", operands);
13256 output_asm_insn ("mov%?\t%|pc, %0", operands);
13261 /* Output a 'call' insn that is a reference in memory. This is
13262 disabled for ARMv5 and we prefer a blx instead because otherwise
13263 there's a significant performance overhead. */
13265 output_call_mem (rtx *operands)
13267 gcc_assert (!arm_arch5);
13268 if (TARGET_INTERWORK)
13270 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13271 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13272 output_asm_insn ("bx%?\t%|ip", operands);
13274 else if (regno_use_in (LR_REGNUM, operands[0]))
13276 /* LR is used in the memory address. We load the address in the
13277 first instruction. It's safe to use IP as the target of the
13278 load since the call will kill it anyway. */
13279 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13280 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13282 output_asm_insn ("bx%?\t%|ip", operands);
13284 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13288 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13289 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13296 /* Output a move from arm registers to an fpa register.
13297 OPERANDS[0] is an fpa register.
13298 OPERANDS[1] is the first register of an arm register pair. */
13300 output_mov_long_double_fpa_from_arm (rtx *operands)
13302 int arm_reg0 = REGNO (operands[1]);
13305 gcc_assert (arm_reg0 != IP_REGNUM);
13307 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13308 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13309 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13311 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13312 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13317 /* Output a move from an fpa register to arm registers.
13318 OPERANDS[0] is the first register of an arm register pair.
13319 OPERANDS[1] is an fpa register. */
13321 output_mov_long_double_arm_from_fpa (rtx *operands)
13323 int arm_reg0 = REGNO (operands[0]);
13326 gcc_assert (arm_reg0 != IP_REGNUM);
13328 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13329 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13330 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13332 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13333 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13337 /* Output a move from arm registers to arm registers of a long double.
13338 OPERANDS[0] is the destination.
13339 OPERANDS[1] is the source. */
13341 output_mov_long_double_arm_from_arm (rtx *operands)
13343 /* We have to be careful here because the two might overlap. */
13344 int dest_start = REGNO (operands[0]);
13345 int src_start = REGNO (operands[1]);
13349 if (dest_start < src_start)
13351 for (i = 0; i < 3; i++)
13353 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13354 ops[1] = gen_rtx_REG (SImode, src_start + i);
13355 output_asm_insn ("mov%?\t%0, %1", ops);
13360 for (i = 2; i >= 0; i--)
13362 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13363 ops[1] = gen_rtx_REG (SImode, src_start + i);
13364 output_asm_insn ("mov%?\t%0, %1", ops);
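/* Illustrative sketch (hypothetical helper, not part of the original
   file): the direction rule above is the classic memmove-style choice.
   Copying upwards is safe when the destination starts below the source;
   otherwise we must copy downwards so that no source register is
   overwritten before it has been read.  */
static void
copy_overlapping_regs (unsigned int *regfile, int dest_start, int src_start,
                       int count)
{
  int i;

  if (dest_start < src_start)   /* copy up: lowest register first */
    for (i = 0; i < count; i++)
      regfile[dest_start + i] = regfile[src_start + i];
  else                          /* copy down: highest register first */
    for (i = count - 1; i >= 0; i--)
      regfile[dest_start + i] = regfile[src_start + i];
}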
13372 arm_emit_movpair (rtx dest, rtx src)
13374 /* If the src is an immediate, simplify it. */
13375 if (CONST_INT_P (src))
13377 HOST_WIDE_INT val = INTVAL (src);
13378 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13379 if ((val >> 16) & 0x0000ffff)
13380 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13382 GEN_INT ((val >> 16) & 0x0000ffff));
13385 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13386 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
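/* Worked example (added for illustration): for SRC == 0x12345678 the
   code above first emits a set of DEST to 0x5678, then a ZERO_EXTRACT
   set writing 0x1234 into bits 16..31 of DEST -- the movw/movt pair on
   targets that have those insns.  For SRC == 0x0000abcd the second
   insn is skipped, since (0xabcd >> 16) & 0xffff is zero.  */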
13389 /* Output a move from arm registers to an fpa register.
13390 OPERANDS[0] is an fpa register.
13391 OPERANDS[1] is the first register of an arm register pair. */
13393 output_mov_double_fpa_from_arm (rtx *operands)
13395 int arm_reg0 = REGNO (operands[1]);
13398 gcc_assert (arm_reg0 != IP_REGNUM);
13400 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13401 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13402 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13403 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13407 /* Output a move from an fpa register to arm registers.
13408 OPERANDS[0] is the first register of an arm register pair.
13409 OPERANDS[1] is an fpa register. */
13411 output_mov_double_arm_from_fpa (rtx *operands)
13413 int arm_reg0 = REGNO (operands[0]);
13416 gcc_assert (arm_reg0 != IP_REGNUM);
13418 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13419 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13420 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13421 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13425 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
13428 output_move_double (rtx *operands, bool emit, int *count)
13430 enum rtx_code code0 = GET_CODE (operands[0]);
13431 enum rtx_code code1 = GET_CODE (operands[1]);
13436 /* The only case when this might happen is when
13437 you are looking at the length of a DImode instruction
13438 that has an invalid constant in it. */
13439 if (code0 == REG && code1 != MEM)
13441 gcc_assert (!emit);
13449 unsigned int reg0 = REGNO (operands[0]);
13451 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13453 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13455 switch (GET_CODE (XEXP (operands[1], 0)))
13462 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13463 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13465 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13470 gcc_assert (TARGET_LDRD);
13472 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13480 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13482 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13491 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13493 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13498 gcc_assert (TARGET_LDRD);
13500 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13505 /* Autoincrement addressing modes should never have overlapping
13506 base and destination registers, and overlapping index registers
13507 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
13509 otherops[0] = operands[0];
13510 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13511 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13513 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13515 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13517 /* Registers overlap so split out the increment. */
13520 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13521 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13528 /* Use a single insn if we can.
13529 FIXME: IWMMXT allows offsets larger than ldrd can
13530 handle, fix these up with a pair of ldr. */
13532 || GET_CODE (otherops[2]) != CONST_INT
13533 || (INTVAL (otherops[2]) > -256
13534 && INTVAL (otherops[2]) < 256))
13537 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13543 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13544 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13554 /* Use a single insn if we can.
13555 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13556 fix these up with a pair of ldr. */
13558 || GET_CODE (otherops[2]) != CONST_INT
13559 || (INTVAL (otherops[2]) > -256
13560 && INTVAL (otherops[2]) < 256))
13563 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13569 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13570 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13580 /* We might be able to use ldrd %0, %1 here. However the range is
13581 different to ldr/adr, and it is broken on some ARMv7-M
13582 implementations. */
13583 /* Use the second register of the pair to avoid problematic
13585 otherops[1] = operands[1];
13587 output_asm_insn ("adr%?\t%0, %1", otherops);
13588 operands[1] = otherops[0];
13592 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13594 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13601 /* ??? This needs checking for thumb2. */
13603 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13604 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13606 otherops[0] = operands[0];
13607 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13608 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13610 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13612 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13614 switch ((int) INTVAL (otherops[2]))
13618 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13624 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13630 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13634 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13635 operands[1] = otherops[0];
13637 && (GET_CODE (otherops[2]) == REG
13639 || (GET_CODE (otherops[2]) == CONST_INT
13640 && INTVAL (otherops[2]) > -256
13641 && INTVAL (otherops[2]) < 256)))
13643 if (reg_overlap_mentioned_p (operands[0],
13647 /* Swap base and index registers over to
13648 avoid a conflict. */
13650 otherops[1] = otherops[2];
13653 /* If both registers conflict, it will usually
13654 have been fixed by a splitter. */
13655 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13656 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13660 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13661 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13668 otherops[0] = operands[0];
13670 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13675 if (GET_CODE (otherops[2]) == CONST_INT)
13679 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13680 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13682 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13689 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13695 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13699 return "ldr%(d%)\t%0, [%1]";
13701 return "ldm%(ia%)\t%1, %M0";
13705 otherops[1] = adjust_address (operands[1], SImode, 4);
13706 /* Take care of overlapping base/data reg. */
13707 if (reg_mentioned_p (operands[0], operands[1]))
13711 output_asm_insn ("ldr%?\t%0, %1", otherops);
13712 output_asm_insn ("ldr%?\t%0, %1", operands);
13722 output_asm_insn ("ldr%?\t%0, %1", operands);
13723 output_asm_insn ("ldr%?\t%0, %1", otherops);
13733 /* Constraints should ensure this. */
13734 gcc_assert (code0 == MEM && code1 == REG);
13735 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13737 switch (GET_CODE (XEXP (operands[0], 0)))
13743 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13745 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13750 gcc_assert (TARGET_LDRD);
13752 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13759 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13761 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13769 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13771 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13776 gcc_assert (TARGET_LDRD);
13778 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13783 otherops[0] = operands[1];
13784 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13785 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13787 /* IWMMXT allows offsets larger than ldrd can handle,
13788 fix these up with a pair of ldr. */
13790 && GET_CODE (otherops[2]) == CONST_INT
13791 && (INTVAL(otherops[2]) <= -256
13792 || INTVAL(otherops[2]) >= 256))
13794 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13798 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13799 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13808 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13809 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13815 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13818 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13823 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13828 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13829 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13831 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13835 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13842 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13849 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13854 && (GET_CODE (otherops[2]) == REG
13856 || (GET_CODE (otherops[2]) == CONST_INT
13857 && INTVAL (otherops[2]) > -256
13858 && INTVAL (otherops[2]) < 256)))
13860 otherops[0] = operands[1];
13861 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13863 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13869 otherops[0] = adjust_address (operands[0], SImode, 4);
13870 otherops[1] = operands[1];
13873 output_asm_insn ("str%?\t%1, %0", operands);
13874 output_asm_insn ("str%?\t%H1, %0", otherops);
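/* Example output (a sketch, not from the original source): a DImode
   load "r0:r1 <- [r4]" becomes "ldrd r0, [r4]" when TARGET_LDRD, and
   "ldmia r4, {r0, r1}" otherwise.  For the overlapping base/data load
   case handled earlier, "r0:r1 <- [r0, #8]" without ldrd is split into

       ldr     r1, [r0, #12]   @ high word first
       ldr     r0, [r0, #8]    @ the base register is clobbered last  */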
13885 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13886 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13889 output_move_quad (rtx *operands)
13891 if (REG_P (operands[0]))
13893 /* Load, or reg->reg move. */
13895 if (MEM_P (operands[1]))
13897 switch (GET_CODE (XEXP (operands[1], 0)))
13900 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13905 output_asm_insn ("adr%?\t%0, %1", operands);
13906 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13910 gcc_unreachable ();
13918 gcc_assert (REG_P (operands[1]));
13920 dest = REGNO (operands[0]);
13921 src = REGNO (operands[1]);
13923 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
13926 for (i = 0; i < 4; i++)
13928 ops[0] = gen_rtx_REG (SImode, dest + i);
13929 ops[1] = gen_rtx_REG (SImode, src + i);
13930 output_asm_insn ("mov%?\t%0, %1", ops);
13933 for (i = 3; i >= 0; i--)
13935 ops[0] = gen_rtx_REG (SImode, dest + i);
13936 ops[1] = gen_rtx_REG (SImode, src + i);
13937 output_asm_insn ("mov%?\t%0, %1", ops);
13943 gcc_assert (MEM_P (operands[0]));
13944 gcc_assert (REG_P (operands[1]));
13945 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13947 switch (GET_CODE (XEXP (operands[0], 0)))
13950 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13954 gcc_unreachable ();
13961 /* Output a VFP load or store instruction. */
13964 output_move_vfp (rtx *operands)
13966 rtx reg, mem, addr, ops[2];
13967 int load = REG_P (operands[0]);
13968 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13969 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13972 enum machine_mode mode;
13974 reg = operands[!load];
13975 mem = operands[load];
13977 mode = GET_MODE (reg);
13979 gcc_assert (REG_P (reg));
13980 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13981 gcc_assert (mode == SFmode
13985 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13986 gcc_assert (MEM_P (mem));
13988 addr = XEXP (mem, 0);
13990 switch (GET_CODE (addr))
13993 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13994 ops[0] = XEXP (addr, 0);
13999 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14000 ops[0] = XEXP (addr, 0);
14005 templ = "f%s%c%%?\t%%%s0, %%1%s";
14011 sprintf (buff, templ,
14012 load ? "ld" : "st",
14015 integer_p ? "\t%@ int" : "");
14016 output_asm_insn (buff, ops);
14021 /* Output a Neon quad-word load or store, or a load or store for
14022 larger structure modes.
14024 WARNING: The ordering of elements is weird in big-endian mode,
14025 because we use VSTM, as required by the EABI. GCC RTL defines
14026 element ordering based on in-memory order. This can differ
14027 from the architectural ordering of elements within a NEON register.
14028 The intrinsics defined in arm_neon.h use the NEON register element
14029 ordering, not the GCC RTL element ordering.
14031 For example, the in-memory ordering of a big-endian quadword
14032 vector with 16-bit elements when stored from register pair {d0,d1}
14033 will be (lowest address first, d0[N] is NEON register element N):
14035 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14037 When necessary, quadword registers (dN, dN+1) are moved to ARM
14038 registers from rN in the order:
14040 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14042 So that STM/LDM can be used on vectors in ARM registers, and the
14043 same memory layout will result as if VSTM/VLDM were used. */
14046 output_move_neon (rtx *operands)
14048 rtx reg, mem, addr, ops[2];
14049 int regno, load = REG_P (operands[0]);
14052 enum machine_mode mode;
14054 reg = operands[!load];
14055 mem = operands[load];
14057 mode = GET_MODE (reg);
14059 gcc_assert (REG_P (reg));
14060 regno = REGNO (reg);
14061 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14062 || NEON_REGNO_OK_FOR_QUAD (regno));
14063 gcc_assert (VALID_NEON_DREG_MODE (mode)
14064 || VALID_NEON_QREG_MODE (mode)
14065 || VALID_NEON_STRUCT_MODE (mode));
14066 gcc_assert (MEM_P (mem));
14068 addr = XEXP (mem, 0);
14070 /* Strip off const from addresses like (const (plus (...))). */
14071 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14072 addr = XEXP (addr, 0);
14074 switch (GET_CODE (addr))
14077 templ = "v%smia%%?\t%%0!, %%h1";
14078 ops[0] = XEXP (addr, 0);
14083 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14084 templ = "v%smdb%%?\t%%0!, %%h1";
14085 ops[0] = XEXP (addr, 0);
14090 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14091 gcc_unreachable ();
14096 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14099 for (i = 0; i < nregs; i++)
14101 /* We're only using DImode here because it's a convenient size. */
14102 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14103 ops[1] = adjust_address (mem, DImode, 8 * i);
14104 if (reg_overlap_mentioned_p (ops[0], mem))
14106 gcc_assert (overlap == -1);
14111 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14112 output_asm_insn (buff, ops);
14117 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14118 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14119 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14120 output_asm_insn (buff, ops);
14127 templ = "v%smia%%?\t%%m0, %%h1";
14132 sprintf (buff, templ, load ? "ld" : "st");
14133 output_asm_insn (buff, ops);
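/* Example (a sketch, not from the original source): a V4SImode load of
   q0 (the D-register pair d0/d1) from the address in r0 falls through
   to the default template and becomes a "vldmia" of d0-d1 from r0; a
   POST_INC store instead uses the base-writeback form "vstmia r0!".  */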
14138 /* Compute and return the length of neon_mov<mode>, where <mode> is
14139 one of VSTRUCT modes: EI, OI, CI or XI. */
14141 arm_attr_length_move_neon (rtx insn)
14143 rtx reg, mem, addr;
14145 enum machine_mode mode;
14147 extract_insn_cached (insn);
14149 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14151 mode = GET_MODE (recog_data.operand[0]);
14162 gcc_unreachable ();
14166 load = REG_P (recog_data.operand[0]);
14167 reg = recog_data.operand[!load];
14168 mem = recog_data.operand[load];
14170 gcc_assert (MEM_P (mem));
14172 mode = GET_MODE (reg);
14173 addr = XEXP (mem, 0);
14175 /* Strip off const from addresses like (const (plus (...))). */
14176 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14177 addr = XEXP (addr, 0);
14179 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14181 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14188 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
14192 arm_address_offset_is_imm (rtx insn)
14196 extract_insn_cached (insn);
14198 if (REG_P (recog_data.operand[0]))
14201 mem = recog_data.operand[0];
14203 gcc_assert (MEM_P (mem));
14205 addr = XEXP (mem, 0);
14207 if (GET_CODE (addr) == REG
14208 || (GET_CODE (addr) == PLUS
14209 && GET_CODE (XEXP (addr, 0)) == REG
14210 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14216 /* Output an ADD r, s, #n where n may be too big for one instruction.
14217 If adding zero to one register, output nothing. */
14219 output_add_immediate (rtx *operands)
14221 HOST_WIDE_INT n = INTVAL (operands[2]);
14223 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14226 output_multi_immediate (operands,
14227 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14230 output_multi_immediate (operands,
14231 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14238 /* Output a multiple immediate operation.
14239 OPERANDS is the vector of operands referred to in the output patterns.
14240 INSTR1 is the output pattern to use for the first constant.
14241 INSTR2 is the output pattern to use for subsequent constants.
14242 IMMED_OP is the index of the constant slot in OPERANDS.
14243 N is the constant value. */
14244 static const char *
14245 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14246 int immed_op, HOST_WIDE_INT n)
14248 #if HOST_BITS_PER_WIDE_INT > 32
14254 /* Quick and easy output. */
14255 operands[immed_op] = const0_rtx;
14256 output_asm_insn (instr1, operands);
14261 const char * instr = instr1;
14263 /* Note that n is never zero here (which would give no output). */
14264 for (i = 0; i < 32; i += 2)
14268 operands[immed_op] = GEN_INT (n & (255 << i));
14269 output_asm_insn (instr, operands);
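/* Illustrative sketch (hypothetical helper, not in the original file):
   how many insns the loop above emits for a given constant.  ARM
   data-processing immediates are an 8-bit value rotated right by an
   even amount, so each 8-bit chunk starting at an even bit position
   costs exactly one add/sub.  */
static int
count_multi_immediate_insns (unsigned int n)
{
  int i, count = 0;

  for (i = 0; i < 32; i += 2)
    if (n & (3u << i))
      {
        count++;        /* one insn handles bits i .. i+7 */
        i += 6;         /* plus the loop step, skip the whole chunk */
      }
  return count;
}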
14279 /* Return the name of a shifter operation. */
14280 static const char *
14281 arm_shift_nmem(enum rtx_code code)
14286 return ARM_LSL_NAME;
14302 /* Return the appropriate ARM instruction for the operation code.
14303 The returned result should not be overwritten. OP is the rtx of the
14304 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14307 arithmetic_instr (rtx op, int shift_first_arg)
14309 switch (GET_CODE (op))
14315 return shift_first_arg ? "rsb" : "sub";
14330 return arm_shift_nmem(GET_CODE(op));
14333 gcc_unreachable ();
14337 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14338 for the operation code. The returned result should not be overwritten.
14339 OP is the rtx code of the shift.
14340 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it holds the constant shift amount.
14342 static const char *
14343 shift_op (rtx op, HOST_WIDE_INT *amountp)
14346 enum rtx_code code = GET_CODE (op);
14348 switch (GET_CODE (XEXP (op, 1)))
14356 *amountp = INTVAL (XEXP (op, 1));
14360 gcc_unreachable ();
14366 gcc_assert (*amountp != -1);
14367 *amountp = 32 - *amountp;
14370 /* Fall through. */
14376 mnem = arm_shift_nmem(code);
14380 /* We never have to worry about the amount being other than a
14381 power of 2, since this case can never be reloaded from a reg. */
14382 gcc_assert (*amountp != -1);
14383 *amountp = int_log2 (*amountp);
14384 return ARM_LSL_NAME;
14387 gcc_unreachable ();
14390 if (*amountp != -1)
14392 /* This is not 100% correct, but follows from the desire to merge
14393 multiplication by a power of 2 with the recognizer for a
14394 shift. >=32 is not a valid shift for "lsl", so we must try and
14395 output a shift that produces the correct arithmetical result.
14396 Using lsr #32 is identical except for the fact that the carry bit
14397 is not set correctly if we set the flags; but we never use the
14398 carry bit from such an operation, so we can ignore that. */
14399 if (code == ROTATERT)
14400 /* Rotate is just modulo 32. */
14402 else if (*amountp != (*amountp & 31))
14404 if (code == ASHIFT)
14409 /* Shifts of 0 are no-ops. */
14417 /* Obtain the shift count from POWER, which must be a power of two. */
14419 static HOST_WIDE_INT
14420 int_log2 (HOST_WIDE_INT power)
14422 HOST_WIDE_INT shift = 0;
14424 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14426 gcc_assert (shift <= 31);
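/* Worked example (added for illustration): int_log2 (8) stops with
   SHIFT == 3, so a multiply by 8 reaching shift_op above is emitted as
   "lsl #3"; the assertion catches a POWER with no bit set in positions
   0..31.  */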
14433 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14434 because /bin/as is horribly restrictive. The judgement about
14435 whether or not each character is 'printable' (and can be output as
14436 is) or not (and must be printed with an octal escape) must be made
14437 with reference to the *host* character set -- the situation is
14438 similar to that discussed in the comments above pp_c_char in
14439 c-pretty-print.c. */
14441 #define MAX_ASCII_LEN 51
14444 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14447 int len_so_far = 0;
14449 fputs ("\t.ascii\t\"", stream);
14451 for (i = 0; i < len; i++)
14455 if (len_so_far >= MAX_ASCII_LEN)
14457 fputs ("\"\n\t.ascii\t\"", stream);
14463 if (c == '\\' || c == '\"')
14465 putc ('\\', stream);
14473 fprintf (stream, "\\%03o", c);
14478 fputs ("\"\n", stream);
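/* Example output (a sketch, not from the original source): for the
   input bytes { 'h', 'i', '"', '\n' } the routine emits

       .ascii  "hi\"\012"

   -- the quote gets a backslash escape, and the newline, being
   non-printable, is emitted as a three-digit octal escape.  Inputs
   longer than MAX_ASCII_LEN are split across several .ascii lines.  */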
14481 /* Compute the register save mask for registers 0 through 12
14482 inclusive. This code is used by arm_compute_save_reg_mask. */
14484 static unsigned long
14485 arm_compute_save_reg0_reg12_mask (void)
14487 unsigned long func_type = arm_current_func_type ();
14488 unsigned long save_reg_mask = 0;
14491 if (IS_INTERRUPT (func_type))
14493 unsigned int max_reg;
14494 /* Interrupt functions must not corrupt any registers,
14495 even call clobbered ones. If this is a leaf function
14496 we can just examine the registers used by the RTL, but
14497 otherwise we have to assume that whatever function is
14498 called might clobber anything, and so we have to save
14499 all the call-clobbered registers as well. */
14500 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14501 /* FIQ handlers have registers r8 - r12 banked, so
14502 we only need to check r0 - r7. Normal ISRs only
14503 bank r14 and r15, so we must check up to r12.
14504 r13 is the stack pointer, which is always preserved,
14505 so we do not need to consider it here. */
14510 for (reg = 0; reg <= max_reg; reg++)
14511 if (df_regs_ever_live_p (reg)
14512 || (! current_function_is_leaf && call_used_regs[reg]))
14513 save_reg_mask |= (1 << reg);
14515 /* Also save the pic base register if necessary. */
14517 && !TARGET_SINGLE_PIC_BASE
14518 && arm_pic_register != INVALID_REGNUM
14519 && crtl->uses_pic_offset_table)
14520 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14522 else if (IS_VOLATILE(func_type))
14524 /* For noreturn functions we historically omitted register saves
14525 altogether. However, this really messes up debugging. As a
14526 compromise, save just the frame pointers. Combined with the link
14527 register saved elsewhere, this should be sufficient to get a backtrace. */
14529 if (frame_pointer_needed)
14530 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14531 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14532 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14533 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14534 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14538 /* In the normal case we only need to save those registers
14539 which are call saved and which are used by this function. */
14540 for (reg = 0; reg <= 11; reg++)
14541 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14542 save_reg_mask |= (1 << reg);
14544 /* Handle the frame pointer as a special case. */
14545 if (frame_pointer_needed)
14546 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14548 /* If we aren't loading the PIC register,
14549 don't stack it even though it may be live. */
14551 && !TARGET_SINGLE_PIC_BASE
14552 && arm_pic_register != INVALID_REGNUM
14553 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14554 || crtl->uses_pic_offset_table))
14555 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14557 /* The prologue will copy SP into R0, so save it. */
14558 if (IS_STACKALIGN (func_type))
14559 save_reg_mask |= 1;
14562 /* Save registers so the exception handler can modify them. */
14563 if (crtl->calls_eh_return)
14569 reg = EH_RETURN_DATA_REGNO (i);
14570 if (reg == INVALID_REGNUM)
14572 save_reg_mask |= 1 << reg;
14576 return save_reg_mask;
14580 /* Compute the number of bytes used to store the static chain register on the
14581 stack, above the stack frame. We need to know this accurately to get the
14582 alignment of the rest of the stack frame correct. */
14584 static int arm_compute_static_chain_stack_bytes (void)
14586 unsigned long func_type = arm_current_func_type ();
14587 int static_chain_stack_bytes = 0;
14589 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
14590 && IS_NESTED (func_type)
14591 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14592 static_chain_stack_bytes = 4;
14594 return static_chain_stack_bytes;
14598 /* Compute a bit mask of which registers need to be
14599 saved on the stack for the current function.
14600 This is used by arm_get_frame_offsets, which may add extra registers. */
14602 static unsigned long
14603 arm_compute_save_reg_mask (void)
14605 unsigned int save_reg_mask = 0;
14606 unsigned long func_type = arm_current_func_type ();
14609 if (IS_NAKED (func_type))
14610 /* This should never really happen. */
14613 /* If we are creating a stack frame, then we must save the frame pointer,
14614 IP (which will hold the old stack pointer), LR and the PC. */
14615 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14617 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14620 | (1 << PC_REGNUM);
14622 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14624 /* Decide if we need to save the link register.
14625 Interrupt routines have their own banked link register,
14626 so they never need to save it.
14627 Otherwise if we do not use the link register we do not need to save
14628 it. If we are pushing other registers onto the stack however, we
14629 can save an instruction in the epilogue by pushing the link register
14630 now and then popping it back into the PC. This incurs extra memory
14631 accesses though, so we only do it when optimizing for size, and only
14632 if we know that we will not need a fancy return sequence. */
14633 if (df_regs_ever_live_p (LR_REGNUM)
14636 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14637 && !crtl->calls_eh_return))
14638 save_reg_mask |= 1 << LR_REGNUM;
14640 if (cfun->machine->lr_save_eliminated)
14641 save_reg_mask &= ~ (1 << LR_REGNUM);
14643 if (TARGET_REALLY_IWMMXT
14644 && ((bit_count (save_reg_mask)
14645 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14646 arm_compute_static_chain_stack_bytes())
14649 /* The total number of registers that are going to be pushed
14650 onto the stack is odd. We need to ensure that the stack
14651 is 64-bit aligned before we start to save iWMMXt registers,
14652 and also before we start to create locals. (A local variable
14653 might be a double or long long which we will load/store using
14654 an iWMMXt instruction). Therefore we need to push another
14655 ARM register, so that the stack will be 64-bit aligned. We
14656 try to avoid using the arg registers (r0 - r3) as they might be
14657 used to pass values in a tail call. */
14658 for (reg = 4; reg <= 12; reg++)
14659 if ((save_reg_mask & (1 << reg)) == 0)
14663 save_reg_mask |= (1 << reg);
14666 cfun->machine->sibcall_blocked = 1;
14667 save_reg_mask |= (1 << 3);
14671 /* We may need to push an additional register for use in initializing the
14672 PIC base register. */
14673 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14674 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14676 reg = thumb_find_work_register (1 << 4);
14677 if (!call_used_regs[reg])
14678 save_reg_mask |= (1 << reg);
14681 return save_reg_mask;
14685 /* Compute a bit mask of which registers need to be
14686 saved on the stack for the current function. */
14687 static unsigned long
14688 thumb1_compute_save_reg_mask (void)
14690 unsigned long mask;
14694 for (reg = 0; reg < 12; reg ++)
14695 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14699 && !TARGET_SINGLE_PIC_BASE
14700 && arm_pic_register != INVALID_REGNUM
14701 && crtl->uses_pic_offset_table)
14702 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14704 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14705 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14706 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14708 /* LR will also be pushed if any lo regs are pushed. */
14709 if (mask & 0xff || thumb_force_lr_save ())
14710 mask |= (1 << LR_REGNUM);
14712 /* Make sure we have a low work register if we need one.
14713 We will need one if we are going to push a high register,
14714 but we are not currently intending to push a low register. */
14715 if ((mask & 0xff) == 0
14716 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14718 /* Use thumb_find_work_register to choose which register
14719 we will use. If the register is live then we will
14720 have to push it. Use LAST_LO_REGNUM as our fallback
14721 choice for the register to select. */
14722 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14723 /* Make sure the register returned by thumb_find_work_register is
14724 not part of the return value. */
14725 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14726 reg = LAST_LO_REGNUM;
14728 if (! call_used_regs[reg])
14732 /* The 504 below is 8 bytes less than 512 because there are two possible
14733 alignment words. We can't tell here if they will be present or not so we
14734 have to play it safe and assume that they are. */
14735 if ((CALLER_INTERWORKING_SLOT_SIZE +
14736 ROUND_UP_WORD (get_frame_size ()) +
14737 crtl->outgoing_args_size) >= 504)
14739 /* This is the same as the code in thumb1_expand_prologue() which
14740 determines which register to use for stack decrement. */
14741 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14742 if (mask & (1 << reg))
14745 if (reg > LAST_LO_REGNUM)
14747 /* Make sure we have a register available for stack decrement. */
14748 mask |= 1 << LAST_LO_REGNUM;
14756 /* Return the number of bytes required to save VFP registers. */
14758 arm_get_vfp_saved_size (void)
14760 unsigned int regno;
14765 /* Space for saved VFP registers. */
14766 if (TARGET_HARD_FLOAT && TARGET_VFP)
14769 for (regno = FIRST_VFP_REGNUM;
14770 regno < LAST_VFP_REGNUM;
14773 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14774 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14778 /* Workaround ARM10 VFPr1 bug. */
14779 if (count == 2 && !arm_arch6)
14781 saved += count * 8;
14790 if (count == 2 && !arm_arch6)
14792 saved += count * 8;
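/* Worked example (added for illustration, assuming the elided branch
   bumps COUNT as in the ARM10 VFPr1 workaround above): with only d8
   and d9 live and call-saved on a pre-v6 core, COUNT reaches 2 and is
   bumped to 3, so 24 bytes are reserved instead of 16.  */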
14799 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14800 everything bar the final return instruction. */
14802 output_return_instruction (rtx operand, int really_return, int reverse)
14804 char conditional[10];
14807 unsigned long live_regs_mask;
14808 unsigned long func_type;
14809 arm_stack_offsets *offsets;
14811 func_type = arm_current_func_type ();
14813 if (IS_NAKED (func_type))
14816 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14818 /* If this function was declared non-returning, and we have
14819 found a tail call, then we have to trust that the called
14820 function won't return. */
14825 /* Otherwise, trap an attempted return by aborting. */
14827 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14829 assemble_external_libcall (ops[1]);
14830 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14836 gcc_assert (!cfun->calls_alloca || really_return);
14838 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14840 cfun->machine->return_used_this_function = 1;
14842 offsets = arm_get_frame_offsets ();
14843 live_regs_mask = offsets->saved_regs_mask;
14845 if (live_regs_mask)
14847 const char * return_reg;
14849 /* If we do not have any special requirements for function exit
14850 (e.g. interworking) then we can load the return address
14851 directly into the PC. Otherwise we must load it into LR. */
14853 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14854 return_reg = reg_names[PC_REGNUM];
14856 return_reg = reg_names[LR_REGNUM];
14858 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14860 /* There are three possible reasons for the IP register
14861 being saved. 1) a stack frame was created, in which case
14862 IP contains the old stack pointer, or 2) an ISR routine
14863 corrupted it, or 3) it was saved to align the stack on
14864 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
14866 if (frame_pointer_needed)
14868 live_regs_mask &= ~ (1 << IP_REGNUM);
14869 live_regs_mask |= (1 << SP_REGNUM);
14872 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14875 /* On some ARM architectures it is faster to use LDR rather than
14876 LDM to load a single register. On other architectures, the
14877 cost is the same. In 26 bit mode, or for exception handlers,
14878 we have to use LDM to load the PC so that the CPSR is also restored. */
14880 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14881 if (live_regs_mask == (1U << reg))
14884 if (reg <= LAST_ARM_REGNUM
14885 && (reg != LR_REGNUM
14887 || ! IS_INTERRUPT (func_type)))
14889 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14890 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14897 /* Generate the load multiple instruction to restore the
14898 registers. Note we can get here, even if
14899 frame_pointer_needed is true, but only if sp already
14900 points to the base of the saved core registers. */
14901 if (live_regs_mask & (1 << SP_REGNUM))
14903 unsigned HOST_WIDE_INT stack_adjust;
14905 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14906 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14908 if (stack_adjust && arm_arch5 && TARGET_ARM)
14909 if (TARGET_UNIFIED_ASM)
14910 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14912 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14915 /* If we can't use ldmib (SA110 bug),
14916 then try to pop r3 instead. */
14918 live_regs_mask |= 1 << 3;
14920 if (TARGET_UNIFIED_ASM)
14921 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14923 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14927 if (TARGET_UNIFIED_ASM)
14928 sprintf (instr, "pop%s\t{", conditional);
14930 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14932 p = instr + strlen (instr);
14934 for (reg = 0; reg <= SP_REGNUM; reg++)
14935 if (live_regs_mask & (1 << reg))
14937 int l = strlen (reg_names[reg]);
14943 memcpy (p, ", ", 2);
14947 memcpy (p, "%|", 2);
14948 memcpy (p + 2, reg_names[reg], l);
14952 if (live_regs_mask & (1 << LR_REGNUM))
14954 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14955 /* If returning from an interrupt, restore the CPSR. */
14956 if (IS_INTERRUPT (func_type))
14963 output_asm_insn (instr, & operand);
14965 /* See if we need to generate an extra instruction to
14966 perform the actual function return. */
14968 && func_type != ARM_FT_INTERWORKED
14969 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14971 /* The return has already been handled
14972 by loading the LR into the PC. */
14979 switch ((int) ARM_FUNC_TYPE (func_type))
14983 /* ??? This is wrong for unified assembly syntax. */
14984 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14987 case ARM_FT_INTERWORKED:
14988 sprintf (instr, "bx%s\t%%|lr", conditional);
14991 case ARM_FT_EXCEPTION:
14992 /* ??? This is wrong for unified assembly syntax. */
14993 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14997 /* Use bx if it's available. */
14998 if (arm_arch5 || arm_arch4t)
14999 sprintf (instr, "bx%s\t%%|lr", conditional);
15001 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15005 output_asm_insn (instr, & operand);
15011 /* Write the function name into the code section, directly preceding
15012 the function prologue.
15014 Code will be output similar to this:
15016 .ascii "arm_poke_function_name", 0
15019 .word 0xff000000 + (t1 - t0)
15020 arm_poke_function_name
15022 stmfd sp!, {fp, ip, lr, pc}
15025 When performing a stack backtrace, code can inspect the value
15026 of 'pc' stored at 'fp' + 0. If the trace function then looks
15027 at location pc - 12 and the top 8 bits are set, then we know
15028 that there is a function name embedded immediately preceding this
15029 location, and that its length is ((pc[-3]) & ~0xff000000).
15031 We assume that pc is declared as a pointer to an unsigned long.
15033 It is of no benefit to output the function name if we are assembling
15034 a leaf function. These function types will not contain a stack
15035 backtrace structure; therefore it is not possible to determine the function name. */
15038 arm_poke_function_name (FILE *stream, const char *name)
15040 unsigned long alignlength;
15041 unsigned long length;
15044 length = strlen (name) + 1;
15045 alignlength = ROUND_UP_WORD (length);
15047 ASM_OUTPUT_ASCII (stream, name, length);
15048 ASM_OUTPUT_ALIGN (stream, 2);
15049 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15050 assemble_aligned_integer (UNITS_PER_WORD, x);
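/* Illustrative sketch (hypothetical consumer, not part of the original
   file): recovering the poked name from a saved pc value, following
   the layout documented above.  */
static const char *
find_poked_function_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];        /* the .word at pc - 12 */

  if ((marker & 0xff000000) != 0xff000000)
    return 0;                           /* no name was poked here */

  /* The low 24 bits give the padded length of the name, which sits
     immediately before the marker word.  */
  return (const char *) pc - 12 - (marker & ~0xff000000UL);
}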
15053 /* Place some comments into the assembler stream
15054 describing the current function. */
15056 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15058 unsigned long func_type;
15060 /* ??? Do we want to print some of the below anyway? */
15064 /* Sanity check. */
15065 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15067 func_type = arm_current_func_type ();
15069 switch ((int) ARM_FUNC_TYPE (func_type))
15072 case ARM_FT_NORMAL:
15074 case ARM_FT_INTERWORKED:
15075 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15078 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15081 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15083 case ARM_FT_EXCEPTION:
15084 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15088 if (IS_NAKED (func_type))
15089 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15091 if (IS_VOLATILE (func_type))
15092 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15094 if (IS_NESTED (func_type))
15095 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15096 if (IS_STACKALIGN (func_type))
15097 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15099 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15101 crtl->args.pretend_args_size, frame_size);
15103 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15104 frame_pointer_needed,
15105 cfun->machine->uses_anonymous_args);
15107 if (cfun->machine->lr_save_eliminated)
15108 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15110 if (crtl->calls_eh_return)
15111 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15116 arm_output_epilogue (rtx sibling)
15119 unsigned long saved_regs_mask;
15120 unsigned long func_type;
15121 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15122 frame that is $fp + 4 for a non-variadic function. */
15123 int floats_offset = 0;
15125 FILE * f = asm_out_file;
15126 unsigned int lrm_count = 0;
15127 int really_return = (sibling == NULL);
15129 arm_stack_offsets *offsets;
15131 /* If we have already generated the return instruction
15132 then it is futile to generate anything else. */
15133 if (use_return_insn (FALSE, sibling)
15134 && (cfun->machine->return_used_this_function != 0))
15137 func_type = arm_current_func_type ();
15139 if (IS_NAKED (func_type))
15140 /* Naked functions don't have epilogues. */
15143 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15147 /* A volatile function should never return. Call abort. */
15148 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15149 assemble_external_libcall (op);
15150 output_asm_insn ("bl\t%a0", &op);
15155 /* If we are throwing an exception, then we really must be doing a
15156 return, so we can't tail-call. */
15157 gcc_assert (!crtl->calls_eh_return || really_return);
15159 offsets = arm_get_frame_offsets ();
15160 saved_regs_mask = offsets->saved_regs_mask;
15163 lrm_count = bit_count (saved_regs_mask);
15165 floats_offset = offsets->saved_args;
15166 /* Compute how far away the floats will be. */
15167 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15168 if (saved_regs_mask & (1 << reg))
15169 floats_offset += 4;
15171 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15173 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15174 int vfp_offset = offsets->frame;
15176 if (TARGET_FPA_EMU2)
15178 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15179 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15181 floats_offset += 12;
15182 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15183 reg, FP_REGNUM, floats_offset - vfp_offset);
15188 start_reg = LAST_FPA_REGNUM;
15190 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15192 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15194 floats_offset += 12;
15196 /* We can't unstack more than four registers at once. */
15197 if (start_reg - reg == 3)
15199 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15200 reg, FP_REGNUM, floats_offset - vfp_offset);
15201 start_reg = reg - 1;
15206 if (reg != start_reg)
15207 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15208 reg + 1, start_reg - reg,
15209 FP_REGNUM, floats_offset - vfp_offset);
15210 start_reg = reg - 1;
15214 /* Just in case the last register checked also needs unstacking. */
15215 if (reg != start_reg)
15216 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15217 reg + 1, start_reg - reg,
15218 FP_REGNUM, floats_offset - vfp_offset);
15221 if (TARGET_HARD_FLOAT && TARGET_VFP)
15225 /* The fldmd insns do not have base+offset addressing
15226 modes, so we use IP to hold the address. */
15227 saved_size = arm_get_vfp_saved_size ();
15229 if (saved_size > 0)
15231 floats_offset += saved_size;
15232 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15233 FP_REGNUM, floats_offset - vfp_offset);
15235 start_reg = FIRST_VFP_REGNUM;
15236 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15238 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15239 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15241 if (start_reg != reg)
15242 vfp_output_fldmd (f, IP_REGNUM,
15243 (start_reg - FIRST_VFP_REGNUM) / 2,
15244 (reg - start_reg) / 2);
15245 start_reg = reg + 2;
15248 if (start_reg != reg)
15249 vfp_output_fldmd (f, IP_REGNUM,
15250 (start_reg - FIRST_VFP_REGNUM) / 2,
15251 (reg - start_reg) / 2);
15256 /* The frame pointer is guaranteed to be non-double-word aligned.
15257 This is because it is set to (old_stack_pointer - 4) and the
15258 old_stack_pointer was double word aligned. Thus the offset to
15259 the iWMMXt registers to be loaded must also be non-double-word
15260 sized, so that the resultant address *is* double-word aligned.
15261 We can ignore floats_offset since that was already included in
15262 the live_regs_mask. */
15263 lrm_count += (lrm_count % 2 ? 2 : 1);
15265 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15266 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15268 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15269 reg, FP_REGNUM, lrm_count * 4);
15274 /* saved_regs_mask should contain the IP, which at the time of stack
15275 frame generation actually contains the old stack pointer. So a
15276 quick way to unwind the stack is just to pop the IP register directly
15277 into the stack pointer. */
15278 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15279 saved_regs_mask &= ~ (1 << IP_REGNUM);
15280 saved_regs_mask |= (1 << SP_REGNUM);
15282 /* There are two registers left in saved_regs_mask - LR and PC. We
15283 only need to restore the LR register (the return address), but to
15284 save time we can load it directly into the PC, unless we need a
15285 special function exit sequence, or we are not really returning. */
15287 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15288 && !crtl->calls_eh_return)
15289 /* Delete the LR from the register mask, so that the LR on
15290 the stack is loaded into the PC in the register mask. */
15291 saved_regs_mask &= ~ (1 << LR_REGNUM);
15293 saved_regs_mask &= ~ (1 << PC_REGNUM);
15295 /* We must use SP as the base register, because SP is one of the
15296 registers being restored. If an interrupt or page fault
15297 happens in the ldm instruction, the SP might or might not
15298 have been restored. That would be bad, as then SP will no
15299 longer indicate the safe area of stack, and we can get stack
15300 corruption. Using SP as the base register means that it will
15301 be reset correctly to the original value, should an interrupt
15302 occur. If the stack pointer already points at the right
15303 place, then omit the subtraction. */
15304 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15305 || cfun->calls_alloca)
15306 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15307 4 * bit_count (saved_regs_mask));
15308 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15310 if (IS_INTERRUPT (func_type))
15311 /* Interrupt handlers will have pushed the
15312 IP onto the stack, so restore it now. */
15313 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15317 /* This branch is executed for ARM mode (non-apcs frames) and
15318 Thumb-2 mode. Frame layout is essentially the same for those
15319 cases, except that in ARM mode frame pointer points to the
15320 first saved register, while in Thumb-2 mode the frame pointer points
15321 to the last saved register.
15323 It is possible to make frame pointer point to last saved
15324 register in both cases, and remove some conditionals below.
15325 That means that fp setup in prologue would be just "mov fp, sp"
15326 and sp restore in epilogue would be just "mov sp, fp", whereas
15327 now we have to use add/sub in those cases. However, the value
15328 of that would be marginal, as both mov and add/sub are 32-bit
15329 in ARM mode, and it would require extra conditionals
15330 in arm_expand_prologue to distinguish the ARM-apcs-frame case
15331 (where frame pointer is required to point at first register)
15332 and ARM-non-apcs-frame. Therefore, such a change is postponed
15333 until a real need arises. */
15334 unsigned HOST_WIDE_INT amount;
15336 /* Restore stack pointer if necessary. */
15337 if (TARGET_ARM && frame_pointer_needed)
15339 operands[0] = stack_pointer_rtx;
15340 operands[1] = hard_frame_pointer_rtx;
15342 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15343 output_add_immediate (operands);
15347 if (frame_pointer_needed)
15349 /* For Thumb-2 restore sp from the frame pointer.
15350 Operand restrictions mean we have to increment FP, then copy to SP. */
15352 amount = offsets->locals_base - offsets->saved_regs;
15353 operands[0] = hard_frame_pointer_rtx;
15357 unsigned long count;
15358 operands[0] = stack_pointer_rtx;
15359 amount = offsets->outgoing_args - offsets->saved_regs;
15360 /* pop call clobbered registers if it avoids a
15361 separate stack adjustment. */
15362 count = offsets->saved_regs - offsets->saved_args;
15365 && !crtl->calls_eh_return
15366 && bit_count(saved_regs_mask) * 4 == count
15367 && !IS_INTERRUPT (func_type)
15368 && !IS_STACKALIGN (func_type)
15369 && !crtl->tail_call_emit)
15371 unsigned long mask;
15372 /* Preserve return values, of any size. */
15373 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15375 mask &= ~saved_regs_mask;
15377 while (bit_count (mask) * 4 > amount)
15379 while ((mask & (1 << reg)) == 0)
15381 mask &= ~(1 << reg);
15383 if (bit_count (mask) * 4 == amount) {
15385 saved_regs_mask |= mask;
15392 operands[1] = operands[0];
15393 operands[2] = GEN_INT (amount);
15394 output_add_immediate (operands);
15396 if (frame_pointer_needed)
15397 asm_fprintf (f, "\tmov\t%r, %r\n",
15398 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15401 if (TARGET_FPA_EMU2)
15403 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15404 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15405 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15410 start_reg = FIRST_FPA_REGNUM;
15412 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15414 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15416 if (reg - start_reg == 3)
15418 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15419 start_reg, SP_REGNUM);
15420 start_reg = reg + 1;
15425 if (reg != start_reg)
15426 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15427 start_reg, reg - start_reg,
15430 start_reg = reg + 1;
15434 /* Just in case the last register checked also needs unstacking. */
15435 if (reg != start_reg)
15436 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15437 start_reg, reg - start_reg, SP_REGNUM);
15440 if (TARGET_HARD_FLOAT && TARGET_VFP)
15442 int end_reg = LAST_VFP_REGNUM + 1;
15444 /* Scan the registers in reverse order. We need to match
15445 any groupings made in the prologue and generate matching
15447 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15449 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15450 && (!df_regs_ever_live_p (reg + 1)
15451 || call_used_regs[reg + 1]))
15453 if (end_reg > reg + 2)
15454 vfp_output_fldmd (f, SP_REGNUM,
15455 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15456 (end_reg - (reg + 2)) / 2);
15460 if (end_reg > reg + 2)
15461 vfp_output_fldmd (f, SP_REGNUM, 0,
15462 (end_reg - (reg + 2)) / 2);
15466 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15467 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15468 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15470 /* If we can, restore the LR into the PC. */
15471 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15472 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15473 && !IS_STACKALIGN (func_type)
15475 && crtl->args.pretend_args_size == 0
15476 && saved_regs_mask & (1 << LR_REGNUM)
15477 && !crtl->calls_eh_return)
15479 saved_regs_mask &= ~ (1 << LR_REGNUM);
15480 saved_regs_mask |= (1 << PC_REGNUM);
15481 rfe = IS_INTERRUPT (func_type);
15486 /* Load the registers off the stack. If we only have one register
15487 to load, use the LDR instruction - it is faster. For Thumb-2,
15488 always use pop and the assembler will pick the best instruction. */
15489 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15490 && !IS_INTERRUPT(func_type))
15492 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15494 else if (saved_regs_mask)
15496 if (saved_regs_mask & (1 << SP_REGNUM))
15497 /* Note - write back to the stack register is not enabled
15498 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15499 in the list of registers and if we add writeback the
15500 instruction becomes UNPREDICTABLE. */
15501 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
15503 else if (TARGET_ARM)
15504 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
15507 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15510 if (crtl->args.pretend_args_size)
15512 /* Unwind the pre-pushed regs. */
15513 operands[0] = operands[1] = stack_pointer_rtx;
15514 operands[2] = GEN_INT (crtl->args.pretend_args_size);
15515 output_add_immediate (operands);
15519 /* We may have already restored PC directly from the stack. */
15520 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
15523 /* Stack adjustment for exception handler. */
15524 if (crtl->calls_eh_return)
15525 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
15526 ARM_EH_STACKADJ_REGNUM);
15528 /* Generate the return instruction. */
15529 switch ((int) ARM_FUNC_TYPE (func_type))
15533 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
15536 case ARM_FT_EXCEPTION:
15537 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15540 case ARM_FT_INTERWORKED:
15541 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15545 if (IS_STACKALIGN (func_type))
15547 /* See comment in arm_expand_prologue. */
15548 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
15550 if (arm_arch5 || arm_arch4t)
15551 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15553 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15561 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15562 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15564 arm_stack_offsets *offsets;
15570 /* Emit any call-via-reg trampolines that are needed for v4t support
15571 of call_reg and call_value_reg type insns. */
15572 for (regno = 0; regno < LR_REGNUM; regno++)
15574 rtx label = cfun->machine->call_via[regno];
15578 switch_to_section (function_section (current_function_decl));
15579 targetm.asm_out.internal_label (asm_out_file, "L",
15580 CODE_LABEL_NUMBER (label));
15581 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15585 /* ??? Probably not safe to set this here, since it assumes that a
15586 function will be emitted as assembly immediately after we generate
15587 RTL for it. This does not happen for inline functions. */
15588 cfun->machine->return_used_this_function = 0;
15590 else /* TARGET_32BIT */
15592 /* We need to take into account any stack-frame rounding. */
15593 offsets = arm_get_frame_offsets ();
15595 gcc_assert (!use_return_insn (FALSE, NULL)
15596 || (cfun->machine->return_used_this_function != 0)
15597 || offsets->saved_regs == offsets->outgoing_args
15598 || frame_pointer_needed);
15600 /* Reset the ARM-specific per-function variables. */
15601 after_arm_reorg = 0;
15605 /* Generate and emit an insn that we will recognize as a push_multi.
15606 Unfortunately, since this insn does not reflect very well the actual
15607 semantics of the operation, we need to annotate the insn for the benefit
15608 of DWARF2 frame unwind information. */
15610 emit_multi_reg_push (unsigned long mask)
15613 int num_dwarf_regs;
15617 int dwarf_par_index;
15620 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15621 if (mask & (1 << i))
15624 gcc_assert (num_regs && num_regs <= 16);
15626 /* We don't record the PC in the dwarf frame information. */
15627 num_dwarf_regs = num_regs;
15628 if (mask & (1 << PC_REGNUM))
15631 /* For the body of the insn we are going to generate an UNSPEC in
15632 parallel with several USEs. This allows the insn to be recognized
15633 by the push_multi pattern in the arm.md file.
15635 The body of the insn looks something like this:
15638 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15639 (const_int:SI <num>)))
15640 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15646 For the frame note however, we try to be more explicit and actually
15647 show each register being stored into the stack frame, plus a (single)
15648 decrement of the stack pointer. We do it this way in order to be
15649 friendly to the stack unwinding code, which only wants to see a single
15650 stack decrement per instruction. The RTL we generate for the note looks
15651 something like this:
15654 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15655 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15656 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15657 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15661 FIXME: In an ideal world the PRE_MODIFY would not exist and
15662 instead we'd have a parallel expression detailing all
15663 the stores to the various memory addresses so that debug
15664 information is more up-to-date. Remember, however, while writing
15665 this, to take care of the constraints of the push instruction.
15667 Note also that this has to be taken care of for the VFP registers.
15669 For more see PR43399. */
15671 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15672 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15673 dwarf_par_index = 1;
15675 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15677 if (mask & (1 << i))
15679 reg = gen_rtx_REG (SImode, i);
15681 XVECEXP (par, 0, 0)
15682 = gen_rtx_SET (VOIDmode,
15685 gen_rtx_PRE_MODIFY (Pmode,
15688 (stack_pointer_rtx,
15691 gen_rtx_UNSPEC (BLKmode,
15692 gen_rtvec (1, reg),
15693 UNSPEC_PUSH_MULT));
15695 if (i != PC_REGNUM)
15697 tmp = gen_rtx_SET (VOIDmode,
15698 gen_frame_mem (SImode, stack_pointer_rtx),
15700 RTX_FRAME_RELATED_P (tmp) = 1;
15701 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15709 for (j = 1, i++; j < num_regs; i++)
15711 if (mask & (1 << i))
15713 reg = gen_rtx_REG (SImode, i);
15715 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15717 if (i != PC_REGNUM)
15720 = gen_rtx_SET (VOIDmode,
15723 plus_constant (stack_pointer_rtx,
15726 RTX_FRAME_RELATED_P (tmp) = 1;
15727 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15734 par = emit_insn (par);
15736 tmp = gen_rtx_SET (VOIDmode,
15738 plus_constant (stack_pointer_rtx, -4 * num_regs));
15739 RTX_FRAME_RELATED_P (tmp) = 1;
15740 XVECEXP (dwarf, 0, 0) = tmp;
15742 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
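/* Illustrative sketch, not part of the original source: a caller such as
   arm_expand_prologue requesting a push of r4-r7 and lr would use

     emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << 6) | (1 << 7)
                          | (1 << LR_REGNUM));

   The push_multi pattern emits this as a single "stmfd sp!, {r4-r7, lr}",
   while the REG_FRAME_RELATED_EXPR note attached above describes it to
   the unwinder as five individual stores plus one 20-byte decrement of
   the stack pointer.  */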
15747 /* Calculate the size of the return value that is passed in registers. */
15749 arm_size_return_regs (void)
15751 enum machine_mode mode;
15753 if (crtl->return_rtx != 0)
15754 mode = GET_MODE (crtl->return_rtx);
15756 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15758 return GET_MODE_SIZE (mode);
15762 emit_sfm (int base_reg, int count)
15769 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15770 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15772 reg = gen_rtx_REG (XFmode, base_reg++);
15774 XVECEXP (par, 0, 0)
15775 = gen_rtx_SET (VOIDmode,
15778 gen_rtx_PRE_MODIFY (Pmode,
15781 (stack_pointer_rtx,
15784 gen_rtx_UNSPEC (BLKmode,
15785 gen_rtvec (1, reg),
15786 UNSPEC_PUSH_MULT));
15787 tmp = gen_rtx_SET (VOIDmode,
15788 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15789 RTX_FRAME_RELATED_P (tmp) = 1;
15790 XVECEXP (dwarf, 0, 1) = tmp;
15792 for (i = 1; i < count; i++)
15794 reg = gen_rtx_REG (XFmode, base_reg++);
15795 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15797 tmp = gen_rtx_SET (VOIDmode,
15798 gen_frame_mem (XFmode,
15799 plus_constant (stack_pointer_rtx,
15802 RTX_FRAME_RELATED_P (tmp) = 1;
15803 XVECEXP (dwarf, 0, i + 1) = tmp;
15806 tmp = gen_rtx_SET (VOIDmode,
15808 plus_constant (stack_pointer_rtx, -12 * count));
15810 RTX_FRAME_RELATED_P (tmp) = 1;
15811 XVECEXP (dwarf, 0, 0) = tmp;
15813 par = emit_insn (par);
15814 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
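/* Illustrative sketch, not part of the original source: a call such as
   emit_sfm (FIRST_FPA_REGNUM + 4, 4) would store the FPA registers
   f4-f7 with a single store-multiple; the note built above presents
   this to the unwinder as four 12-byte stores plus one 48-byte
   (-12 * count) adjustment of the stack pointer.  */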
15820 /* Return true if the current function needs to save/restore LR. */
15823 thumb_force_lr_save (void)
15825 return !cfun->machine->lr_save_eliminated
15826 && (!leaf_function_p ()
15827 || thumb_far_jump_used_p ()
15828 || df_regs_ever_live_p (LR_REGNUM));
15832 /* Return true if r3 is used by any of the tail call insns in the
15833 current function. */
15836 any_sibcall_uses_r3 (void)
15841 if (!crtl->tail_call_emit)
15843 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15844 if (e->flags & EDGE_SIBCALL)
15846 rtx call = BB_END (e->src);
15847 if (!CALL_P (call))
15848 call = prev_nonnote_nondebug_insn (call);
15849 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15850 if (find_regno_fusage (call, USE, 3))
15857 /* Compute the distance from register FROM to register TO.
15858 These can be the arg pointer (26), the soft frame pointer (25),
15859 the stack pointer (13) or the hard frame pointer (11).
15860 In thumb mode r7 is used as the soft frame pointer, if needed.
15861 Typical stack layout looks like this:
15863 old stack pointer ->     |                       |
15866                          | saved arguments for   |
15867                          | vararg functions      |
15870 hard FP & arg pointer -> | stack frame           |
15878                          | call-saved registers  | <- soft frame pointer
15883                          | local variables       | <- locals base pointer
15888                          | outgoing arguments    | <- current stack pointer
15891 For a given function some or all of these stack components
15892 may not be needed, giving rise to the possibility of
15893 eliminating some of the registers.
15895 The values returned by this function must reflect the behavior
15896 of arm_expand_prologue() and arm_compute_save_reg_mask().
15898 The sign of the number returned reflects the direction of stack
15899 growth, so the values are positive for all eliminations except
15900 from the soft frame pointer to the hard frame pointer.
15902 SFP may point just inside the local variables block to ensure correct alignment.  */
15906 /* Calculate stack offsets. These are used to calculate register elimination
15907 offsets and in prologue/epilogue code. Also calculates which registers
15908 should be saved. */
15910 static arm_stack_offsets *
15911 arm_get_frame_offsets (void)
15913 struct arm_stack_offsets *offsets;
15914 unsigned long func_type;
15918 HOST_WIDE_INT frame_size;
15921 offsets = &cfun->machine->stack_offsets;
15923 /* We need to know if we are a leaf function. Unfortunately, it
15924 is possible to be called after start_sequence has been called,
15925 which causes get_insns to return the insns for the sequence,
15926 not the function, which will cause leaf_function_p to return
15927 the incorrect result.
15929 We work around this by caching the result: we only need to know about leaf functions once reload has completed, and the
15930 frame size cannot be changed after that time, so we can safely
15931 use the cached value. */
15933 if (reload_completed)
15936 /* Initially this is the size of the local variables. It will be translated
15937 into an offset once we have determined the size of preceding data. */
15938 frame_size = ROUND_UP_WORD (get_frame_size ());
15940 leaf = leaf_function_p ();
15942 /* Space for variadic functions. */
15943 offsets->saved_args = crtl->args.pretend_args_size;
15945 /* In Thumb mode this is incorrect, but never used. */
15946 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15947 arm_compute_static_chain_stack_bytes ();
15951 unsigned int regno;
15953 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15954 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15955 saved = core_saved;
15957 /* We know that SP will be doubleword aligned on entry, and we must
15958 preserve that condition at any subroutine call. We also require the
15959 soft frame pointer to be doubleword aligned. */
15961 if (TARGET_REALLY_IWMMXT)
15963 /* Check for the call-saved iWMMXt registers. */
15964 for (regno = FIRST_IWMMXT_REGNUM;
15965 regno <= LAST_IWMMXT_REGNUM;
15967 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15971 func_type = arm_current_func_type ();
15972 if (! IS_VOLATILE (func_type))
15974 /* Space for saved FPA registers. */
15975 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15976 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15979 /* Space for saved VFP registers. */
15980 if (TARGET_HARD_FLOAT && TARGET_VFP)
15981 saved += arm_get_vfp_saved_size ();
15984 else /* TARGET_THUMB1 */
15986 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15987 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15988 saved = core_saved;
15989 if (TARGET_BACKTRACE)
15993 /* Saved registers include the stack frame. */
15994 offsets->saved_regs = offsets->saved_args + saved +
15995 arm_compute_static_chain_stack_bytes ();
15996 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15997 /* A leaf function does not need any stack alignment if it has nothing on the stack.  */
15999 if (leaf && frame_size == 0
16000 /* However if it calls alloca(), we have a dynamically allocated
16001 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16002 && ! cfun->calls_alloca)
16004 offsets->outgoing_args = offsets->soft_frame;
16005 offsets->locals_base = offsets->soft_frame;
16009 /* Ensure SFP has the correct alignment. */
16010 if (ARM_DOUBLEWORD_ALIGN
16011 && (offsets->soft_frame & 7))
16013 offsets->soft_frame += 4;
16014 /* Try to align stack by pushing an extra reg. Don't bother doing this
16015 when there is a stack frame as the alignment will be rolled into
16016 the normal stack adjustment. */
16017 if (frame_size + crtl->outgoing_args_size == 0)
16021 /* If it is safe to use r3, then do so. This sometimes
16022 generates better code on Thumb-2 by avoiding the need to
16023 use 32-bit push/pop instructions. */
16024 if (! any_sibcall_uses_r3 ()
16025 && arm_size_return_regs () <= 12
16026 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16031 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16033 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16042 offsets->saved_regs += 4;
16043 offsets->saved_regs_mask |= (1 << reg);
16048 offsets->locals_base = offsets->soft_frame + frame_size;
16049 offsets->outgoing_args = (offsets->locals_base
16050 + crtl->outgoing_args_size);
16052 if (ARM_DOUBLEWORD_ALIGN)
16054 /* Ensure SP remains doubleword aligned. */
16055 if (offsets->outgoing_args & 7)
16056 offsets->outgoing_args += 4;
16057 gcc_assert (!(offsets->outgoing_args & 7));
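/* A hypothetical worked example, not part of the original source: an ARM
   function with no pretend args, r4-r7 and lr saved (20 bytes of core
   registers), no interworking slot or static chain, 16 bytes of locals
   and 8 bytes of outgoing arguments would get saved_args = 0,
   saved_regs = 20, soft_frame = 20 (bumped to 24 by the
   doubleword-alignment fix-up above), locals_base = 40 and
   outgoing_args = 48.  */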
16064 /* Calculate the relative offsets for the different stack pointers. Positive
16065 offsets are in the direction of stack growth. */
16068 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16070 arm_stack_offsets *offsets;
16072 offsets = arm_get_frame_offsets ();
16074 /* OK, now we have enough information to compute the distances.
16075 There must be an entry in these switch tables for each pair
16076 of registers in ELIMINABLE_REGS, even if some of the entries
16077 seem to be redundant or useless. */
16080 case ARG_POINTER_REGNUM:
16083 case THUMB_HARD_FRAME_POINTER_REGNUM:
16086 case FRAME_POINTER_REGNUM:
16087 /* This is the reverse of the soft frame pointer
16088 to hard frame pointer elimination below. */
16089 return offsets->soft_frame - offsets->saved_args;
16091 case ARM_HARD_FRAME_POINTER_REGNUM:
16092 /* This is only non-zero in the case where the static chain register
16093 is stored above the frame. */
16094 return offsets->frame - offsets->saved_args - 4;
16096 case STACK_POINTER_REGNUM:
16097 /* If nothing has been pushed on the stack at all
16098 then this will return -4. This *is* correct! */
16099 return offsets->outgoing_args - (offsets->saved_args + 4);
16102 gcc_unreachable ();
16104 gcc_unreachable ();
16106 case FRAME_POINTER_REGNUM:
16109 case THUMB_HARD_FRAME_POINTER_REGNUM:
16112 case ARM_HARD_FRAME_POINTER_REGNUM:
16113 /* The hard frame pointer points to the top entry in the
16114 stack frame. The soft frame pointer points to the bottom entry
16115 in the stack frame. If there is no stack frame at all,
16116 then they are identical. */
16118 return offsets->frame - offsets->soft_frame;
16120 case STACK_POINTER_REGNUM:
16121 return offsets->outgoing_args - offsets->soft_frame;
16124 gcc_unreachable ();
16126 gcc_unreachable ();
16129 /* You cannot eliminate from the stack pointer.
16130 In theory you could eliminate from the hard frame
16131 pointer to the stack pointer, but this will never
16132 happen, since if a stack frame is not needed the
16133 hard frame pointer will never be used. */
16134 gcc_unreachable ();
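/* Continuing the hypothetical example from arm_get_frame_offsets above
   (not part of the original source): with saved_args = 0, soft_frame = 24
   and outgoing_args = 48, eliminating ARG_POINTER into STACK_POINTER
   yields 48 - (0 + 4) = 44, FRAME_POINTER into STACK_POINTER yields
   48 - 24 = 24, and ARG_POINTER into FRAME_POINTER yields 24 - 0 = 24.  */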
16138 /* Given FROM and TO register numbers, say whether this elimination is
16139 allowed. Frame pointer elimination is automatically handled.
16141 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16142 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16143 pointer, we must eliminate FRAME_POINTER_REGNUM into
16144 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16145 ARG_POINTER_REGNUM. */
16148 arm_can_eliminate (const int from, const int to)
16150 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16151 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16152 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16153 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
      true);
16157 /* Emit RTL to save coprocessor registers on function entry. Returns the
16158 number of bytes pushed. */
16161 arm_save_coproc_regs(void)
16163 int saved_size = 0;
16165 unsigned start_reg;
16168 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16169 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16171 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16172 insn = gen_rtx_MEM (V2SImode, insn);
16173 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16174 RTX_FRAME_RELATED_P (insn) = 1;
16178 /* Save any floating point call-saved registers used by this function.  */
16180 if (TARGET_FPA_EMU2)
16182 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16183 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16185 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16186 insn = gen_rtx_MEM (XFmode, insn);
16187 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16188 RTX_FRAME_RELATED_P (insn) = 1;
16194 start_reg = LAST_FPA_REGNUM;
16196 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16198 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16200 if (start_reg - reg == 3)
16202 insn = emit_sfm (reg, 4);
16203 RTX_FRAME_RELATED_P (insn) = 1;
16205 start_reg = reg - 1;
16210 if (start_reg != reg)
16212 insn = emit_sfm (reg + 1, start_reg - reg);
16213 RTX_FRAME_RELATED_P (insn) = 1;
16214 saved_size += (start_reg - reg) * 12;
16216 start_reg = reg - 1;
16220 if (start_reg != reg)
16222 insn = emit_sfm (reg + 1, start_reg - reg);
16223 saved_size += (start_reg - reg) * 12;
16224 RTX_FRAME_RELATED_P (insn) = 1;
16227 if (TARGET_HARD_FLOAT && TARGET_VFP)
16229 start_reg = FIRST_VFP_REGNUM;
16231 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16233 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16234 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16236 if (start_reg != reg)
16237 saved_size += vfp_emit_fstmd (start_reg,
16238 (reg - start_reg) / 2);
16239 start_reg = reg + 2;
16242 if (start_reg != reg)
16243 saved_size += vfp_emit_fstmd (start_reg,
16244 (reg - start_reg) / 2);
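/* Illustrative sketch, not part of the original source: if d8 and d9
   (s16-s19) are the only live call-saved VFP registers, the loop above
   finds the contiguous run and emits
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 2), i.e. a single
   "vpush {d8, d9}".  */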
16250 /* Set the Thumb frame pointer from the stack pointer. */
16253 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16255 HOST_WIDE_INT amount;
16258 amount = offsets->outgoing_args - offsets->locals_base;
16260 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16261 stack_pointer_rtx, GEN_INT (amount)));
16264 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16265 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16266 expects the first two operands to be the same. */
16269 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16271 hard_frame_pointer_rtx));
16275 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16276 hard_frame_pointer_rtx,
16277 stack_pointer_rtx));
16279 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16280 plus_constant (stack_pointer_rtx, amount));
16281 RTX_FRAME_RELATED_P (dwarf) = 1;
16282 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16285 RTX_FRAME_RELATED_P (insn) = 1;
16288 /* Generate the prologue instructions for entry into an ARM or Thumb-2 function.  */
16291 arm_expand_prologue (void)
16296 unsigned long live_regs_mask;
16297 unsigned long func_type;
16299 int saved_pretend_args = 0;
16300 int saved_regs = 0;
16301 unsigned HOST_WIDE_INT args_to_push;
16302 arm_stack_offsets *offsets;
16304 func_type = arm_current_func_type ();
16306 /* Naked functions don't have prologues. */
16307 if (IS_NAKED (func_type))
16310 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16311 args_to_push = crtl->args.pretend_args_size;
16313 /* Compute which registers we will have to save onto the stack. */
16314 offsets = arm_get_frame_offsets ();
16315 live_regs_mask = offsets->saved_regs_mask;
16317 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16319 if (IS_STACKALIGN (func_type))
16323 /* Handle a word-aligned stack pointer.  We generate the following:

	mov r0, sp
	bic r1, r0, #7
	mov sp, r1
16328	<save and restore r0 in normal prologue/epilogue>
	mov sp, r0

16332 The unwinder doesn't need to know about the stack realignment.
16333 Just tell it we saved SP in r0. */
16334 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16336 r0 = gen_rtx_REG (SImode, 0);
16337 r1 = gen_rtx_REG (SImode, 1);
16339 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16340 RTX_FRAME_RELATED_P (insn) = 1;
16341 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16343 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16345 /* ??? The CFA changes here, which may cause GDB to conclude that it
16346 has entered a different function. That said, the unwind info is
16347 correct, individually, before and after this instruction because
16348 we've described the save of SP, which will override the default
16349 handling of SP as restoring from the CFA. */
16350 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16353 /* For APCS frames, if IP register is clobbered
16354 when creating frame, save that register in a special buffer.  */
16356 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16358 if (IS_INTERRUPT (func_type))
16360 /* Interrupt functions must not corrupt any registers.
16361 Creating a frame pointer, however, corrupts the IP
16362 register, so we must push it first. */
16363 emit_multi_reg_push (1 << IP_REGNUM);
16365 /* Do not set RTX_FRAME_RELATED_P on this insn.
16366 The dwarf stack unwinding code only wants to see one
16367 stack decrement per function, and this is not it. If
16368 this instruction is labeled as being part of the frame
16369 creation sequence then dwarf2out_frame_debug_expr will
16370 die when it encounters the assignment of IP to FP
16371 later on, since the use of SP here establishes SP as
16372 the CFA register and not IP.
16374 Anyway this instruction is not really part of the stack
16375 frame creation although it is part of the prologue. */
16377 else if (IS_NESTED (func_type))
16379 /* The static chain register is the same as the IP register
16380 used as a scratch register during stack frame creation.
16381 To get around this we need to find somewhere to store IP
16382 whilst the frame is being created. We try the following
16385 1. The last argument register.
16386 2. A slot on the stack above the frame. (This only
16387 works if the function is not a varargs function).
16388 3. Register r3, after pushing the argument registers onto the stack (used when the function is a varargs function).
16391 Note - we only need to tell the dwarf2 backend about the SP
16392 adjustment in the second variant; the static chain register
16393 doesn't need to be unwound, as it doesn't contain a value
16394 inherited from the caller. */
16396 if (df_regs_ever_live_p (3) == false)
16397 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16398 else if (args_to_push == 0)
16402 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
16405 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16406 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16409 /* Just tell the dwarf backend that we adjusted SP. */
16410 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16411 plus_constant (stack_pointer_rtx,
16413 RTX_FRAME_RELATED_P (insn) = 1;
16414 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16418 /* Store the args on the stack. */
16419 if (cfun->machine->uses_anonymous_args)
16420 insn = emit_multi_reg_push
16421 ((0xf0 >> (args_to_push / 4)) & 0xf);
16424 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16425 GEN_INT (- args_to_push)));
16427 RTX_FRAME_RELATED_P (insn) = 1;
16429 saved_pretend_args = 1;
16430 fp_offset = args_to_push;
16433 /* Now reuse r3 to preserve IP. */
16434 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16438 insn = emit_set_insn (ip_rtx,
16439 plus_constant (stack_pointer_rtx, fp_offset));
16440 RTX_FRAME_RELATED_P (insn) = 1;
16445 /* Push the argument registers, or reserve space for them. */
16446 if (cfun->machine->uses_anonymous_args)
16447 insn = emit_multi_reg_push
16448 ((0xf0 >> (args_to_push / 4)) & 0xf);
16451 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16452 GEN_INT (- args_to_push)));
16453 RTX_FRAME_RELATED_P (insn) = 1;
16456 /* If this is an interrupt service routine, and the link register
16457 is going to be pushed, and we're not generating extra
16458 push of IP (needed when a frame is needed and the frame layout is APCS),
16459 subtracting four from LR now will mean that the function return
16460 can be done with a single instruction. */
16461 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16462 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16463 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16466 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16468 emit_set_insn (lr, plus_constant (lr, -4));
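      /* Illustrative note, not part of the original source: with LR
	 pre-biased by -4 and saved along with the other registers, the
	 ISR can return with a single "ldmfd sp!, {..., pc}^" instead of
	 restoring LR and then executing a separate "subs pc, lr, #4".  */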
16471 if (live_regs_mask)
16473 saved_regs += bit_count (live_regs_mask) * 4;
16474 if (optimize_size && !frame_pointer_needed
16475 && saved_regs == offsets->saved_regs - offsets->saved_args)
16477 /* If no coprocessor registers are being pushed and we don't have
16478 to worry about a frame pointer then push extra registers to
16479 create the stack frame.  This is done in a way that does not
16480 alter the frame layout, so it is independent of the epilogue.  */
16484 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16486 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16487 if (frame && n * 4 >= frame)
16490 live_regs_mask |= (1 << n) - 1;
16491 saved_regs += frame;
16494 insn = emit_multi_reg_push (live_regs_mask);
16495 RTX_FRAME_RELATED_P (insn) = 1;
16498 if (! IS_VOLATILE (func_type))
16499 saved_regs += arm_save_coproc_regs ();
16501 if (frame_pointer_needed && TARGET_ARM)
16503 /* Create the new frame pointer. */
16504 if (TARGET_APCS_FRAME)
16506 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16507 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16508 RTX_FRAME_RELATED_P (insn) = 1;
16510 if (IS_NESTED (func_type))
16512 /* Recover the static chain register. */
16513 if (!df_regs_ever_live_p (3)
16514 || saved_pretend_args)
16515 insn = gen_rtx_REG (SImode, 3);
16516 else /* if (crtl->args.pretend_args_size == 0) */
16518 insn = plus_constant (hard_frame_pointer_rtx, 4);
16519 insn = gen_frame_mem (SImode, insn);
16521 emit_set_insn (ip_rtx, insn);
16522 /* Add a USE to stop propagate_one_insn() from barfing. */
16523 emit_insn (gen_prologue_use (ip_rtx));
16528 insn = GEN_INT (saved_regs - 4);
16529 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16530 stack_pointer_rtx, insn));
16531 RTX_FRAME_RELATED_P (insn) = 1;
16535 if (flag_stack_usage_info)
16536 current_function_static_stack_size
16537 = offsets->outgoing_args - offsets->saved_args;
16539 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16541 /* This add can produce multiple insns for a large constant, so we
16542 need to get tricky. */
16543 rtx last = get_last_insn ();
16545 amount = GEN_INT (offsets->saved_args + saved_regs
16546 - offsets->outgoing_args);
16548 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16552 last = last ? NEXT_INSN (last) : get_insns ();
16553 RTX_FRAME_RELATED_P (last) = 1;
16555 while (last != insn);
16557 /* If the frame pointer is needed, emit a special barrier that
16558 will prevent the scheduler from moving stores to the frame
16559 before the stack adjustment. */
16560 if (frame_pointer_needed)
16561 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16562 hard_frame_pointer_rtx));
16566 if (frame_pointer_needed && TARGET_THUMB2)
16567 thumb_set_frame_pointer (offsets);
16569 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16571 unsigned long mask;
16573 mask = live_regs_mask;
16574 mask &= THUMB2_WORK_REGS;
16575 if (!IS_NESTED (func_type))
16576 mask |= (1 << IP_REGNUM);
16577 arm_load_pic_register (mask);
16580 /* If we are profiling, make sure no instructions are scheduled before
16581 the call to mcount. Similarly if the user has requested no
16582 scheduling in the prolog. Similarly if we want non-call exceptions
16583 using the EABI unwinder, to prevent faulting instructions from being
16584 swapped with a stack adjustment. */
16585 if (crtl->profile || !TARGET_SCHED_PROLOG
16586 || (arm_except_unwind_info (&global_options) == UI_TARGET
16587 && cfun->can_throw_non_call_exceptions))
16588 emit_insn (gen_blockage ());
16590 /* If the link register is being kept alive, with the return address in it,
16591 then make sure that it does not get reused by the ce2 pass. */
16592 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16593 cfun->machine->lr_save_eliminated = 1;
16596 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16598 arm_print_condition (FILE *stream)
16600 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16602 /* Branch conversion is not implemented for Thumb-2. */
16605 output_operand_lossage ("predicated Thumb instruction");
16608 if (current_insn_predicate != NULL)
16610 output_operand_lossage
16611 ("predicated instruction in conditional sequence");
16615 fputs (arm_condition_codes[arm_current_cc], stream);
16617 else if (current_insn_predicate)
16619 enum arm_cond_code code;
16623 output_operand_lossage ("predicated Thumb instruction");
16627 code = get_arm_condition_code (current_insn_predicate);
16628 fputs (arm_condition_codes[code], stream);
16633 /* If CODE is 'd', then the X is a condition operand and the instruction
16634 should only be executed if the condition is true.
16635 If CODE is 'D', then the X is a condition operand and the instruction
16636 should only be executed if the condition is false: however, if the mode
16637 of the comparison is CCFPEmode, then always execute the instruction -- we
16638 do this because in these circumstances !GE does not necessarily imply LT;
16639 in these cases the instruction pattern will take care to make sure that
16640 an instruction containing %d will follow, thereby undoing the effects of
16641 doing this instruction unconditionally.
16642 If CODE is 'N' then X is a floating point operand that must be negated before output.
16644 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16645 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
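/* Illustrative examples, not part of the original source: with X the
   const_int 5, '%B' prints -6 (the sign-extended bitwise inverse); with
   X a DImode value in r0, '%M' prints "{r0-r1}"; and '%d' applied to an
   (ne ...) condition operand prints "ne", so a template like "mov%d3"
   may emit "movne".  */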
16647 arm_print_operand (FILE *stream, rtx x, int code)
16652 fputs (ASM_COMMENT_START, stream);
16656 fputs (user_label_prefix, stream);
16660 fputs (REGISTER_PREFIX, stream);
16664 arm_print_condition (stream);
16668 /* Nothing in unified syntax, otherwise the current condition code. */
16669 if (!TARGET_UNIFIED_ASM)
16670 arm_print_condition (stream);
16674 /* The current condition code in unified syntax, otherwise nothing. */
16675 if (TARGET_UNIFIED_ASM)
16676 arm_print_condition (stream);
16680 /* The current condition code for a condition code setting instruction.
16681 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16682 if (TARGET_UNIFIED_ASM)
16684 fputc('s', stream);
16685 arm_print_condition (stream);
16689 arm_print_condition (stream);
16690 fputc('s', stream);
16695 /* If the instruction is conditionally executed then print
16696 the current condition code, otherwise print 's'. */
16697 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16698 if (current_insn_predicate)
16699 arm_print_condition (stream);
16701 fputc('s', stream);
16704 /* %# is a "break" sequence. It doesn't output anything, but is used to
16705 separate e.g. operand numbers from following text, if that text consists
16706 of further digits which we don't want to be part of the operand number.  */
16714 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16715 r = real_value_negate (&r);
16716 fprintf (stream, "%s", fp_const_from_val (&r));
16720 /* An integer or symbol address without a preceding # sign. */
16722 switch (GET_CODE (x))
16725 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16729 output_addr_const (stream, x);
16733 if (GET_CODE (XEXP (x, 0)) == PLUS
16734 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16736 output_addr_const (stream, x);
16739 /* Fall through. */
16742 output_operand_lossage ("unsupported operand for code '%c'", code);
16747 if (GET_CODE (x) == CONST_INT)
16750 val = ARM_SIGN_EXTEND (~INTVAL (x));
16751 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16755 putc ('~', stream);
16756 output_addr_const (stream, x);
16761 /* The low 16 bits of an immediate constant. */
16762 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16766 fprintf (stream, "%s", arithmetic_instr (x, 1));
16769 /* Truncate Cirrus shift counts. */
16771 if (GET_CODE (x) == CONST_INT)
16773 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16776 arm_print_operand (stream, x, 0);
16780 fprintf (stream, "%s", arithmetic_instr (x, 0));
16788 if (!shift_operator (x, SImode))
16790 output_operand_lossage ("invalid shift operand");
16794 shift = shift_op (x, &val);
16798 fprintf (stream, ", %s ", shift);
16800 arm_print_operand (stream, XEXP (x, 1), 0);
16802 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16807 /* An explanation of the 'Q', 'R' and 'H' register operands:
16809 In a pair of registers containing a DI or DF value the 'Q'
16810 operand returns the register number of the register containing
16811 the least significant part of the value. The 'R' operand returns
16812 the register number of the register containing the most
16813 significant part of the value.
16815 The 'H' operand returns the higher of the two register numbers.
16816 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16817 same as the 'Q' operand, since the most significant part of the
16818 value is held in the lower number register. The reverse is true
16819 on systems where WORDS_BIG_ENDIAN is false.
16821 The purpose of these operands is to distinguish between cases
16822 where the endian-ness of the values is important (for example
16823 when they are added together), and cases where the endian-ness
16824 is irrelevant, but the order of register operations is important.
16825 For example when loading a value from memory into a register
16826 pair, the endian-ness does not matter. Provided that the value
16827 from the lower memory address is put into the lower numbered
16828 register, and the value from the higher address is put into the
16829 higher numbered register, the load will work regardless of whether
16830 the value being loaded is big-wordian or little-wordian. The
16831 order of the two register loads can matter however, if the address
16832 of the memory location is actually held in one of the registers
16833 being overwritten by the load.
16835 The 'Q' and 'R' constraints are also available for 64-bit constants.  */
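	 /* Illustrative example, not part of the original source: for a
	    DImode value held in {r2, r3} on a little-endian target, '%Q'
	    prints r2 (the least significant half), '%R' prints r3 (the
	    most significant half), and '%H' also prints r3, the
	    higher-numbered register.  */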
16838 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16840 rtx part = gen_lowpart (SImode, x);
16841 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16845 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16847 output_operand_lossage ("invalid operand for code '%c'", code);
16851 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16855 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16857 enum machine_mode mode = GET_MODE (x);
16860 if (mode == VOIDmode)
16862 part = gen_highpart_mode (SImode, mode, x);
16863 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16867 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16869 output_operand_lossage ("invalid operand for code '%c'", code);
16873 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16877 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16879 output_operand_lossage ("invalid operand for code '%c'", code);
16883 asm_fprintf (stream, "%r", REGNO (x) + 1);
16887 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16889 output_operand_lossage ("invalid operand for code '%c'", code);
16893 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16897 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16899 output_operand_lossage ("invalid operand for code '%c'", code);
16903 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16907 asm_fprintf (stream, "%r",
16908 GET_CODE (XEXP (x, 0)) == REG
16909 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16913 asm_fprintf (stream, "{%r-%r}",
16915 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16918 /* Like 'M', but writing doubleword vector registers, for use by Neon insns.  */
16922 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16923 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16925 asm_fprintf (stream, "{d%d}", regno);
16927 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16932 /* CONST_TRUE_RTX means always -- that's the default. */
16933 if (x == const_true_rtx)
16936 if (!COMPARISON_P (x))
16938 output_operand_lossage ("invalid operand for code '%c'", code);
16942 fputs (arm_condition_codes[get_arm_condition_code (x)],
16947 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16948 want to do that. */
16949 if (x == const_true_rtx)
16951 output_operand_lossage ("instruction never executed");
16954 if (!COMPARISON_P (x))
16956 output_operand_lossage ("invalid operand for code '%c'", code);
16960 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16961 (get_arm_condition_code (x))],
16965 /* Cirrus registers can be accessed in a variety of ways:
16966 single floating point (f)
16967 double floating point (d)
	32bit integer (fx)
16969 64bit integer (dx).  */
16970 case 'W': /* Cirrus register in F mode. */
16971 case 'X': /* Cirrus register in D mode. */
16972 case 'Y': /* Cirrus register in FX mode. */
16973 case 'Z': /* Cirrus register in DX mode. */
16974 gcc_assert (GET_CODE (x) == REG
16975 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16977 fprintf (stream, "mv%s%s",
16979 : code == 'X' ? "d"
16980 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16984 /* Print a Cirrus register, using the register's mode to select the f/d/fx/dx form.  */
16987 int mode = GET_MODE (x);
16989 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16991 output_operand_lossage ("invalid operand for code '%c'", code);
16995 fprintf (stream, "mv%s%s",
16996 mode == DFmode ? "d"
16997 : mode == SImode ? "fx"
16998 : mode == DImode ? "dx"
16999 : "f", reg_names[REGNO (x)] + 2);
17005 if (GET_CODE (x) != REG
17006 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17007 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17008 /* Bad value for wCG register number. */
17010 output_operand_lossage ("invalid operand for code '%c'", code);
17015 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17018 /* Print an iWMMXt control register name. */
17020 if (GET_CODE (x) != CONST_INT
17022 || INTVAL (x) >= 16)
17023 /* Bad value for wC register number. */
17025 output_operand_lossage ("invalid operand for code '%c'", code);
17031 static const char * wc_reg_names [16] =
17033 "wCID", "wCon", "wCSSF", "wCASF",
17034 "wC4", "wC5", "wC6", "wC7",
17035 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17036 "wC12", "wC13", "wC14", "wC15"
17039 fprintf (stream, wc_reg_names [INTVAL (x)]);
17043 /* Print the high single-precision register of a VFP double-precision register.  */
17047 int mode = GET_MODE (x);
17050 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17052 output_operand_lossage ("invalid operand for code '%c'", code);
17057 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17059 output_operand_lossage ("invalid operand for code '%c'", code);
17063 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17067 /* Print a VFP/Neon double precision or quad precision register name. */
17071 int mode = GET_MODE (x);
17072 int is_quad = (code == 'q');
17075 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17077 output_operand_lossage ("invalid operand for code '%c'", code);
17081 if (GET_CODE (x) != REG
17082 || !IS_VFP_REGNUM (REGNO (x)))
17084 output_operand_lossage ("invalid operand for code '%c'", code);
17089 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17090 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17092 output_operand_lossage ("invalid operand for code '%c'", code);
17096 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17097 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17101 /* These two codes print the low/high doubleword register of a Neon quad
17102 register, respectively. For pair-structure types, can also print
17103 low/high quadword registers. */
17107 int mode = GET_MODE (x);
17110 if ((GET_MODE_SIZE (mode) != 16
17111 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17113 output_operand_lossage ("invalid operand for code '%c'", code);
17118 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17120 output_operand_lossage ("invalid operand for code '%c'", code);
17124 if (GET_MODE_SIZE (mode) == 16)
17125 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17126 + (code == 'f' ? 1 : 0));
17128 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17129 + (code == 'f' ? 1 : 0));
17133 /* Print a VFPv3 floating-point constant, represented as an integer index.  */
17137 int index = vfp3_const_double_index (x);
17138 gcc_assert (index != -1);
17139 fprintf (stream, "%d", index);
17143 /* Print bits representing opcode features for Neon.
17145 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17146 and polynomials as unsigned.
17148 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17150 Bit 2 is 1 for rounding functions, 0 otherwise. */
17152 /* Identify the type as 's', 'u', 'p' or 'f'. */
17155 HOST_WIDE_INT bits = INTVAL (x);
17156 fputc ("uspf"[bits & 3], stream);
17160 /* Likewise, but signed and unsigned integers are both 'i'. */
17163 HOST_WIDE_INT bits = INTVAL (x);
17164 fputc ("iipf"[bits & 3], stream);
17168 /* As for 'T', but emit 'u' instead of 'p'. */
17171 HOST_WIDE_INT bits = INTVAL (x);
17172 fputc ("usuf"[bits & 3], stream);
17176 /* Bit 2: rounding (vs none). */
17179 HOST_WIDE_INT bits = INTVAL (x);
17180 fputs ((bits & 4) != 0 ? "r" : "", stream);
17184 /* Memory operand for vld1/vst1 instruction. */
17188 bool postinc = FALSE;
17189 unsigned align, memsize, align_bits;
17191 gcc_assert (GET_CODE (x) == MEM);
17192 addr = XEXP (x, 0);
17193 if (GET_CODE (addr) == POST_INC)
17196 addr = XEXP (addr, 0);
17198 asm_fprintf (stream, "[%r", REGNO (addr));
17200 /* We know the alignment of this access, so we can emit a hint in the
17201 instruction (for some alignments) as an aid to the memory subsystem of the target.  */
17203 align = MEM_ALIGN (x) >> 3;
17204 memsize = MEM_SIZE (x);
17206 /* Only certain alignment specifiers are supported by the hardware. */
17207 if (memsize == 16 && (align % 32) == 0)
17209 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
17211 else if ((align % 8) == 0)
17216 if (align_bits != 0)
17217 asm_fprintf (stream, ":%d", align_bits);
17219 asm_fprintf (stream, "]");
17222 fputs("!", stream);
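	/* Illustrative example, not part of the original source: a 16-byte
	   vld1 through r0 whose MEM_ALIGN is 256 bits, with post-increment,
	   would print as "[r0:256]!".  */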
17230 gcc_assert (GET_CODE (x) == MEM);
17231 addr = XEXP (x, 0);
17232 gcc_assert (GET_CODE (addr) == REG);
17233 asm_fprintf (stream, "[%r]", REGNO (addr));
17237 /* Translate an S register number into a D register number and element index. */
17240 int mode = GET_MODE (x);
17243 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17245 output_operand_lossage ("invalid operand for code '%c'", code);
17250 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17252 output_operand_lossage ("invalid operand for code '%c'", code);
17256 regno = regno - FIRST_VFP_REGNUM;
17257 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17261 /* Register specifier for vld1.16/vst1.16. Translate the S register
17262 number into a D register number and element index. */
17265 int mode = GET_MODE (x);
17268 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17270 output_operand_lossage ("invalid operand for code '%c'", code);
17275 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17277 output_operand_lossage ("invalid operand for code '%c'", code);
17281 regno = regno - FIRST_VFP_REGNUM;
17282 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17289 output_operand_lossage ("missing operand");
17293 switch (GET_CODE (x))
17296 asm_fprintf (stream, "%r", REGNO (x));
17300 output_memory_reference_mode = GET_MODE (x);
17301 output_address (XEXP (x, 0));
17308 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17309 sizeof (fpstr), 0, 1);
17310 fprintf (stream, "#%s", fpstr);
17313 fprintf (stream, "#%s", fp_immediate_constant (x));
17317 gcc_assert (GET_CODE (x) != NEG);
17318 fputc ('#', stream);
17319 if (GET_CODE (x) == HIGH)
17321 fputs (":lower16:", stream);
17325 output_addr_const (stream, x);
17331 /* Target hook for printing a memory address. */
17333 arm_print_operand_address (FILE *stream, rtx x)
17337 int is_minus = GET_CODE (x) == MINUS;
17339 if (GET_CODE (x) == REG)
17340 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17341 else if (GET_CODE (x) == PLUS || is_minus)
17343 rtx base = XEXP (x, 0);
17344 rtx index = XEXP (x, 1);
17345 HOST_WIDE_INT offset = 0;
17346 if (GET_CODE (base) != REG
17347 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17349 /* Ensure that BASE is a register. */
17350 /* (one of them must be). */
17351 /* Also ensure the SP is not used as an index register.  */
17356 switch (GET_CODE (index))
17359 offset = INTVAL (index);
17362 asm_fprintf (stream, "[%r, #%wd]",
17363 REGNO (base), offset);
17367 asm_fprintf (stream, "[%r, %s%r]",
17368 REGNO (base), is_minus ? "-" : "",
17378 asm_fprintf (stream, "[%r, %s%r",
17379 REGNO (base), is_minus ? "-" : "",
17380 REGNO (XEXP (index, 0)));
17381 arm_print_operand (stream, index, 'S');
17382 fputs ("]", stream);
17387 gcc_unreachable ();
17390 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17391 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17393 extern enum machine_mode output_memory_reference_mode;
17395 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17397 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17398 asm_fprintf (stream, "[%r, #%s%d]!",
17399 REGNO (XEXP (x, 0)),
17400 GET_CODE (x) == PRE_DEC ? "-" : "",
17401 GET_MODE_SIZE (output_memory_reference_mode));
17403 asm_fprintf (stream, "[%r], #%s%d",
17404 REGNO (XEXP (x, 0)),
17405 GET_CODE (x) == POST_DEC ? "-" : "",
17406 GET_MODE_SIZE (output_memory_reference_mode));
17408 else if (GET_CODE (x) == PRE_MODIFY)
17410 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17411 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17412 asm_fprintf (stream, "#%wd]!",
17413 INTVAL (XEXP (XEXP (x, 1), 1)));
17415 asm_fprintf (stream, "%r]!",
17416 REGNO (XEXP (XEXP (x, 1), 1)));
17418 else if (GET_CODE (x) == POST_MODIFY)
17420 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17421 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17422 asm_fprintf (stream, "#%wd",
17423 INTVAL (XEXP (XEXP (x, 1), 1)));
17425 asm_fprintf (stream, "%r",
17426 REGNO (XEXP (XEXP (x, 1), 1)));
17428 else output_addr_const (stream, x);
17432 if (GET_CODE (x) == REG)
17433 asm_fprintf (stream, "[%r]", REGNO (x));
17434 else if (GET_CODE (x) == POST_INC)
17435 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17436 else if (GET_CODE (x) == PLUS)
17438 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17439 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17440 asm_fprintf (stream, "[%r, #%wd]",
17441 REGNO (XEXP (x, 0)),
17442 INTVAL (XEXP (x, 1)));
17444 asm_fprintf (stream, "[%r, %r]",
17445 REGNO (XEXP (x, 0)),
17446 REGNO (XEXP (x, 1)));
17449 output_addr_const (stream, x);
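/* Illustrative examples, not part of the original source: on a 32-bit
   target (mem (plus (reg sp) (const_int 8))) prints as "[sp, #8]" and
   (mem (pre_modify (reg r0) (plus (reg r0) (const_int -16)))) prints as
   "[r0, #-16]!", while on Thumb-1 (mem (post_inc (reg r2))) prints as
   "r2!".  */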
17453 /* Target hook for indicating whether a punctuation character for
17454 TARGET_PRINT_OPERAND is valid. */
17456 arm_print_operand_punct_valid_p (unsigned char code)
17458 return (code == '@' || code == '|' || code == '.'
17459 || code == '(' || code == ')' || code == '#'
17460 || (TARGET_32BIT && (code == '?'))
17461 || (TARGET_THUMB2 && (code == '!'))
17462 || (TARGET_THUMB && (code == '_')));
17465 /* Target hook for assembling integer objects. The ARM version needs to
17466 handle word-sized values specially. */
17468 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17470 enum machine_mode mode;
17472 if (size == UNITS_PER_WORD && aligned_p)
17474 fputs ("\t.word\t", asm_out_file);
17475 output_addr_const (asm_out_file, x);
17477 /* Mark symbols as position independent. We only do this in the
17478 .text segment, not in the .data segment. */
17479 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17480 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17482 /* See legitimize_pic_address for an explanation of the
17483 TARGET_VXWORKS_RTP check. */
17484 if (TARGET_VXWORKS_RTP
17485 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17486 fputs ("(GOT)", asm_out_file);
17488 fputs ("(GOTOFF)", asm_out_file);
17490 fputc ('\n', asm_out_file);
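      /* Illustrative example, not part of the original source: when
	 emitting a PIC constant pool entry on a non-VxWorks target, a
	 global symbol foo produces "\t.word\tfoo(GOT)\n" while a
	 function-local symbol bar produces "\t.word\tbar(GOTOFF)\n"
	 (foo and bar are hypothetical names).  */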
17494 mode = GET_MODE (x);
17496 if (arm_vector_mode_supported_p (mode))
17500 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17502 units = CONST_VECTOR_NUNITS (x);
17503 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17505 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17506 for (i = 0; i < units; i++)
17508 rtx elt = CONST_VECTOR_ELT (x, i);
17510 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17513 for (i = 0; i < units; i++)
17515 rtx elt = CONST_VECTOR_ELT (x, i);
17516 REAL_VALUE_TYPE rval;
17518 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17521 (rval, GET_MODE_INNER (mode),
17522 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17528 return default_assemble_integer (x, size, aligned_p);
17532 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17536 if (!TARGET_AAPCS_BASED)
17539 default_named_section_asm_out_constructor
17540 : default_named_section_asm_out_destructor) (symbol, priority);
17544 /* Put these in the .init_array section, using a special relocation. */
17545 if (priority != DEFAULT_INIT_PRIORITY)
17548 sprintf (buf, "%s.%.5u",
17549 is_ctor ? ".init_array" : ".fini_array",
17551 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17558 switch_to_section (s);
17559 assemble_align (POINTER_SIZE);
17560 fputs ("\t.word\t", asm_out_file);
17561 output_addr_const (asm_out_file, symbol);
17562 fputs ("(target1)\n", asm_out_file);
17565 /* Add a function to the list of static constructors. */
17568 arm_elf_asm_constructor (rtx symbol, int priority)
17570 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17573 /* Add a function to the list of static destructors. */
17576 arm_elf_asm_destructor (rtx symbol, int priority)
17578 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17581 /* A finite state machine takes care of noticing whether or not instructions
17582 can be conditionally executed, thus decreasing execution time and code
17583 size by deleting branch instructions. The fsm is controlled by
17584 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17586 /* The states of the fsm controlling condition codes are:
17587 0: normal, do nothing special
17588 1: make ASM_OUTPUT_OPCODE not output this instruction
17589 2: make ASM_OUTPUT_OPCODE not output this instruction
17590 3: make instructions conditional
17591 4: make instructions conditional
17593 State transitions (state->state by whom under condition):
17594 0 -> 1 final_prescan_insn if the `target' is a label
17595 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17596 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17597 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17598 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17599 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17600 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17601 (the target insn is arm_target_insn).
17603 If the jump clobbers the conditions then we use states 2 and 4.
17605 A similar thing can be done with conditional return insns.
17607 XXX In case the `target' is an unconditional branch, this conditionalising
17608 of the instructions always reduces code size, but not always execution
17609 time. But then, I want to reduce the code size to somewhere near what
17610 /bin/cc produces. */
17612 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17613 instructions. When a COND_EXEC instruction is seen the subsequent
17614 instructions are scanned so that multiple conditional instructions can be
17615 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17616 specify the length and true/false mask for the IT block. These will be
17617 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
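/* Illustrative example, not part of the original source: three insns
   predicated EQ, NE, EQ would be recorded as arm_condexec_masklen == 3
   with arm_condexec_mask == 0x5 (bits set for the EQ insns), and would
   be emitted under a single "itet eq" prefix.  */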
17619 /* Returns the index of the ARM condition code string in
17620 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17621 COMPARISON should be an rtx like `(eq (...) (...))'. */
17623 maybe_get_arm_condition_code (rtx comparison)
17625 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17626 enum arm_cond_code code;
17627 enum rtx_code comp_code = GET_CODE (comparison);
17629 if (GET_MODE_CLASS (mode) != MODE_CC)
17630 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17631 XEXP (comparison, 1));
17635 case CC_DNEmode: code = ARM_NE; goto dominance;
17636 case CC_DEQmode: code = ARM_EQ; goto dominance;
17637 case CC_DGEmode: code = ARM_GE; goto dominance;
17638 case CC_DGTmode: code = ARM_GT; goto dominance;
17639 case CC_DLEmode: code = ARM_LE; goto dominance;
17640 case CC_DLTmode: code = ARM_LT; goto dominance;
17641 case CC_DGEUmode: code = ARM_CS; goto dominance;
17642 case CC_DGTUmode: code = ARM_HI; goto dominance;
17643 case CC_DLEUmode: code = ARM_LS; goto dominance;
17644 case CC_DLTUmode: code = ARM_CC;
17647 if (comp_code == EQ)
17648 return ARM_INVERSE_CONDITION_CODE (code);
17649 if (comp_code == NE)
17656 case NE: return ARM_NE;
17657 case EQ: return ARM_EQ;
17658 case GE: return ARM_PL;
17659 case LT: return ARM_MI;
17660 default: return ARM_NV;
17666 case NE: return ARM_NE;
17667 case EQ: return ARM_EQ;
17668 default: return ARM_NV;
17674 case NE: return ARM_MI;
17675 case EQ: return ARM_PL;
17676 default: return ARM_NV;
17681 /* These encodings assume that AC=1 in the FPA system control
17682 byte. This allows us to handle all cases except UNEQ and LTGT.  */
17686 case GE: return ARM_GE;
17687 case GT: return ARM_GT;
17688 case LE: return ARM_LS;
17689 case LT: return ARM_MI;
17690 case NE: return ARM_NE;
17691 case EQ: return ARM_EQ;
17692 case ORDERED: return ARM_VC;
17693 case UNORDERED: return ARM_VS;
17694 case UNLT: return ARM_LT;
17695 case UNLE: return ARM_LE;
17696 case UNGT: return ARM_HI;
17697 case UNGE: return ARM_PL;
17698 /* UNEQ and LTGT do not have a representation. */
17699 case UNEQ: /* Fall through. */
17700 case LTGT: /* Fall through. */
17701 default: return ARM_NV;
17707 case NE: return ARM_NE;
17708 case EQ: return ARM_EQ;
17709 case GE: return ARM_LE;
17710 case GT: return ARM_LT;
17711 case LE: return ARM_GE;
17712 case LT: return ARM_GT;
17713 case GEU: return ARM_LS;
17714 case GTU: return ARM_CC;
17715 case LEU: return ARM_CS;
17716 case LTU: return ARM_HI;
17717 default: return ARM_NV;
17723 case LTU: return ARM_CS;
17724 case GEU: return ARM_CC;
17725 default: return ARM_NV;
17731 case NE: return ARM_NE;
17732 case EQ: return ARM_EQ;
17733 case GEU: return ARM_CS;
17734 case GTU: return ARM_HI;
17735 case LEU: return ARM_LS;
17736 case LTU: return ARM_CC;
17737 default: return ARM_NV;
17743 case GE: return ARM_GE;
17744 case LT: return ARM_LT;
17745 case GEU: return ARM_CS;
17746 case LTU: return ARM_CC;
17747 default: return ARM_NV;
17753 case NE: return ARM_NE;
17754 case EQ: return ARM_EQ;
17755 case GE: return ARM_GE;
17756 case GT: return ARM_GT;
17757 case LE: return ARM_LE;
17758 case LT: return ARM_LT;
17759 case GEU: return ARM_CS;
17760 case GTU: return ARM_HI;
17761 case LEU: return ARM_LS;
17762 case LTU: return ARM_CC;
17763 default: return ARM_NV;
17766 default: gcc_unreachable ();
17770 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17771 static enum arm_cond_code
17772 get_arm_condition_code (rtx comparison)
17774 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17775 gcc_assert (code != ARM_NV);
17779 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions.  */
17782 thumb2_final_prescan_insn (rtx insn)
17784 rtx first_insn = insn;
17785 rtx body = PATTERN (insn);
17787 enum arm_cond_code code;
17791 /* Remove the previous insn from the count of insns to be output. */
17792 if (arm_condexec_count)
17793 arm_condexec_count--;
17795 /* Nothing to do if we are already inside a conditional block. */
17796 if (arm_condexec_count)
17799 if (GET_CODE (body) != COND_EXEC)
17802 /* Conditional jumps are implemented directly. */
17803 if (GET_CODE (insn) == JUMP_INSN)
17806 predicate = COND_EXEC_TEST (body);
17807 arm_current_cc = get_arm_condition_code (predicate);
17809 n = get_attr_ce_count (insn);
17810 arm_condexec_count = 1;
17811 arm_condexec_mask = (1 << n) - 1;
17812 arm_condexec_masklen = n;
17813 /* See if subsequent instructions can be combined into the same block. */
17816 insn = next_nonnote_insn (insn);
17818 /* Jumping into the middle of an IT block is illegal, so a label or
17819 barrier terminates the block. */
17820 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17823 body = PATTERN (insn);
17824 /* USE and CLOBBER aren't really insns, so just skip them. */
17825 if (GET_CODE (body) == USE
17826 || GET_CODE (body) == CLOBBER)
17829 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17830 if (GET_CODE (body) != COND_EXEC)
17832 /* Allow up to 4 conditionally executed instructions in a block. */
17833 n = get_attr_ce_count (insn);
17834 if (arm_condexec_masklen + n > 4)
17837 predicate = COND_EXEC_TEST (body);
17838 code = get_arm_condition_code (predicate);
17839 mask = (1 << n) - 1;
17840 if (arm_current_cc == code)
17841 arm_condexec_mask |= (mask << arm_condexec_masklen);
17842 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17845 arm_condexec_count++;
17846 arm_condexec_masklen += n;
17848 /* A jump must be the last instruction in a conditional block. */
17849 if (GET_CODE(insn) == JUMP_INSN)
17852 /* Restore recog_data (getting the attributes of other insns can
17853 destroy this array, but final.c assumes that it remains intact
17854 across this call). */
17855 extract_constrain_insn_cached (first_insn);
17859 arm_final_prescan_insn (rtx insn)
17861 /* BODY will hold the body of INSN. */
17862 rtx body = PATTERN (insn);
17864 /* This will be 1 if trying to repeat the trick, and things need to be
17865 reversed if it appears to fail. */
17868 /* If we start with a return insn, we only succeed if we find another one. */
17869 int seeking_return = 0;
17870 enum rtx_code return_code = UNKNOWN;
17872 /* START_INSN will hold the insn from where we start looking. This is the
17873 first insn after the following code_label if REVERSE is true. */
17874 rtx start_insn = insn;
17876 /* If in state 4, check if the target branch is reached, in order to
17877 change back to state 0. */
17878 if (arm_ccfsm_state == 4)
17880 if (insn == arm_target_insn)
17882 arm_target_insn = NULL;
17883 arm_ccfsm_state = 0;
17888 /* If in state 3, it is possible to repeat the trick, if this insn is an
17889 unconditional branch to a label, and immediately following this branch
17890 is the previous target label which is only used once, and the label this
17891 branch jumps to is not too far off. */
17892 if (arm_ccfsm_state == 3)
17894 if (simplejump_p (insn))
17896 start_insn = next_nonnote_insn (start_insn);
17897 if (GET_CODE (start_insn) == BARRIER)
17899 /* XXX Isn't this always a barrier? */
17900 start_insn = next_nonnote_insn (start_insn);
17902 if (GET_CODE (start_insn) == CODE_LABEL
17903 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17904 && LABEL_NUSES (start_insn) == 1)
17909 else if (ANY_RETURN_P (body))
17911 start_insn = next_nonnote_insn (start_insn);
17912 if (GET_CODE (start_insn) == BARRIER)
17913 start_insn = next_nonnote_insn (start_insn);
17914 if (GET_CODE (start_insn) == CODE_LABEL
17915 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17916 && LABEL_NUSES (start_insn) == 1)
17919 seeking_return = 1;
17920 return_code = GET_CODE (body);
17929 gcc_assert (!arm_ccfsm_state || reverse);
17930 if (GET_CODE (insn) != JUMP_INSN)
17933 /* This jump might be paralleled with a clobber of the condition codes;
17934 the jump should always come first.  */
17935 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17936 body = XVECEXP (body, 0, 0);
17939 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17940 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17943 int fail = FALSE, succeed = FALSE;
17944 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17945 int then_not_else = TRUE;
17946 rtx this_insn = start_insn, label = 0;
17948 /* Register the insn jumped to. */
17951 if (!seeking_return)
17952 label = XEXP (SET_SRC (body), 0);
17954 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17955 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17956 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17958 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17959 then_not_else = FALSE;
17961 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
17963 seeking_return = 1;
17964 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
17966 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
17968 seeking_return = 1;
17969 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
17970 then_not_else = FALSE;
17973 gcc_unreachable ();
17975 /* See how many insns this branch skips, and what kind of insns. If all
17976 insns are okay, and the label or unconditional branch to the same
17977 label is not too far away, succeed. */
17978 for (insns_skipped = 0;
17979 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17983 this_insn = next_nonnote_insn (this_insn);
17987 switch (GET_CODE (this_insn))
17990 /* Succeed if it is the target label, otherwise fail since
17991 control falls in from somewhere else. */
17992 if (this_insn == label)
17994 arm_ccfsm_state = 1;
18002 /* Succeed if the following insn is the target label.
18004 If return insns are used then the last insn in a function
18005 will be a barrier. */
18006 this_insn = next_nonnote_insn (this_insn);
18007 if (this_insn && this_insn == label)
18009 arm_ccfsm_state = 1;
18017 /* The AAPCS says that conditional calls should not be
18018 used since they make interworking inefficient (the
18019 linker can't transform BL<cond> into BLX). That's
18020 only a problem if the machine has BLX. */
18027 /* Succeed if the following insn is the target label, or
18028 if the following two insns are a barrier and the target label. */
18030 this_insn = next_nonnote_insn (this_insn);
18031 if (this_insn && GET_CODE (this_insn) == BARRIER)
18032 this_insn = next_nonnote_insn (this_insn);
18034 if (this_insn && this_insn == label
18035 && insns_skipped < max_insns_skipped)
18037 arm_ccfsm_state = 1;
18045 /* If this is an unconditional branch to the same label, succeed.
18046 If it is to another label, do nothing. If it is conditional, fail. */
18048 /* XXX Probably, the tests for SET and the PC are unnecessary. */
18051 scanbody = PATTERN (this_insn);
18052 if (GET_CODE (scanbody) == SET
18053 && GET_CODE (SET_DEST (scanbody)) == PC)
18055 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18056 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18058 arm_ccfsm_state = 2;
18061 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18064 /* Fail if a conditional return is undesirable (e.g. on a
18065 StrongARM), but still allow this if optimizing for size. */
18066 else if (GET_CODE (scanbody) == return_code
18067 && !use_return_insn (TRUE, NULL)
18070 else if (GET_CODE (scanbody) == return_code)
18072 arm_ccfsm_state = 2;
18075 else if (GET_CODE (scanbody) == PARALLEL)
18077 switch (get_attr_conds (this_insn))
18087 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18092 /* Instructions using or affecting the condition codes make it fail. */
18094 scanbody = PATTERN (this_insn);
18095 if (!(GET_CODE (scanbody) == SET
18096 || GET_CODE (scanbody) == PARALLEL)
18097 || get_attr_conds (this_insn) != CONDS_NOCOND)
18100 /* A conditional Cirrus instruction must be followed by
18101 a non-Cirrus instruction. However, since we
18102 conditionalize instructions in this function and by
18103 the time we get here we can't add instructions
18104 (nops), because shorten_branches() has already been
18105 called, we will disable conditionalizing Cirrus
18106 instructions to be safe. */
18107 if (GET_CODE (scanbody) != USE
18108 && GET_CODE (scanbody) != CLOBBER
18109 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18119 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18120 arm_target_label = CODE_LABEL_NUMBER (label);
18123 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18125 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18127 this_insn = next_nonnote_insn (this_insn);
18128 gcc_assert (!this_insn
18129 || (GET_CODE (this_insn) != BARRIER
18130 && GET_CODE (this_insn) != CODE_LABEL));
18134 /* Oh dear! We ran off the end... give up. */
18135 extract_constrain_insn_cached (insn);
18136 arm_ccfsm_state = 0;
18137 arm_target_insn = NULL;
18140 arm_target_insn = this_insn;
18143 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was. */
18146 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18148 if (reverse || then_not_else)
18149 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18152 /* Restore recog_data (getting the attributes of other insns can
18153 destroy this array, but final.c assumes that it remains intact
18154 across this call). */
18155 extract_constrain_insn_cached (insn);
18159 /* Output IT instructions. */
18161 thumb2_asm_output_opcode (FILE * stream)
18166 if (arm_condexec_mask)
18168 for (n = 0; n < arm_condexec_masklen; n++)
18169 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18171 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18172 arm_condition_codes[arm_current_cc]);
18173 arm_condexec_mask = 0;
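/* Added note (an illustration, not original text): each 't' printed above
   marks an instruction executed under arm_current_cc and each 'e' one
   executed under its inverse, so a (mask, masklen) pair of (0x3, 2) is
   emitted as "itt <cond>" and (0x1, 2) as "ite <cond>".  */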
18177 /* Returns true if REGNO is a valid register
18178 for holding a quantity of mode MODE. */
18180 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18182 if (GET_MODE_CLASS (mode) == MODE_CC)
18183 return (regno == CC_REGNUM
18184 || (TARGET_HARD_FLOAT && TARGET_VFP
18185 && regno == VFPCC_REGNUM));
18188 /* For the Thumb we only allow values bigger than SImode in
18189 registers 0 - 6, so that there is always a second low
18190 register available to hold the upper part of the value.
18191 We probably ought to ensure that the register is the
18192 start of an even-numbered register pair. */
18193 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18195 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18196 && IS_CIRRUS_REGNUM (regno))
18197 /* We have outlawed SI values in Cirrus registers because they
18198 reside in the lower 32 bits, but SF values reside in the
18199 upper 32 bits. This causes gcc all sorts of grief. We can't
18200 even split the registers into pairs because Cirrus SI values
18201 get sign-extended to 64 bits -- aldyh. */
18202 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18204 if (TARGET_HARD_FLOAT && TARGET_VFP
18205 && IS_VFP_REGNUM (regno))
18207 if (mode == SFmode || mode == SImode)
18208 return VFP_REGNO_OK_FOR_SINGLE (regno);
18210 if (mode == DFmode)
18211 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18213 /* VFP registers can hold HFmode values, but there is no point in
18214 putting them there unless we have hardware conversion insns. */
18215 if (mode == HFmode)
18216 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18219 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18220 || (VALID_NEON_QREG_MODE (mode)
18221 && NEON_REGNO_OK_FOR_QUAD (regno))
18222 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18223 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18224 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18225 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18226 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18231 if (TARGET_REALLY_IWMMXT)
18233 if (IS_IWMMXT_GR_REGNUM (regno))
18234 return mode == SImode;
18236 if (IS_IWMMXT_REGNUM (regno))
18237 return VALID_IWMMXT_REG_MODE (mode);
18240 /* We allow almost any value to be stored in the general registers.
18241 Restrict doubleword quantities to even register pairs so that we can
18242 use ldrd. Do not allow very large Neon structure opaque modes in
18243 general registers; they would use too many. */
18244 if (regno <= LAST_ARM_REGNUM)
18245 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18246 && ARM_NUM_REGS (mode) <= 4;
18248 if (regno == FRAME_POINTER_REGNUM
18249 || regno == ARG_POINTER_REGNUM)
18250 /* We only allow integers in the fake hard registers. */
18251 return GET_MODE_CLASS (mode) == MODE_INT;
18253 /* The only registers left are the FPA registers
18254 which we only allow to hold FP values. */
18255 return (TARGET_HARD_FLOAT && TARGET_FPA
18256 && GET_MODE_CLASS (mode) == MODE_FLOAT
18257 && regno >= FIRST_FPA_REGNUM
18258 && regno <= LAST_FPA_REGNUM);
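/* Added illustration (not part of the original source): with TARGET_LDRD,
   the general-register case above accepts DImode in r0 (the even-numbered
   pair r0/r1, addressable by ldrd/strd) but rejects DImode in r1, because
   GET_MODE_SIZE (DImode) > 4 and (1 & 1) != 0.  */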
18261 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18262 not used in ARM mode. */
18265 arm_regno_class (int regno)
18269 if (regno == STACK_POINTER_REGNUM)
18271 if (regno == CC_REGNUM)
18278 if (TARGET_THUMB2 && regno < 8)
18281 if (regno <= LAST_ARM_REGNUM
18282 || regno == FRAME_POINTER_REGNUM
18283 || regno == ARG_POINTER_REGNUM)
18284 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18286 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18287 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18289 if (IS_CIRRUS_REGNUM (regno))
18290 return CIRRUS_REGS;
18292 if (IS_VFP_REGNUM (regno))
18294 if (regno <= D7_VFP_REGNUM)
18295 return VFP_D0_D7_REGS;
18296 else if (regno <= LAST_LO_VFP_REGNUM)
18297 return VFP_LO_REGS;
18299 return VFP_HI_REGS;
18302 if (IS_IWMMXT_REGNUM (regno))
18303 return IWMMXT_REGS;
18305 if (IS_IWMMXT_GR_REGNUM (regno))
18306 return IWMMXT_GR_REGS;
18311 /* Handle a special case when computing the offset
18312 of an argument from the frame pointer. */
18314 arm_debugger_arg_offset (int value, rtx addr)
18318 /* We are only interested in the case where dbxout_parms () failed to compute the offset. */
18322 /* We can only cope with the case where the address is held in a register. */
18323 if (GET_CODE (addr) != REG)
18326 /* If we are using the frame pointer to point at the argument, then
18327 an offset of 0 is correct. */
18328 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18331 /* If we are using the stack pointer to point at the
18332 argument, then an offset of 0 is correct. */
18333 /* ??? Check this is consistent with thumb2 frame layout. */
18334 if ((TARGET_THUMB || !frame_pointer_needed)
18335 && REGNO (addr) == SP_REGNUM)
18338 /* Oh dear. The argument is pointed to by a register rather
18339 than being held in a register, or being stored at a known
18340 offset from the frame pointer. Since GDB only understands
18341 those two kinds of argument we must translate the address
18342 held in the register into an offset from the frame pointer.
18343 We do this by searching through the insns for the function
18344 looking to see where this register gets its value. If the
18345 register is initialized from the frame pointer plus an offset
18346 then we are in luck and we can continue, otherwise we give up.
18348 This code is exercised by producing debugging information
18349 for a function with arguments like this:
18351 double func (double a, double b, int c, double d) {return d;}
18353 Without this code the stab for parameter 'd' will be set to
18354 an offset of 0 from the frame pointer, rather than 8. */
18356 /* The if() statement says:
18358 If the insn is a normal instruction
18359 and if the insn is setting the value in a register
18360 and if the register being set is the register holding the address of the argument
18361 and if the address is computed by an addition
18362 that involves adding to a register
18363 which is the frame pointer
18368 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18370 if (GET_CODE (insn) == INSN
18371 && GET_CODE (PATTERN (insn)) == SET
18372 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18373 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18374 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18375 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18376 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18379 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18388 warning (0, "unable to compute real location of stacked parameter");
18389 value = 8; /* XXX magic hack */
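/* Added illustration (hypothetical RTL, not from the original source): the
   loop above recognizes an initialization of the form

       (set (reg:SI 105)
            (plus:SI (reg/f:SI 11 fp) (const_int 8)))

   and records 8 as the argument's offset from the frame pointer.  */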
18409 T_MAX /* Size of enum. Keep last. */
18410 } neon_builtin_type_mode;
18412 #define TYPE_MODE_BIT(X) (1 << (X))
18414 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18415 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18416 | TYPE_MODE_BIT (T_DI))
18417 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18418 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18419 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
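/* Added example (not original text): TB_DREG has the bits for the five
   64-bit "key" modes set, so a test such as

       if (TYPE_MODE_BIT (T_V4HI) & TB_DREG)

   distinguishes doubleword from quadword instruction variants.  */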
18421 #define v8qi_UP T_V8QI
18422 #define v4hi_UP T_V4HI
18423 #define v2si_UP T_V2SI
18424 #define v2sf_UP T_V2SF
18426 #define v16qi_UP T_V16QI
18427 #define v8hi_UP T_V8HI
18428 #define v4si_UP T_V4SI
18429 #define v4sf_UP T_V4SF
18430 #define v2di_UP T_V2DI
18435 #define UP(X) X##_UP
18468 NEON_LOADSTRUCTLANE,
18470 NEON_STORESTRUCTLANE,
18479 const neon_itype itype;
18480 const neon_builtin_type_mode mode;
18481 const enum insn_code code;
18482 unsigned int fcode;
18483 } neon_builtin_datum;
18485 #define CF(N,X) CODE_FOR_neon_##N##X
18487 #define VAR1(T, N, A) \
18488 {#N, NEON_##T, UP (A), CF (N, A), 0}
18489 #define VAR2(T, N, A, B) \
18490 VAR1 (T, N, A), \
18491 {#N, NEON_##T, UP (B), CF (N, B), 0}
18492 #define VAR3(T, N, A, B, C) \
18493 VAR2 (T, N, A, B), \
18494 {#N, NEON_##T, UP (C), CF (N, C), 0}
18495 #define VAR4(T, N, A, B, C, D) \
18496 VAR3 (T, N, A, B, C), \
18497 {#N, NEON_##T, UP (D), CF (N, D), 0}
18498 #define VAR5(T, N, A, B, C, D, E) \
18499 VAR4 (T, N, A, B, C, D), \
18500 {#N, NEON_##T, UP (E), CF (N, E), 0}
18501 #define VAR6(T, N, A, B, C, D, E, F) \
18502 VAR5 (T, N, A, B, C, D, E), \
18503 {#N, NEON_##T, UP (F), CF (N, F), 0}
18504 #define VAR7(T, N, A, B, C, D, E, F, G) \
18505 VAR6 (T, N, A, B, C, D, E, F), \
18506 {#N, NEON_##T, UP (G), CF (N, G), 0}
18507 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18508 VAR7 (T, N, A, B, C, D, E, F, G), \
18509 {#N, NEON_##T, UP (H), CF (N, H), 0}
18510 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18511 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18512 {#N, NEON_##T, UP (I), CF (N, I), 0}
18513 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18514 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18515 {#N, NEON_##T, UP (J), CF (N, J), 0}
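/* Added illustration (not part of the original source): an entry such as

       VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)

   expands, via UP() and CF() above, to the three initializers

       {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
       {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0},
       {"vaddl", NEON_BINOP, T_V2SI, CODE_FOR_neon_vaddlv2si, 0},

   where the final 0 initializes the fcode field.  */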
18517 /* The mode entries in the following table correspond to the "key" type of the
18518 instruction variant, i.e. equivalent to that which would be specified after
18519 the assembler mnemonic, which usually refers to the last vector operand.
18520 (Signed/unsigned/polynomial types are not differentiated between though, and
18521 are all mapped onto the same mode for a given element size.) The modes
18522 listed per instruction should be the same as those defined for that
18523 instruction's pattern in neon.md. */
18525 static neon_builtin_datum neon_builtin_data[] =
18527 VAR10 (BINOP, vadd,
18528 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18529 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18530 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18531 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18532 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18533 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18534 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18535 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18536 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18537 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18538 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18539 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18540 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18541 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18542 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18543 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18544 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18545 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18546 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18547 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18548 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18549 VAR2 (BINOP, vqdmull, v4hi, v2si),
18550 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18551 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18552 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18553 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18554 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18555 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18556 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18557 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18558 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18559 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18560 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18561 VAR10 (BINOP, vsub,
18562 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18563 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18564 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18565 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18566 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18567 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18568 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18569 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18570 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18571 VAR2 (BINOP, vcage, v2sf, v4sf),
18572 VAR2 (BINOP, vcagt, v2sf, v4sf),
18573 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18574 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18575 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18576 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18577 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18578 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18579 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18580 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18581 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18582 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18583 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18584 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18585 VAR2 (BINOP, vrecps, v2sf, v4sf),
18586 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18587 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18588 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18589 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18590 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18591 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18592 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18593 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18594 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18595 VAR2 (UNOP, vcnt, v8qi, v16qi),
18596 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18597 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18598 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18599 /* FIXME: vget_lane supports more variants than this! */
18600 VAR10 (GETLANE, vget_lane,
18601 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18602 VAR10 (SETLANE, vset_lane,
18603 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18604 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18605 VAR10 (DUP, vdup_n,
18606 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18607 VAR10 (DUPLANE, vdup_lane,
18608 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18609 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18610 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18611 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18612 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18613 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18614 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18615 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18616 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18617 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18618 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18619 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18620 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18621 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18622 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18623 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18624 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18625 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18626 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18627 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18628 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18629 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18630 VAR10 (BINOP, vext,
18631 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18632 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18633 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18634 VAR2 (UNOP, vrev16, v8qi, v16qi),
18635 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18636 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18637 VAR10 (SELECT, vbsl,
18638 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18639 VAR1 (VTBL, vtbl1, v8qi),
18640 VAR1 (VTBL, vtbl2, v8qi),
18641 VAR1 (VTBL, vtbl3, v8qi),
18642 VAR1 (VTBL, vtbl4, v8qi),
18643 VAR1 (VTBX, vtbx1, v8qi),
18644 VAR1 (VTBX, vtbx2, v8qi),
18645 VAR1 (VTBX, vtbx3, v8qi),
18646 VAR1 (VTBX, vtbx4, v8qi),
18647 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18648 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18649 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18650 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18651 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18652 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18653 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18654 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18655 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18656 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18657 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18658 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18659 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18660 VAR10 (LOAD1, vld1,
18661 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18662 VAR10 (LOAD1LANE, vld1_lane,
18663 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18664 VAR10 (LOAD1, vld1_dup,
18665 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18666 VAR10 (STORE1, vst1,
18667 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18668 VAR10 (STORE1LANE, vst1_lane,
18669 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18670 VAR9 (LOADSTRUCT,
18671 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18672 VAR7 (LOADSTRUCTLANE, vld2_lane,
18673 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18674 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18675 VAR9 (STORESTRUCT, vst2,
18676 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18677 VAR7 (STORESTRUCTLANE, vst2_lane,
18678 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18679 VAR9 (LOADSTRUCT,
18680 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18681 VAR7 (LOADSTRUCTLANE, vld3_lane,
18682 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18683 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18684 VAR9 (STORESTRUCT, vst3,
18685 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18686 VAR7 (STORESTRUCTLANE, vst3_lane,
18687 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18688 VAR9 (LOADSTRUCT, vld4,
18689 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18690 VAR7 (LOADSTRUCTLANE, vld4_lane,
18691 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18692 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18693 VAR9 (STORESTRUCT, vst4,
18694 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18695 VAR7 (STORESTRUCTLANE, vst4_lane,
18696 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18697 VAR10 (LOGICBINOP, vand,
18698 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18699 VAR10 (LOGICBINOP, vorr,
18700 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18701 VAR10 (BINOP, veor,
18702 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18703 VAR10 (LOGICBINOP, vbic,
18704 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18705 VAR10 (LOGICBINOP, vorn,
18706 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18721 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18722 symbolic names defined here (which would require too much duplication). */
18726 ARM_BUILTIN_GETWCX,
18727 ARM_BUILTIN_SETWCX,
18731 ARM_BUILTIN_WAVG2BR,
18732 ARM_BUILTIN_WAVG2HR,
18733 ARM_BUILTIN_WAVG2B,
18734 ARM_BUILTIN_WAVG2H,
18741 ARM_BUILTIN_WMACSZ,
18743 ARM_BUILTIN_WMACUZ,
18746 ARM_BUILTIN_WSADBZ,
18748 ARM_BUILTIN_WSADHZ,
18750 ARM_BUILTIN_WALIGN,
18753 ARM_BUILTIN_TMIAPH,
18754 ARM_BUILTIN_TMIABB,
18755 ARM_BUILTIN_TMIABT,
18756 ARM_BUILTIN_TMIATB,
18757 ARM_BUILTIN_TMIATT,
18759 ARM_BUILTIN_TMOVMSKB,
18760 ARM_BUILTIN_TMOVMSKH,
18761 ARM_BUILTIN_TMOVMSKW,
18763 ARM_BUILTIN_TBCSTB,
18764 ARM_BUILTIN_TBCSTH,
18765 ARM_BUILTIN_TBCSTW,
18767 ARM_BUILTIN_WMADDS,
18768 ARM_BUILTIN_WMADDU,
18770 ARM_BUILTIN_WPACKHSS,
18771 ARM_BUILTIN_WPACKWSS,
18772 ARM_BUILTIN_WPACKDSS,
18773 ARM_BUILTIN_WPACKHUS,
18774 ARM_BUILTIN_WPACKWUS,
18775 ARM_BUILTIN_WPACKDUS,
18780 ARM_BUILTIN_WADDSSB,
18781 ARM_BUILTIN_WADDSSH,
18782 ARM_BUILTIN_WADDSSW,
18783 ARM_BUILTIN_WADDUSB,
18784 ARM_BUILTIN_WADDUSH,
18785 ARM_BUILTIN_WADDUSW,
18789 ARM_BUILTIN_WSUBSSB,
18790 ARM_BUILTIN_WSUBSSH,
18791 ARM_BUILTIN_WSUBSSW,
18792 ARM_BUILTIN_WSUBUSB,
18793 ARM_BUILTIN_WSUBUSH,
18794 ARM_BUILTIN_WSUBUSW,
18801 ARM_BUILTIN_WCMPEQB,
18802 ARM_BUILTIN_WCMPEQH,
18803 ARM_BUILTIN_WCMPEQW,
18804 ARM_BUILTIN_WCMPGTUB,
18805 ARM_BUILTIN_WCMPGTUH,
18806 ARM_BUILTIN_WCMPGTUW,
18807 ARM_BUILTIN_WCMPGTSB,
18808 ARM_BUILTIN_WCMPGTSH,
18809 ARM_BUILTIN_WCMPGTSW,
18811 ARM_BUILTIN_TEXTRMSB,
18812 ARM_BUILTIN_TEXTRMSH,
18813 ARM_BUILTIN_TEXTRMSW,
18814 ARM_BUILTIN_TEXTRMUB,
18815 ARM_BUILTIN_TEXTRMUH,
18816 ARM_BUILTIN_TEXTRMUW,
18817 ARM_BUILTIN_TINSRB,
18818 ARM_BUILTIN_TINSRH,
18819 ARM_BUILTIN_TINSRW,
18821 ARM_BUILTIN_WMAXSW,
18822 ARM_BUILTIN_WMAXSH,
18823 ARM_BUILTIN_WMAXSB,
18824 ARM_BUILTIN_WMAXUW,
18825 ARM_BUILTIN_WMAXUH,
18826 ARM_BUILTIN_WMAXUB,
18827 ARM_BUILTIN_WMINSW,
18828 ARM_BUILTIN_WMINSH,
18829 ARM_BUILTIN_WMINSB,
18830 ARM_BUILTIN_WMINUW,
18831 ARM_BUILTIN_WMINUH,
18832 ARM_BUILTIN_WMINUB,
18834 ARM_BUILTIN_WMULUM,
18835 ARM_BUILTIN_WMULSM,
18836 ARM_BUILTIN_WMULUL,
18838 ARM_BUILTIN_PSADBH,
18839 ARM_BUILTIN_WSHUFH,
18853 ARM_BUILTIN_WSLLHI,
18854 ARM_BUILTIN_WSLLWI,
18855 ARM_BUILTIN_WSLLDI,
18856 ARM_BUILTIN_WSRAHI,
18857 ARM_BUILTIN_WSRAWI,
18858 ARM_BUILTIN_WSRADI,
18859 ARM_BUILTIN_WSRLHI,
18860 ARM_BUILTIN_WSRLWI,
18861 ARM_BUILTIN_WSRLDI,
18862 ARM_BUILTIN_WRORHI,
18863 ARM_BUILTIN_WRORWI,
18864 ARM_BUILTIN_WRORDI,
18866 ARM_BUILTIN_WUNPCKIHB,
18867 ARM_BUILTIN_WUNPCKIHH,
18868 ARM_BUILTIN_WUNPCKIHW,
18869 ARM_BUILTIN_WUNPCKILB,
18870 ARM_BUILTIN_WUNPCKILH,
18871 ARM_BUILTIN_WUNPCKILW,
18873 ARM_BUILTIN_WUNPCKEHSB,
18874 ARM_BUILTIN_WUNPCKEHSH,
18875 ARM_BUILTIN_WUNPCKEHSW,
18876 ARM_BUILTIN_WUNPCKEHUB,
18877 ARM_BUILTIN_WUNPCKEHUH,
18878 ARM_BUILTIN_WUNPCKEHUW,
18879 ARM_BUILTIN_WUNPCKELSB,
18880 ARM_BUILTIN_WUNPCKELSH,
18881 ARM_BUILTIN_WUNPCKELSW,
18882 ARM_BUILTIN_WUNPCKELUB,
18883 ARM_BUILTIN_WUNPCKELUH,
18884 ARM_BUILTIN_WUNPCKELUW,
18886 ARM_BUILTIN_THREAD_POINTER,
18888 ARM_BUILTIN_NEON_BASE,
18890 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18893 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18896 arm_init_neon_builtins (void)
18898 unsigned int i, fcode;
18901 tree neon_intQI_type_node;
18902 tree neon_intHI_type_node;
18903 tree neon_polyQI_type_node;
18904 tree neon_polyHI_type_node;
18905 tree neon_intSI_type_node;
18906 tree neon_intDI_type_node;
18907 tree neon_float_type_node;
18909 tree intQI_pointer_node;
18910 tree intHI_pointer_node;
18911 tree intSI_pointer_node;
18912 tree intDI_pointer_node;
18913 tree float_pointer_node;
18915 tree const_intQI_node;
18916 tree const_intHI_node;
18917 tree const_intSI_node;
18918 tree const_intDI_node;
18919 tree const_float_node;
18921 tree const_intQI_pointer_node;
18922 tree const_intHI_pointer_node;
18923 tree const_intSI_pointer_node;
18924 tree const_intDI_pointer_node;
18925 tree const_float_pointer_node;
18927 tree V8QI_type_node;
18928 tree V4HI_type_node;
18929 tree V2SI_type_node;
18930 tree V2SF_type_node;
18931 tree V16QI_type_node;
18932 tree V8HI_type_node;
18933 tree V4SI_type_node;
18934 tree V4SF_type_node;
18935 tree V2DI_type_node;
18937 tree intUQI_type_node;
18938 tree intUHI_type_node;
18939 tree intUSI_type_node;
18940 tree intUDI_type_node;
18942 tree intEI_type_node;
18943 tree intOI_type_node;
18944 tree intCI_type_node;
18945 tree intXI_type_node;
18947 tree V8QI_pointer_node;
18948 tree V4HI_pointer_node;
18949 tree V2SI_pointer_node;
18950 tree V2SF_pointer_node;
18951 tree V16QI_pointer_node;
18952 tree V8HI_pointer_node;
18953 tree V4SI_pointer_node;
18954 tree V4SF_pointer_node;
18955 tree V2DI_pointer_node;
18957 tree void_ftype_pv8qi_v8qi_v8qi;
18958 tree void_ftype_pv4hi_v4hi_v4hi;
18959 tree void_ftype_pv2si_v2si_v2si;
18960 tree void_ftype_pv2sf_v2sf_v2sf;
18961 tree void_ftype_pdi_di_di;
18962 tree void_ftype_pv16qi_v16qi_v16qi;
18963 tree void_ftype_pv8hi_v8hi_v8hi;
18964 tree void_ftype_pv4si_v4si_v4si;
18965 tree void_ftype_pv4sf_v4sf_v4sf;
18966 tree void_ftype_pv2di_v2di_v2di;
18968 tree reinterp_ftype_dreg[5][5];
18969 tree reinterp_ftype_qreg[5][5];
18970 tree dreg_types[5], qreg_types[5];
18972 /* Create distinguished type nodes for NEON vector element types,
18973 and pointers to values of such types, so we can detect them later. */
18974 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18975 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18976 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18977 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18978 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18979 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18980 neon_float_type_node = make_node (REAL_TYPE);
18981 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18982 layout_type (neon_float_type_node);
18984 /* Define typedefs which exactly correspond to the modes we are basing vector
18985 types on. If you change these names you'll need to change
18986 the table used by arm_mangle_type too. */
18987 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18988 "__builtin_neon_qi");
18989 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18990 "__builtin_neon_hi");
18991 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18992 "__builtin_neon_si");
18993 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18994 "__builtin_neon_sf");
18995 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18996 "__builtin_neon_di");
18997 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18998 "__builtin_neon_poly8");
18999 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19000 "__builtin_neon_poly16");
19002 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19003 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19004 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19005 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19006 float_pointer_node = build_pointer_type (neon_float_type_node);
19008 /* Next create constant-qualified versions of the above types. */
19009 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19011 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19013 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19015 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19017 const_float_node = build_qualified_type (neon_float_type_node,
19020 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19021 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19022 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19023 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19024 const_float_pointer_node = build_pointer_type (const_float_node);
19026 /* Now create vector types based on our NEON element types. */
19027 /* 64-bit vectors. */
19029 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19031 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19033 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19035 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19036 /* 128-bit vectors. */
19038 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19040 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19042 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19044 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19046 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19048 /* Unsigned integer types for various mode sizes. */
19049 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19050 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19051 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19052 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19054 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19055 "__builtin_neon_uqi");
19056 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19057 "__builtin_neon_uhi");
19058 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19059 "__builtin_neon_usi");
19060 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19061 "__builtin_neon_udi");
19063 /* Opaque integer types for structures of vectors. */
19064 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19065 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19066 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19067 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19069 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19070 "__builtin_neon_ti");
19071 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19072 "__builtin_neon_ei");
19073 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19074 "__builtin_neon_oi");
19075 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19076 "__builtin_neon_ci");
19077 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19078 "__builtin_neon_xi");
19080 /* Pointers to vector types. */
19081 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19082 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19083 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19084 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19085 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19086 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19087 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19088 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19089 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19091 /* Operations which return results as pairs. */
19092 void_ftype_pv8qi_v8qi_v8qi =
19093 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19094 V8QI_type_node, NULL);
19095 void_ftype_pv4hi_v4hi_v4hi =
19096 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19097 V4HI_type_node, NULL);
19098 void_ftype_pv2si_v2si_v2si =
19099 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19100 V2SI_type_node, NULL);
19101 void_ftype_pv2sf_v2sf_v2sf =
19102 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19103 V2SF_type_node, NULL);
19104 void_ftype_pdi_di_di =
19105 build_function_type_list (void_type_node, intDI_pointer_node,
19106 neon_intDI_type_node, neon_intDI_type_node, NULL);
19107 void_ftype_pv16qi_v16qi_v16qi =
19108 build_function_type_list (void_type_node, V16QI_pointer_node,
19109 V16QI_type_node, V16QI_type_node, NULL);
19110 void_ftype_pv8hi_v8hi_v8hi =
19111 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19112 V8HI_type_node, NULL);
19113 void_ftype_pv4si_v4si_v4si =
19114 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19115 V4SI_type_node, NULL);
19116 void_ftype_pv4sf_v4sf_v4sf =
19117 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19118 V4SF_type_node, NULL);
19119 void_ftype_pv2di_v2di_v2di =
19120 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19121 V2DI_type_node, NULL);
19123 dreg_types[0] = V8QI_type_node;
19124 dreg_types[1] = V4HI_type_node;
19125 dreg_types[2] = V2SI_type_node;
19126 dreg_types[3] = V2SF_type_node;
19127 dreg_types[4] = neon_intDI_type_node;
19129 qreg_types[0] = V16QI_type_node;
19130 qreg_types[1] = V8HI_type_node;
19131 qreg_types[2] = V4SI_type_node;
19132 qreg_types[3] = V4SF_type_node;
19133 qreg_types[4] = V2DI_type_node;
19135 for (i = 0; i < 5; i++)
19138 for (j = 0; j < 5; j++)
19140 reinterp_ftype_dreg[i][j]
19141 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19142 reinterp_ftype_qreg[i][j]
19143 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
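/* Added illustration (not original text): after these loops,
   reinterp_ftype_dreg[0][3], for example, is the function type
   "V8QI (V2SF)", as used by __builtin_neon_vreinterpretv8qiv2sf.  */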
19147 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19148 i < ARRAY_SIZE (neon_builtin_data);
19151 neon_builtin_datum *d = &neon_builtin_data[i];
19153 const char* const modenames[] = {
19154 "v8qi", "v4hi", "v2si", "v2sf", "di",
19155 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19160 int is_load = 0, is_store = 0;
19162 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19169 case NEON_LOAD1LANE:
19170 case NEON_LOADSTRUCT:
19171 case NEON_LOADSTRUCTLANE:
19173 /* Fall through. */
19175 case NEON_STORE1LANE:
19176 case NEON_STORESTRUCT:
19177 case NEON_STORESTRUCTLANE:
19180 /* Fall through. */
19183 case NEON_LOGICBINOP:
19184 case NEON_SHIFTINSERT:
19191 case NEON_SHIFTIMM:
19192 case NEON_SHIFTACC:
19198 case NEON_LANEMULL:
19199 case NEON_LANEMULH:
19201 case NEON_SCALARMUL:
19202 case NEON_SCALARMULL:
19203 case NEON_SCALARMULH:
19204 case NEON_SCALARMAC:
19210 tree return_type = void_type_node, args = void_list_node;
19212 /* Build a function type directly from the insn_data for
19213 this builtin. The build_function_type() function takes
19214 care of removing duplicates for us. */
19215 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19219 if (is_load && k == 1)
19221 /* Neon load patterns always have the memory
19222 operand in the operand 1 position. */
19223 gcc_assert (insn_data[d->code].operand[k].predicate
19224 == neon_struct_operand);
19230 eltype = const_intQI_pointer_node;
19235 eltype = const_intHI_pointer_node;
19240 eltype = const_intSI_pointer_node;
19245 eltype = const_float_pointer_node;
19250 eltype = const_intDI_pointer_node;
19253 default: gcc_unreachable ();
19256 else if (is_store && k == 0)
19258 /* Similarly, Neon store patterns use operand 0 as
19259 the memory location to store to. */
19260 gcc_assert (insn_data[d->code].operand[k].predicate
19261 == neon_struct_operand);
19267 eltype = intQI_pointer_node;
19272 eltype = intHI_pointer_node;
19277 eltype = intSI_pointer_node;
19282 eltype = float_pointer_node;
19287 eltype = intDI_pointer_node;
19290 default: gcc_unreachable ();
19295 switch (insn_data[d->code].operand[k].mode)
19297 case VOIDmode: eltype = void_type_node; break;
19299 case QImode: eltype = neon_intQI_type_node; break;
19300 case HImode: eltype = neon_intHI_type_node; break;
19301 case SImode: eltype = neon_intSI_type_node; break;
19302 case SFmode: eltype = neon_float_type_node; break;
19303 case DImode: eltype = neon_intDI_type_node; break;
19304 case TImode: eltype = intTI_type_node; break;
19305 case EImode: eltype = intEI_type_node; break;
19306 case OImode: eltype = intOI_type_node; break;
19307 case CImode: eltype = intCI_type_node; break;
19308 case XImode: eltype = intXI_type_node; break;
19309 /* 64-bit vectors. */
19310 case V8QImode: eltype = V8QI_type_node; break;
19311 case V4HImode: eltype = V4HI_type_node; break;
19312 case V2SImode: eltype = V2SI_type_node; break;
19313 case V2SFmode: eltype = V2SF_type_node; break;
19314 /* 128-bit vectors. */
19315 case V16QImode: eltype = V16QI_type_node; break;
19316 case V8HImode: eltype = V8HI_type_node; break;
19317 case V4SImode: eltype = V4SI_type_node; break;
19318 case V4SFmode: eltype = V4SF_type_node; break;
19319 case V2DImode: eltype = V2DI_type_node; break;
19320 default: gcc_unreachable ();
19324 if (k == 0 && !is_store)
19325 return_type = eltype;
19327 args = tree_cons (NULL_TREE, eltype, args);
19330 ftype = build_function_type (return_type, args);
19334 case NEON_RESULTPAIR:
19336 switch (insn_data[d->code].operand[1].mode)
19338 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19339 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19340 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19341 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19342 case DImode: ftype = void_ftype_pdi_di_di; break;
19343 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19344 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19345 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19346 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19347 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19348 default: gcc_unreachable ();
19353 case NEON_REINTERP:
19355 /* We iterate over 5 doubleword types, then 5 quadword types. */
19357 int rhs = d->mode % 5;
19358 switch (insn_data[d->code].operand[0].mode)
19360 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19361 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19362 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19363 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19364 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19365 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19366 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19367 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19368 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19369 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19370 default: gcc_unreachable ();
19376 gcc_unreachable ();
19379 gcc_assert (ftype != NULL);
19381 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19383 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19385 arm_builtin_decls[fcode] = decl;
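/* Added illustration (not part of the original source): the vadd entry
   above yields, among others, a builtin named "__builtin_neon_vaddv8qi";
   the arm_neon.h intrinsics (e.g. vadd_s8) are thin wrappers around
   these builtins.  */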
19389 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19392 if ((MASK) & insn_flags) \
19395 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19396 BUILT_IN_MD, NULL, NULL_TREE); \
19397 arm_builtin_decls[CODE] = bdecl; \
19402 struct builtin_description
19404 const unsigned int mask;
19405 const enum insn_code icode;
19406 const char * const name;
19407 const enum arm_builtins code;
19408 const enum rtx_code comparison;
19409 const unsigned int flag;
19412 static const struct builtin_description bdesc_2arg[] =
19414 #define IWMMXT_BUILTIN(code, string, builtin) \
19415 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19416 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
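/* Added illustration (not original text): the first entry below,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
         ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the "__builtin_arm_waddb" builtin to the addv8qi3 insn pattern.  */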
19418 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19419 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19420 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19421 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19422 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19423 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19424 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19425 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19426 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19427 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19428 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19429 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19430 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19431 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19432 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19433 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19434 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19435 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19436 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19437 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19438 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19439 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19440 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19441 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19442 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19443 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19444 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19445 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19446 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19447 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19448 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19449 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19450 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19451 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19452 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19453 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19454 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19455 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19456 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19457 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19458 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19459 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19460 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19461 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19462 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19463 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19464 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19465 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19466 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19467 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19468 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19469 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19470 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19471 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19472 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19473 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19474 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19475 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19477 #define IWMMXT_BUILTIN2(code, builtin) \
19478 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19480 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19481 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19482 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19483 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19484 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19485 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19486 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
19487 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
19488 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
19489 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
19490 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
19491 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
19492 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
19493 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
19494 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
19495 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
19496 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
19497 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
19498 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
19499 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
19500 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
19501 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
19502 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
19503 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
19504 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
19505 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
19506 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
19507 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
19508 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
19509 IWMMXT_BUILTIN2 (rordi3, WRORDI)
19510 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19511 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19514 static const struct builtin_description bdesc_1arg[] =
19516 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19517 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19518 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19519 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19520 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19521 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19522 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19523 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19524 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19525 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19526 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19527 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19528 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19529 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19530 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19531 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19532 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19533 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19536 /* Set up all the iWMMXt builtins. This is not called if
19537 TARGET_IWMMXT is zero. */
19540 arm_init_iwmmxt_builtins (void)
19542 const struct builtin_description * d;
19545 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19546 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19547 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19550 = build_function_type_list (integer_type_node,
19551 integer_type_node, NULL_TREE);
19552 tree v8qi_ftype_v8qi_v8qi_int
19553 = build_function_type_list (V8QI_type_node,
19554 V8QI_type_node, V8QI_type_node,
19555 integer_type_node, NULL_TREE);
19556 tree v4hi_ftype_v4hi_int
19557 = build_function_type_list (V4HI_type_node,
19558 V4HI_type_node, integer_type_node, NULL_TREE);
19559 tree v2si_ftype_v2si_int
19560 = build_function_type_list (V2SI_type_node,
19561 V2SI_type_node, integer_type_node, NULL_TREE);
19562 tree v2si_ftype_di_di
19563 = build_function_type_list (V2SI_type_node,
19564 long_long_integer_type_node,
19565 long_long_integer_type_node,
19567 tree di_ftype_di_int
19568 = build_function_type_list (long_long_integer_type_node,
19569 long_long_integer_type_node,
19570 integer_type_node, NULL_TREE);
19571 tree di_ftype_di_int_int
19572 = build_function_type_list (long_long_integer_type_node,
19573 long_long_integer_type_node,
19575 integer_type_node, NULL_TREE);
19576 tree int_ftype_v8qi
19577 = build_function_type_list (integer_type_node,
19578 V8QI_type_node, NULL_TREE);
19579 tree int_ftype_v4hi
19580 = build_function_type_list (integer_type_node,
19581 V4HI_type_node, NULL_TREE);
19582 tree int_ftype_v2si
19583 = build_function_type_list (integer_type_node,
19584 V2SI_type_node, NULL_TREE);
19585 tree int_ftype_v8qi_int
19586 = build_function_type_list (integer_type_node,
19587 V8QI_type_node, integer_type_node, NULL_TREE);
19588 tree int_ftype_v4hi_int
19589 = build_function_type_list (integer_type_node,
19590 V4HI_type_node, integer_type_node, NULL_TREE);
19591 tree int_ftype_v2si_int
19592 = build_function_type_list (integer_type_node,
19593 V2SI_type_node, integer_type_node, NULL_TREE);
19594 tree v8qi_ftype_v8qi_int_int
19595 = build_function_type_list (V8QI_type_node,
19596 V8QI_type_node, integer_type_node,
19597 integer_type_node, NULL_TREE);
19598 tree v4hi_ftype_v4hi_int_int
19599 = build_function_type_list (V4HI_type_node,
19600 V4HI_type_node, integer_type_node,
19601 integer_type_node, NULL_TREE);
19602 tree v2si_ftype_v2si_int_int
19603 = build_function_type_list (V2SI_type_node,
19604 V2SI_type_node, integer_type_node,
19605 integer_type_node, NULL_TREE);
19606 /* Miscellaneous. */
19607 tree v8qi_ftype_v4hi_v4hi
19608 = build_function_type_list (V8QI_type_node,
19609 V4HI_type_node, V4HI_type_node, NULL_TREE);
19610 tree v4hi_ftype_v2si_v2si
19611 = build_function_type_list (V4HI_type_node,
19612 V2SI_type_node, V2SI_type_node, NULL_TREE);
19613 tree v2si_ftype_v4hi_v4hi
19614 = build_function_type_list (V2SI_type_node,
19615 V4HI_type_node, V4HI_type_node, NULL_TREE);
19616 tree v2si_ftype_v8qi_v8qi
19617 = build_function_type_list (V2SI_type_node,
19618 V8QI_type_node, V8QI_type_node, NULL_TREE);
19619 tree v4hi_ftype_v4hi_di
19620 = build_function_type_list (V4HI_type_node,
19621 V4HI_type_node, long_long_integer_type_node,
19623 tree v2si_ftype_v2si_di
19624 = build_function_type_list (V2SI_type_node,
19625 V2SI_type_node, long_long_integer_type_node,
19627 tree void_ftype_int_int
19628 = build_function_type_list (void_type_node,
19629 integer_type_node, integer_type_node,
19632 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19634 = build_function_type_list (long_long_integer_type_node,
19635 V8QI_type_node, NULL_TREE);
19637 = build_function_type_list (long_long_integer_type_node,
19638 V4HI_type_node, NULL_TREE);
19640 = build_function_type_list (long_long_integer_type_node,
19641 V2SI_type_node, NULL_TREE);
19642 tree v2si_ftype_v4hi
19643 = build_function_type_list (V2SI_type_node,
19644 V4HI_type_node, NULL_TREE);
19645 tree v4hi_ftype_v8qi
19646 = build_function_type_list (V4HI_type_node,
19647 V8QI_type_node, NULL_TREE);
19649 tree di_ftype_di_v4hi_v4hi
19650 = build_function_type_list (long_long_unsigned_type_node,
19651 long_long_unsigned_type_node,
19652 V4HI_type_node, V4HI_type_node,
19655 tree di_ftype_v4hi_v4hi
19656 = build_function_type_list (long_long_unsigned_type_node,
19657 V4HI_type_node, V4HI_type_node,
19660 /* Normal vector binops. */
19661 tree v8qi_ftype_v8qi_v8qi
19662 = build_function_type_list (V8QI_type_node,
19663 V8QI_type_node, V8QI_type_node, NULL_TREE);
19664 tree v4hi_ftype_v4hi_v4hi
19665 = build_function_type_list (V4HI_type_node,
19666 V4HI_type_node, V4HI_type_node, NULL_TREE);
19667 tree v2si_ftype_v2si_v2si
19668 = build_function_type_list (V2SI_type_node,
19669 V2SI_type_node, V2SI_type_node, NULL_TREE);
19670 tree di_ftype_di_di
19671 = build_function_type_list (long_long_unsigned_type_node,
19672 long_long_unsigned_type_node,
19673 long_long_unsigned_type_node,
19676 /* Add all builtins that are more or less simple operations on two operands. */
19678 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19680 /* Use one of the operands; the target can have a different mode for
19681 mask-generating compares. */
19682 enum machine_mode mode;
19688 mode = insn_data[d->icode].operand[1].mode;
19693 type = v8qi_ftype_v8qi_v8qi;
19696 type = v4hi_ftype_v4hi_v4hi;
19699 type = v2si_ftype_v2si_v2si;
19702 type = di_ftype_di_di;
19706 gcc_unreachable ();
19709 def_mbuiltin (d->mask, d->name, type, d->code);
19712 /* Add the remaining MMX insns with somewhat more complicated types. */
19713 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19714 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19715 ARM_BUILTIN_ ## CODE)
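/* Added illustration (not original text): the first use below,
   iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO), is shorthand for
   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                 ARM_BUILTIN_WZERO).  */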
19717 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19718 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19719 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19721 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19722 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19723 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19724 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19725 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19726 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19728 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19729 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19730 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19731 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19732 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19733 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19735 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19736 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19737 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19738 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19739 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19740 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19742 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19743 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19744 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19745 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19746 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19747 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19749 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19751 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19752 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19753 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19754 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19756 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19757 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19758 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19759 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19760 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19761 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19762 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19763 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19764 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19766 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19767 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19768 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19770 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19771 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19772 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19774 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19775 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19776 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19777 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19778 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19779 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19781 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19782 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19783 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19784 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19785 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19786 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19787 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19788 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19789 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19790 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19791 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19792 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19794 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19795 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19796 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19797 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19799 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19800 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19801 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19802 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19803 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19804 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19805 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19807 #undef iwmmx_mbuiltin
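/* Illustrative sketch (an assumption for exposition, not code from this
   file): once registered above, these builtins are directly callable from
   user code compiled for an iWMMXt target, e.g.

       typedef short v4hi __attribute__ ((vector_size (8)));

       v4hi shift_halfwords (v4hi x)
       {
         return __builtin_arm_wsllhi (x, 2);
       }

   where __builtin_arm_wsllhi shifts each halfword left by two, matching
   the "wsllhi" entry above.  The vector typedef is assumed here; headers
   in the mmintrin.h style normally wrap these builtins.  */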
19811 arm_init_tls_builtins (void)
19815 ftype = build_function_type (ptr_type_node, void_list_node);
19816 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19817 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19819 TREE_NOTHROW (decl) = 1;
19820 TREE_READONLY (decl) = 1;
19821 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19825 arm_init_fp16_builtins (void)
19827 tree fp16_type = make_node (REAL_TYPE);
19828 TYPE_PRECISION (fp16_type) = 16;
19829 layout_type (fp16_type);
19830 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19834 arm_init_builtins (void)
19836 arm_init_tls_builtins ();
19838 if (TARGET_REALLY_IWMMXT)
19839 arm_init_iwmmxt_builtins ();
19842 arm_init_neon_builtins ();
19844 if (arm_fp16_format)
19845 arm_init_fp16_builtins ();
19848 /* Return the ARM builtin for CODE. */
19851 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19853 if (code >= ARM_BUILTIN_MAX)
19854 return error_mark_node;
19856 return arm_builtin_decls[code];
19859 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19861 static const char *
19862 arm_invalid_parameter_type (const_tree t)
19864 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19865 return N_("function parameters cannot have __fp16 type");
19869 /* Implement TARGET_INVALID_RETURN_TYPE. */
19871 static const char *
19872 arm_invalid_return_type (const_tree t)
19874 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19875 return N_("functions cannot return __fp16 type");
19879 /* Implement TARGET_PROMOTED_TYPE. */
19882 arm_promoted_type (const_tree t)
19884 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19885 return float_type_node;
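/* Sketch of the effect: with

       __fp16 a, b;

   the expression a + b is evaluated in float, since both operands are
   promoted by the hook above.  */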
19889 /* Implement TARGET_CONVERT_TO_TYPE.
19890 Specifically, this hook implements the peculiarity of the ARM
19891 half-precision floating-point C semantics that requires conversions
19892 between __fp16 and double to go through an intermediate conversion to float. */
19895 arm_convert_to_type (tree type, tree expr)
19897 tree fromtype = TREE_TYPE (expr);
19898 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19900 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19901 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19902 return convert (type, convert (float_type_node, expr));
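/* Sketch of the effect: a conversion such as

       __fp16 h;
       double d = h;

   is performed as (double) (float) h, inserting the intermediate float
   conversion that the semantics described above require.  */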
19906 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19907 This simply adds HFmode as a supported mode; even though we don't
19908 implement arithmetic on this type directly, it's supported by
19909 optabs conversions, much the way the double-word arithmetic is
19910 special-cased in the default hook. */
19913 arm_scalar_mode_supported_p (enum machine_mode mode)
19915 if (mode == HFmode)
19916 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19917 else if (ALL_FIXED_POINT_MODE_P (mode))
19920 return default_scalar_mode_supported_p (mode);
19923 /* Errors in the source file can cause expand_expr to return const0_rtx
19924 where we expect a vector. To avoid crashing, use one of the vector
19925 clear instructions. */
19928 safe_vector_operand (rtx x, enum machine_mode mode)
19930 if (x != const0_rtx)
19932 x = gen_reg_rtx (mode);
19934 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19935 : gen_rtx_SUBREG (DImode, x, 0)));
19939 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19942 arm_expand_binop_builtin (enum insn_code icode,
19943 tree exp, rtx target)
19946 tree arg0 = CALL_EXPR_ARG (exp, 0);
19947 tree arg1 = CALL_EXPR_ARG (exp, 1);
19948 rtx op0 = expand_normal (arg0);
19949 rtx op1 = expand_normal (arg1);
19950 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19951 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19952 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19954 if (VECTOR_MODE_P (mode0))
19955 op0 = safe_vector_operand (op0, mode0);
19956 if (VECTOR_MODE_P (mode1))
19957 op1 = safe_vector_operand (op1, mode1);
19960 || GET_MODE (target) != tmode
19961 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19962 target = gen_reg_rtx (tmode);
19964 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19967 op0 = copy_to_mode_reg (mode0, op0);
19968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19969 op1 = copy_to_mode_reg (mode1, op1);
19971 pat = GEN_FCN (icode) (target, op0, op1);
19978 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19981 arm_expand_unop_builtin (enum insn_code icode,
19982 tree exp, rtx target, int do_load)
19985 tree arg0 = CALL_EXPR_ARG (exp, 0);
19986 rtx op0 = expand_normal (arg0);
19987 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19988 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19991 || GET_MODE (target) != tmode
19992 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19993 target = gen_reg_rtx (tmode);
19995 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19998 if (VECTOR_MODE_P (mode0))
19999 op0 = safe_vector_operand (op0, mode0);
20001 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20002 op0 = copy_to_mode_reg (mode0, op0);
20005 pat = GEN_FCN (icode) (target, op0);
20013 NEON_ARG_COPY_TO_REG,
20019 #define NEON_MAX_BUILTIN_ARGS 5
20021 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20022 and return an expression for the accessed memory.
20024 The intrinsic function operates on a block of registers that has
20025 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20026 The function references the memory at EXP in mode MEM_MODE;
20027 this mode may be BLKmode if no more suitable mode is available. */
20030 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20031 enum machine_mode reg_mode,
20032 neon_builtin_type_mode type_mode)
20034 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20035 tree elem_type, upper_bound, array_type;
20037 /* Work out the size of the register block in bytes. */
20038 reg_size = GET_MODE_SIZE (reg_mode);
20040 /* Work out the size of each vector in bytes. */
20041 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20042 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20044 /* Work out how many vectors there are. */
20045 gcc_assert (reg_size % vector_size == 0);
20046 nvectors = reg_size / vector_size;
20048 /* Work out how many elements are being loaded or stored.
20049 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20050 and memory elements; anything else implies a lane load or store. */
20051 if (mem_mode == reg_mode)
20052 nelems = vector_size * nvectors;
20056 /* Work out the type of each element. */
20057 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20058 elem_type = TREE_TYPE (TREE_TYPE (exp));
20060 /* Create a type that describes the full access. */
20061 upper_bound = build_int_cst (size_type_node, nelems - 1);
20062 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20064 /* Dereference EXP using that type. */
20065 exp = convert (build_pointer_type (array_type), exp);
20066 return fold_build2 (MEM_REF, array_type, exp,
20067 build_int_cst (TREE_TYPE (exp), 0));
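/* Worked example (illustrative): for a full load of three quad-word
   vectors, reg_size is 48, vector_size is 16 and nvectors is 3; with
   MEM_MODE == REG_MODE the code above builds an array type of 48
   elements of the pointed-to type and dereferences EXP through it, so
   the alias information covers the whole access.  */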
20070 /* Expand a Neon builtin. */
20072 arm_expand_neon_args (rtx target, int icode, int have_retval,
20073 neon_builtin_type_mode type_mode,
20078 tree arg[NEON_MAX_BUILTIN_ARGS];
20079 rtx op[NEON_MAX_BUILTIN_ARGS];
20080 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20081 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20082 enum machine_mode other_mode;
20088 || GET_MODE (target) != tmode
20089 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20090 target = gen_reg_rtx (tmode);
20092 va_start (ap, exp);
20096 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20098 if (thisarg == NEON_ARG_STOP)
20102 opno = argc + have_retval;
20103 mode[argc] = insn_data[icode].operand[opno].mode;
20104 arg[argc] = CALL_EXPR_ARG (exp, argc);
20105 if (thisarg == NEON_ARG_MEMORY)
20107 other_mode = insn_data[icode].operand[1 - opno].mode;
20108 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20109 other_mode, type_mode);
20111 op[argc] = expand_normal (arg[argc]);
20115 case NEON_ARG_COPY_TO_REG:
20116 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20117 if (!(*insn_data[icode].operand[opno].predicate)
20118 (op[argc], mode[argc]))
20119 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20122 case NEON_ARG_CONSTANT:
20123 /* FIXME: This error message is somewhat unhelpful. */
20124 if (!(*insn_data[icode].operand[opno].predicate)
20125 (op[argc], mode[argc]))
20126 error ("argument must be a constant");
20129 case NEON_ARG_MEMORY:
20130 gcc_assert (MEM_P (op[argc]));
20131 PUT_MODE (op[argc], mode[argc]);
20132 /* ??? arm_neon.h uses the same built-in functions for signed
20133 and unsigned accesses, casting where necessary. This isn't
20134 alias safe. */
20135 set_mem_alias_set (op[argc], 0);
20136 if (!(*insn_data[icode].operand[opno].predicate)
20137 (op[argc], mode[argc]))
20138 op[argc] = (replace_equiv_address
20139 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20142 case NEON_ARG_STOP:
20143 gcc_unreachable ();
20156 pat = GEN_FCN (icode) (target, op[0]);
20160 pat = GEN_FCN (icode) (target, op[0], op[1]);
20164 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20168 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20172 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20176 gcc_unreachable ();
20182 pat = GEN_FCN (icode) (op[0]);
20186 pat = GEN_FCN (icode) (op[0], op[1]);
20190 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20194 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20198 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20202 gcc_unreachable ();
20213 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20214 constants defined per-instruction or per instruction-variant. Instead, the
20215 required info is looked up in the table neon_builtin_data. */
20217 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20219 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20220 neon_itype itype = d->itype;
20221 enum insn_code icode = d->code;
20222 neon_builtin_type_mode type_mode = d->mode;
20229 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20230 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20234 case NEON_SCALARMUL:
20235 case NEON_SCALARMULL:
20236 case NEON_SCALARMULH:
20237 case NEON_SHIFTINSERT:
20238 case NEON_LOGICBINOP:
20239 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20240 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20244 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20245 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20246 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20250 case NEON_SHIFTIMM:
20251 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20252 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20256 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20257 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20261 case NEON_REINTERP:
20262 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20263 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20267 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20268 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20270 case NEON_RESULTPAIR:
20271 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20272 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20276 case NEON_LANEMULL:
20277 case NEON_LANEMULH:
20278 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20279 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20280 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20283 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20284 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20285 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20287 case NEON_SHIFTACC:
20288 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20289 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20290 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20292 case NEON_SCALARMAC:
20293 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20294 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20295 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20299 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20300 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20304 case NEON_LOADSTRUCT:
20305 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20306 NEON_ARG_MEMORY, NEON_ARG_STOP);
20308 case NEON_LOAD1LANE:
20309 case NEON_LOADSTRUCTLANE:
20310 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20311 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20315 case NEON_STORESTRUCT:
20316 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20317 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20319 case NEON_STORE1LANE:
20320 case NEON_STORESTRUCTLANE:
20321 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20322 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20326 gcc_unreachable ();
20329 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20331 neon_reinterpret (rtx dest, rtx src)
20333 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20336 /* Emit code to place a Neon pair result in memory locations (with equal
20337 registers). */
20339 neon_emit_pair_result_insn (enum machine_mode mode,
20340 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20343 rtx mem = gen_rtx_MEM (mode, destaddr);
20344 rtx tmp1 = gen_reg_rtx (mode);
20345 rtx tmp2 = gen_reg_rtx (mode);
20347 emit_insn (intfn (tmp1, op1, op2, tmp2));
20349 emit_move_insn (mem, tmp1);
20350 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20351 emit_move_insn (mem, tmp2);
20354 /* Set up operands for a register copy from src to dest, taking care not to
20355 clobber registers in the process.
20356 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
20357 be called with a large N, so that should be OK. */
20360 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20362 unsigned int copied = 0, opctr = 0;
20363 unsigned int done = (1 << count) - 1;
20366 while (copied != done)
20368 for (i = 0; i < count; i++)
20372 for (j = 0; good && j < count; j++)
20373 if (i != j && (copied & (1 << j)) == 0
20374 && reg_overlap_mentioned_p (src[j], dest[i]))
20379 operands[opctr++] = dest[i];
20380 operands[opctr++] = src[i];
20386 gcc_assert (opctr == count * 2);
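/* Example of the ordering (illustrative): with COUNT == 2,
   dest = { d0, d1 } and src = { d1, d2 }, emitting pair 1 first would
   clobber d1 while it is still needed as src[0], so the loop above
   schedules pair 0 (d0 <- d1) before pair 1 (d1 <- d2).  */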
20389 /* Expand an expression EXP that calls a built-in function,
20390 with result going to TARGET if that's convenient
20391 (and in mode MODE if that's convenient).
20392 SUBTARGET may be used as the target for computing one of EXP's operands.
20393 IGNORE is nonzero if the value is to be ignored. */
20396 arm_expand_builtin (tree exp,
20398 rtx subtarget ATTRIBUTE_UNUSED,
20399 enum machine_mode mode ATTRIBUTE_UNUSED,
20400 int ignore ATTRIBUTE_UNUSED)
20402 const struct builtin_description * d;
20403 enum insn_code icode;
20404 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20412 int fcode = DECL_FUNCTION_CODE (fndecl);
20414 enum machine_mode tmode;
20415 enum machine_mode mode0;
20416 enum machine_mode mode1;
20417 enum machine_mode mode2;
20419 if (fcode >= ARM_BUILTIN_NEON_BASE)
20420 return arm_expand_neon_builtin (fcode, exp, target);
20424 case ARM_BUILTIN_TEXTRMSB:
20425 case ARM_BUILTIN_TEXTRMUB:
20426 case ARM_BUILTIN_TEXTRMSH:
20427 case ARM_BUILTIN_TEXTRMUH:
20428 case ARM_BUILTIN_TEXTRMSW:
20429 case ARM_BUILTIN_TEXTRMUW:
20430 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20431 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20432 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20433 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20434 : CODE_FOR_iwmmxt_textrmw);
20436 arg0 = CALL_EXPR_ARG (exp, 0);
20437 arg1 = CALL_EXPR_ARG (exp, 1);
20438 op0 = expand_normal (arg0);
20439 op1 = expand_normal (arg1);
20440 tmode = insn_data[icode].operand[0].mode;
20441 mode0 = insn_data[icode].operand[1].mode;
20442 mode1 = insn_data[icode].operand[2].mode;
20444 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20445 op0 = copy_to_mode_reg (mode0, op0);
20446 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20448 /* @@@ better error message */
20449 error ("selector must be an immediate");
20450 return gen_reg_rtx (tmode);
20453 || GET_MODE (target) != tmode
20454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20455 target = gen_reg_rtx (tmode);
20456 pat = GEN_FCN (icode) (target, op0, op1);
20462 case ARM_BUILTIN_TINSRB:
20463 case ARM_BUILTIN_TINSRH:
20464 case ARM_BUILTIN_TINSRW:
20465 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20466 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20467 : CODE_FOR_iwmmxt_tinsrw);
20468 arg0 = CALL_EXPR_ARG (exp, 0);
20469 arg1 = CALL_EXPR_ARG (exp, 1);
20470 arg2 = CALL_EXPR_ARG (exp, 2);
20471 op0 = expand_normal (arg0);
20472 op1 = expand_normal (arg1);
20473 op2 = expand_normal (arg2);
20474 tmode = insn_data[icode].operand[0].mode;
20475 mode0 = insn_data[icode].operand[1].mode;
20476 mode1 = insn_data[icode].operand[2].mode;
20477 mode2 = insn_data[icode].operand[3].mode;
20479 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20480 op0 = copy_to_mode_reg (mode0, op0);
20481 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20482 op1 = copy_to_mode_reg (mode1, op1);
20483 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20485 /* @@@ better error message */
20486 error ("selector must be an immediate");
20490 || GET_MODE (target) != tmode
20491 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20492 target = gen_reg_rtx (tmode);
20493 pat = GEN_FCN (icode) (target, op0, op1, op2);
20499 case ARM_BUILTIN_SETWCX:
20500 arg0 = CALL_EXPR_ARG (exp, 0);
20501 arg1 = CALL_EXPR_ARG (exp, 1);
20502 op0 = force_reg (SImode, expand_normal (arg0));
20503 op1 = expand_normal (arg1);
20504 emit_insn (gen_iwmmxt_tmcr (op1, op0));
20507 case ARM_BUILTIN_GETWCX:
20508 arg0 = CALL_EXPR_ARG (exp, 0);
20509 op0 = expand_normal (arg0);
20510 target = gen_reg_rtx (SImode);
20511 emit_insn (gen_iwmmxt_tmrc (target, op0));
20514 case ARM_BUILTIN_WSHUFH:
20515 icode = CODE_FOR_iwmmxt_wshufh;
20516 arg0 = CALL_EXPR_ARG (exp, 0);
20517 arg1 = CALL_EXPR_ARG (exp, 1);
20518 op0 = expand_normal (arg0);
20519 op1 = expand_normal (arg1);
20520 tmode = insn_data[icode].operand[0].mode;
20521 mode1 = insn_data[icode].operand[1].mode;
20522 mode2 = insn_data[icode].operand[2].mode;
20524 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20525 op0 = copy_to_mode_reg (mode1, op0);
20526 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20528 /* @@@ better error message */
20529 error ("mask must be an immediate");
20533 || GET_MODE (target) != tmode
20534 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20535 target = gen_reg_rtx (tmode);
20536 pat = GEN_FCN (icode) (target, op0, op1);
20542 case ARM_BUILTIN_WSADB:
20543 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20544 case ARM_BUILTIN_WSADH:
20545 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20546 case ARM_BUILTIN_WSADBZ:
20547 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20548 case ARM_BUILTIN_WSADHZ:
20549 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20551 /* Several three-argument builtins. */
20552 case ARM_BUILTIN_WMACS:
20553 case ARM_BUILTIN_WMACU:
20554 case ARM_BUILTIN_WALIGN:
20555 case ARM_BUILTIN_TMIA:
20556 case ARM_BUILTIN_TMIAPH:
20557 case ARM_BUILTIN_TMIATT:
20558 case ARM_BUILTIN_TMIATB:
20559 case ARM_BUILTIN_TMIABT:
20560 case ARM_BUILTIN_TMIABB:
20561 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20562 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20563 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20564 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20565 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20566 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20567 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20568 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20569 : CODE_FOR_iwmmxt_walign);
20570 arg0 = CALL_EXPR_ARG (exp, 0);
20571 arg1 = CALL_EXPR_ARG (exp, 1);
20572 arg2 = CALL_EXPR_ARG (exp, 2);
20573 op0 = expand_normal (arg0);
20574 op1 = expand_normal (arg1);
20575 op2 = expand_normal (arg2);
20576 tmode = insn_data[icode].operand[0].mode;
20577 mode0 = insn_data[icode].operand[1].mode;
20578 mode1 = insn_data[icode].operand[2].mode;
20579 mode2 = insn_data[icode].operand[3].mode;
20581 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20582 op0 = copy_to_mode_reg (mode0, op0);
20583 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20584 op1 = copy_to_mode_reg (mode1, op1);
20585 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20586 op2 = copy_to_mode_reg (mode2, op2);
20588 || GET_MODE (target) != tmode
20589 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20590 target = gen_reg_rtx (tmode);
20591 pat = GEN_FCN (icode) (target, op0, op1, op2);
20597 case ARM_BUILTIN_WZERO:
20598 target = gen_reg_rtx (DImode);
20599 emit_insn (gen_iwmmxt_clrdi (target));
20602 case ARM_BUILTIN_THREAD_POINTER:
20603 return arm_load_tp (target);
20609 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20610 if (d->code == (const enum arm_builtins) fcode)
20611 return arm_expand_binop_builtin (d->icode, exp, target);
20613 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20614 if (d->code == (const enum arm_builtins) fcode)
20615 return arm_expand_unop_builtin (d->icode, exp, target, 0);
20617 /* @@@ Should really do something sensible here. */
20621 /* Return the number (counting from 0) of
20622 the least significant set bit in MASK. */
20625 number_of_first_bit_set (unsigned mask)
20627 return ctz_hwi (mask);
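/* For example, number_of_first_bit_set (0x14) is 2: the least
   significant set bit of binary 10100 is bit 2.  */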
20630 /* Like emit_multi_reg_push, but allowing for a different set of
20631 registers to be described as saved. MASK is the set of registers
20632 to be saved; REAL_REGS is the set of registers to be described as
20633 saved. If REAL_REGS is 0, only describe the stack adjustment. */
20636 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
20638 unsigned long regno;
20639 rtx par[10], tmp, reg, insn;
20642 /* Build the parallel of the registers actually being stored. */
20643 for (i = 0; mask; ++i, mask &= mask - 1)
20645 regno = ctz_hwi (mask);
20646 reg = gen_rtx_REG (SImode, regno);
20649 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
20651 tmp = gen_rtx_USE (VOIDmode, reg);
20656 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20657 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20658 tmp = gen_frame_mem (BLKmode, tmp);
20659 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
20662 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
20663 insn = emit_insn (tmp);
20665 /* Always build the stack adjustment note for unwind info. */
20666 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20667 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
20670 /* Build the parallel of the registers recorded as saved for unwind. */
20671 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
20673 regno = ctz_hwi (real_regs);
20674 reg = gen_rtx_REG (SImode, regno);
20676 tmp = plus_constant (stack_pointer_rtx, j * 4);
20677 tmp = gen_frame_mem (SImode, tmp);
20678 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
20679 RTX_FRAME_RELATED_P (tmp) = 1;
20687 RTX_FRAME_RELATED_P (par[0]) = 1;
20688 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
20691 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
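/* Illustrative call (a sketch, not taken from this file): when r4 holds
   the value of r8 that is being saved, the prologue code below pushes r4
   but describes the slot as a save of r8:

       thumb1_emit_multi_reg_push (1 << 4, 1 << 8);

   i.e. MASK names the registers in the push instruction itself, while
   REAL_REGS names the registers recorded in the unwind information.  */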
20696 /* Emit code to push or pop registers to or from the stack. F is the
20697 assembly file. MASK is the registers to pop. */
20699 thumb_pop (FILE *f, unsigned long mask)
20702 int lo_mask = mask & 0xFF;
20703 int pushed_words = 0;
20707 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
20709 /* Special case. Do not generate a POP PC statement here; do it in
20710 thumb_exit. */
20711 thumb_exit (f, -1);
20715 fprintf (f, "\tpop\t{");
20717 /* Look at the low registers first. */
20718 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
20722 asm_fprintf (f, "%r", regno);
20724 if ((lo_mask & ~1) != 0)
20731 if (mask & (1 << PC_REGNUM))
20733 /* Catch popping the PC. */
20734 if (TARGET_INTERWORK || TARGET_BACKTRACE
20735 || crtl->calls_eh_return)
20737 /* The PC is never popped directly; instead
20738 it is popped into r3 and then BX is used. */
20739 fprintf (f, "}\n");
20741 thumb_exit (f, -1);
20750 asm_fprintf (f, "%r", PC_REGNUM);
20754 fprintf (f, "}\n");
20757 /* Generate code to return from a thumb function.
20758 If 'reg_containing_return_addr' is -1, then the return address is
20759 actually on the stack, at the stack pointer. */
20761 thumb_exit (FILE *f, int reg_containing_return_addr)
20763 unsigned regs_available_for_popping;
20764 unsigned regs_to_pop;
20766 unsigned available;
20770 int restore_a4 = FALSE;
20772 /* Compute the registers we need to pop. */
20776 if (reg_containing_return_addr == -1)
20778 regs_to_pop |= 1 << LR_REGNUM;
20782 if (TARGET_BACKTRACE)
20784 /* Restore the (ARM) frame pointer and stack pointer. */
20785 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20789 /* If there is nothing to pop then just emit the BX instruction and
20790 return. */
20791 if (pops_needed == 0)
20793 if (crtl->calls_eh_return)
20794 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20796 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20799 /* Otherwise if we are not supporting interworking and we have not created
20800 a backtrace structure and the function was not entered in ARM mode then
20801 just pop the return address straight into the PC. */
20802 else if (!TARGET_INTERWORK
20803 && !TARGET_BACKTRACE
20804 && !is_called_in_ARM_mode (current_function_decl)
20805 && !crtl->calls_eh_return)
20807 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20811 /* Find out how many of the (return) argument registers we can corrupt. */
20812 regs_available_for_popping = 0;
20814 /* If returning via __builtin_eh_return, the bottom three registers
20815 all contain information needed for the return. */
20816 if (crtl->calls_eh_return)
20820 /* Deduce the registers used from the function's return value
20821 where possible. This is more reliable than examining
20822 df_regs_ever_live_p () because that will be set if the register is
20823 ever used in the function, not just if the register is used
20824 to hold a return value. */
20826 if (crtl->return_rtx != 0)
20827 mode = GET_MODE (crtl->return_rtx);
20829 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20831 size = GET_MODE_SIZE (mode);
20835 /* In a void function we can use any argument register.
20836 In a function that returns a structure on the stack
20837 we can use the second and third argument registers. */
20838 if (mode == VOIDmode)
20839 regs_available_for_popping =
20840 (1 << ARG_REGISTER (1))
20841 | (1 << ARG_REGISTER (2))
20842 | (1 << ARG_REGISTER (3));
20844 regs_available_for_popping =
20845 (1 << ARG_REGISTER (2))
20846 | (1 << ARG_REGISTER (3));
20848 else if (size <= 4)
20849 regs_available_for_popping =
20850 (1 << ARG_REGISTER (2))
20851 | (1 << ARG_REGISTER (3));
20852 else if (size <= 8)
20853 regs_available_for_popping =
20854 (1 << ARG_REGISTER (3));
20857 /* Match registers to be popped with registers into which we pop them. */
20858 for (available = regs_available_for_popping,
20859 required = regs_to_pop;
20860 required != 0 && available != 0;
20861 available &= ~(available & - available),
20862 required &= ~(required & - required))
20865 /* If we have any popping registers left over, remove them. */
20867 regs_available_for_popping &= ~available;
20869 /* Otherwise if we need another popping register we can use
20870 the fourth argument register. */
20871 else if (pops_needed)
20873 /* If we have not found any free argument registers and
20874 reg a4 contains the return address, we must move it. */
20875 if (regs_available_for_popping == 0
20876 && reg_containing_return_addr == LAST_ARG_REGNUM)
20878 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20879 reg_containing_return_addr = LR_REGNUM;
20881 else if (size > 12)
20883 /* Register a4 is being used to hold part of the return value,
20884 but we have dire need of a free, low register. */
20887 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20890 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20892 /* The fourth argument register is available. */
20893 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20899 /* Pop as many registers as we can. */
20900 thumb_pop (f, regs_available_for_popping);
20902 /* Process the registers we popped. */
20903 if (reg_containing_return_addr == -1)
20905 /* The return address was popped into the lowest numbered register. */
20906 regs_to_pop &= ~(1 << LR_REGNUM);
20908 reg_containing_return_addr =
20909 number_of_first_bit_set (regs_available_for_popping);
20911 /* Remove this register from the mask of available registers, so that
20912 the return address will not be corrupted by further pops. */
20913 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20916 /* If we popped other registers then handle them here. */
20917 if (regs_available_for_popping)
20921 /* Work out which register currently contains the frame pointer. */
20922 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20924 /* Move it into the correct place. */
20925 asm_fprintf (f, "\tmov\t%r, %r\n",
20926 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20928 /* (Temporarily) remove it from the mask of popped registers. */
20929 regs_available_for_popping &= ~(1 << frame_pointer);
20930 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20932 if (regs_available_for_popping)
20936 /* We popped the stack pointer as well,
20937 find the register that contains it. */
20938 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20940 /* Move it into the stack register. */
20941 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20943 /* At this point we have popped all necessary registers, so
20944 do not worry about restoring regs_available_for_popping
20945 to its correct value:
20947 assert (pops_needed == 0)
20948 assert (regs_available_for_popping == (1 << frame_pointer))
20949 assert (regs_to_pop == (1 << STACK_POINTER)) */
20953 /* Since we have just moved the popped value into the frame
20954 pointer, the popping register is available for reuse, and
20955 we know that we still have the stack pointer left to pop. */
20956 regs_available_for_popping |= (1 << frame_pointer);
20960 /* If we still have registers left on the stack, but we no longer have
20961 any registers into which we can pop them, then we must move the return
20962 address into the link register and make available the register that
20963 we popped it into. */
20964 if (regs_available_for_popping == 0 && pops_needed > 0)
20966 regs_available_for_popping |= 1 << reg_containing_return_addr;
20968 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20969 reg_containing_return_addr);
20971 reg_containing_return_addr = LR_REGNUM;
20974 /* If we have registers left on the stack then pop some more.
20975 We know that at most we will want to pop FP and SP. */
20976 if (pops_needed > 0)
20981 thumb_pop (f, regs_available_for_popping);
20983 /* We have popped either FP or SP.
20984 Move whichever one it is into the correct register. */
20985 popped_into = number_of_first_bit_set (regs_available_for_popping);
20986 move_to = number_of_first_bit_set (regs_to_pop);
20988 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20990 regs_to_pop &= ~(1 << move_to);
20995 /* If we still have not popped everything then we must have only
20996 had one register available to us and we are now popping the SP. */
20997 if (pops_needed > 0)
21001 thumb_pop (f, regs_available_for_popping);
21003 popped_into = number_of_first_bit_set (regs_available_for_popping);
21005 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21007 assert (regs_to_pop == (1 << STACK_POINTER))
21008 assert (pops_needed == 1)
21012 /* If necessary restore the a4 register. */
21015 if (reg_containing_return_addr != LR_REGNUM)
21017 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21018 reg_containing_return_addr = LR_REGNUM;
21021 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21024 if (crtl->calls_eh_return)
21025 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21027 /* Return to caller. */
21028 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
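/* For example (a sketch of one simple case): for a function returning a
   word-sized value, entered with the return address still on the stack,
   the matching loop above selects r1 as the popping register, giving
   output along the lines of

       pop {r1}
       bx r1

   The more constrained cases handled above add mov instructions to
   shuffle the frame pointer and stack pointer values into place.  */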
21031 /* Scan INSN just before assembler is output for it.
21032 For Thumb-1, we track the status of the condition codes; this
21033 information is used in the cbranchsi4_insn pattern. */
21035 thumb1_final_prescan_insn (rtx insn)
21037 if (flag_print_asm_name)
21038 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21039 INSN_ADDRESSES (INSN_UID (insn)));
21040 /* Don't overwrite the previous setter when we get to a cbranch. */
21041 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21043 enum attr_conds conds;
21045 if (cfun->machine->thumb1_cc_insn)
21047 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21048 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21051 conds = get_attr_conds (insn);
21052 if (conds == CONDS_SET)
21054 rtx set = single_set (insn);
21055 cfun->machine->thumb1_cc_insn = insn;
21056 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21057 cfun->machine->thumb1_cc_op1 = const0_rtx;
21058 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21059 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21061 rtx src1 = XEXP (SET_SRC (set), 1);
21062 if (src1 == const0_rtx)
21063 cfun->machine->thumb1_cc_mode = CCmode;
21066 else if (conds != CONDS_NOCOND)
21067 cfun->machine->thumb1_cc_insn = NULL_RTX;
21072 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21074 unsigned HOST_WIDE_INT mask = 0xff;
21077 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21078 if (val == 0) /* XXX */
21081 for (i = 0; i < 25; i++)
21082 if ((val & (mask << i)) == val)
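/* For example, 0x00ff0000 is shiftable: it equals 0xff << 16, so it can
   be built as a byte constant followed by a left shift.  */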
21088 /* Returns nonzero if the current function contains,
21089 or might contain a far jump. */
21091 thumb_far_jump_used_p (void)
21095 /* This test is only important for leaf functions. */
21096 /* assert (!leaf_function_p ()); */
21098 /* If we have already decided that far jumps may be used,
21099 do not bother checking again, and always return true even if
21100 it turns out that they are not being used. Once we have made
21101 the decision that far jumps are present (and that hence the link
21102 register will be pushed onto the stack) we cannot go back on it. */
21103 if (cfun->machine->far_jump_used)
21106 /* If this function is not being called from the prologue/epilogue
21107 generation code then it must be being called from the
21108 INITIAL_ELIMINATION_OFFSET macro. */
21109 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21111 /* In this case we know that we are being asked about the elimination
21112 of the arg pointer register. If that register is not being used,
21113 then there are no arguments on the stack, and we do not have to
21114 worry that a far jump might force the prologue to push the link
21115 register, changing the stack offsets. In this case we can just
21116 return false, since the presence of far jumps in the function will
21117 not affect stack offsets.
21119 If the arg pointer is live (or if it was live, but has now been
21120 eliminated and so set to dead) then we do have to test to see if
21121 the function might contain a far jump. This test can lead to some
21122 false negatives, since before reload is completed, the length of
21123 branch instructions is not known, so gcc defaults to returning their
21124 longest length, which in turn sets the far jump attribute to true.
21126 A false negative will not result in bad code being generated, but it
21127 will result in a needless push and pop of the link register. We
21128 hope that this does not occur too often.
21130 If we need doubleword stack alignment this could affect the other
21131 elimination offsets so we can't risk getting it wrong. */
21132 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21133 cfun->machine->arg_pointer_live = 1;
21134 else if (!cfun->machine->arg_pointer_live)
21138 /* Check to see if the function contains a branch
21139 insn with the far jump attribute set. */
21140 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21142 if (GET_CODE (insn) == JUMP_INSN
21143 /* Ignore tablejump patterns. */
21144 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21145 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21146 && get_attr_far_jump (insn) == FAR_JUMP_YES
21149 /* Record the fact that we have decided that
21150 the function does use far jumps. */
21151 cfun->machine->far_jump_used = 1;
21159 /* Return nonzero if FUNC must be entered in ARM mode. */
21161 is_called_in_ARM_mode (tree func)
21163 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21165 /* Ignore the problem about functions whose address is taken. */
21166 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21170 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21176 /* Given the stack offsets and register mask in OFFSETS, decide how
21177 many additional registers to push instead of subtracting a constant
21178 from SP. For epilogues the principle is the same except we use pop.
21179 FOR_PROLOGUE indicates which we're generating. */
21181 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21183 HOST_WIDE_INT amount;
21184 unsigned long live_regs_mask = offsets->saved_regs_mask;
21185 /* Extract a mask of the ones we can give to the Thumb's push/pop
21186 instructions. */
21187 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21188 /* Then count how many other high registers will need to be pushed. */
21189 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21190 int n_free, reg_base;
21192 if (!for_prologue && frame_pointer_needed)
21193 amount = offsets->locals_base - offsets->saved_regs;
21195 amount = offsets->outgoing_args - offsets->saved_regs;
21197 /* If the stack frame size is 512 exactly, we can save one load
21198 instruction, which should make this a win even when optimizing
21199 for speed. */
21200 if (!optimize_size && amount != 512)
21203 /* Can't do this if there are high registers to push. */
21204 if (high_regs_pushed != 0)
21207 /* Shouldn't do it in the prologue if no registers would normally
21208 be pushed at all. In the epilogue, also allow it if we'll have
21209 a pop insn for the PC. */
21212 || TARGET_BACKTRACE
21213 || (live_regs_mask & 1 << LR_REGNUM) == 0
21214 || TARGET_INTERWORK
21215 || crtl->args.pretend_args_size != 0))
21218 /* Don't do this if thumb_expand_prologue wants to emit instructions
21219 between the push and the stack frame allocation. */
21221 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21222 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21229 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21230 live_regs_mask >>= reg_base;
21233 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21234 && (for_prologue || call_used_regs[reg_base + n_free]))
21236 live_regs_mask >>= 1;
21242 gcc_assert (amount / 4 * 4 == amount);
21244 if (amount >= 512 && (amount - n_free * 4) < 512)
21245 return (amount - 508) / 4;
21246 if (amount <= n_free * 4)
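/* Worked example (illustrative): with amount == 516 and n_free == 2,
   516 >= 512 and 516 - 2 * 4 == 508 < 512, so (516 - 508) / 4 == 2
   extra registers are pushed; the remaining decrement of 508 then fits
   the 7-bit scaled immediate of a single Thumb-1 stack adjustment.  */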
21251 /* The bits which aren't usefully expanded as rtl. */
21253 thumb_unexpanded_epilogue (void)
21255 arm_stack_offsets *offsets;
21257 unsigned long live_regs_mask = 0;
21258 int high_regs_pushed = 0;
21260 int had_to_push_lr;
21263 if (cfun->machine->return_used_this_function != 0)
21266 if (IS_NAKED (arm_current_func_type ()))
21269 offsets = arm_get_frame_offsets ();
21270 live_regs_mask = offsets->saved_regs_mask;
21271 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21273 /* Deduce the registers used from the function's return value where
21274 possible. This is more reliable than examining df_regs_ever_live_p () because that
21275 will be set if the register is ever used in the function, not just if
21276 the register is used to hold a return value. */
21277 size = arm_size_return_regs ();
21279 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21282 unsigned long extra_mask = (1 << extra_pop) - 1;
21283 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
21286 /* The prologue may have pushed some high registers to use as
21287 work registers; e.g. the testsuite file:
21288 gcc/testsuite/gcc.c-torture/execute/complex-2.c
21289 compiles to produce:
21290 push {r4, r5, r6, r7, lr}
21294 as part of the prologue. We have to undo that pushing here. */
21296 if (high_regs_pushed)
21298 unsigned long mask = live_regs_mask & 0xff;
21301 /* The available low registers depend on the size of the value we are
21302 returning. */
21309 /* Oh dear! We have no low registers into which we can pop
21310 high registers! */
21311 internal_error
21312 ("no low registers available for popping high registers");
21314 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21315 if (live_regs_mask & (1 << next_hi_reg))
21318 while (high_regs_pushed)
21320 /* Find lo register(s) into which the high register(s) can
21322 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21324 if (mask & (1 << regno))
21325 high_regs_pushed--;
21326 if (high_regs_pushed == 0)
21330 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21332 /* Pop the values into the low register(s). */
21333 thumb_pop (asm_out_file, mask);
21335 /* Move the value(s) into the high registers. */
21336 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21338 if (mask & (1 << regno))
21340 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21343 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21344 if (live_regs_mask & (1 << next_hi_reg))
21349 live_regs_mask &= ~0x0f00;
21352 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21353 live_regs_mask &= 0xff;
21355 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21357 /* Pop the return address into the PC. */
21358 if (had_to_push_lr)
21359 live_regs_mask |= 1 << PC_REGNUM;
21361 /* Either no argument registers were pushed or a backtrace
21362 structure was created which includes an adjusted stack
21363 pointer, so just pop everything. */
21364 if (live_regs_mask)
21365 thumb_pop (asm_out_file, live_regs_mask);
21367 /* We have either just popped the return address into the
21368 PC or it was kept in LR for the entire function.
21369 Note that thumb_pop has already called thumb_exit if the
21370 PC was in the list. */
21371 if (!had_to_push_lr)
21372 thumb_exit (asm_out_file, LR_REGNUM);
21376 /* Pop everything but the return address. */
21377 if (live_regs_mask)
21378 thumb_pop (asm_out_file, live_regs_mask);
21380 if (had_to_push_lr)
21384 /* We have no free low regs, so save one. */
21385 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21389 /* Get the return address into a temporary register. */
21390 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21394 /* Move the return address to lr. */
21395 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21397 /* Restore the low register. */
21398 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21403 regno = LAST_ARG_REGNUM;
21408 /* Remove the argument registers that were pushed onto the stack. */
21409 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21410 SP_REGNUM, SP_REGNUM,
21411 crtl->args.pretend_args_size);
21413 thumb_exit (asm_out_file, regno);
21419 /* Functions to save and restore machine-specific function data. */
21420 static struct machine_function *
21421 arm_init_machine_status (void)
21423 struct machine_function *machine;
21424 machine = ggc_alloc_cleared_machine_function ();
21426 #if ARM_FT_UNKNOWN != 0
21427 machine->func_type = ARM_FT_UNKNOWN;
21432 /* Return an RTX indicating where the return address to the
21433 calling function can be found. */
21435 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21440 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21443 /* Do anything needed before RTL is emitted for each function. */
21445 arm_init_expanders (void)
21447 /* Arrange to initialize and mark the machine per-function status. */
21448 init_machine_status = arm_init_machine_status;
21450 /* This is to stop the combine pass optimizing away the alignment
21451 adjustment of va_arg. */
21452 /* ??? It is claimed that this should not be necessary. */
21454 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
21458 /* Like arm_compute_initial_elimination_offset. Simpler because there
21459 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21460 to point at the base of the local variables after static stack
21461 space for a function has been allocated. */
21464 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21466 arm_stack_offsets *offsets;
21468 offsets = arm_get_frame_offsets ();
21472 case ARG_POINTER_REGNUM:
21475 case STACK_POINTER_REGNUM:
21476 return offsets->outgoing_args - offsets->saved_args;
21478 case FRAME_POINTER_REGNUM:
21479 return offsets->soft_frame - offsets->saved_args;
21481 case ARM_HARD_FRAME_POINTER_REGNUM:
21482 return offsets->saved_regs - offsets->saved_args;
21484 case THUMB_HARD_FRAME_POINTER_REGNUM:
21485 return offsets->locals_base - offsets->saved_args;
21488 gcc_unreachable ();
21492 case FRAME_POINTER_REGNUM:
21495 case STACK_POINTER_REGNUM:
21496 return offsets->outgoing_args - offsets->soft_frame;
21498 case ARM_HARD_FRAME_POINTER_REGNUM:
21499 return offsets->saved_regs - offsets->soft_frame;
21501 case THUMB_HARD_FRAME_POINTER_REGNUM:
21502 return offsets->locals_base - offsets->soft_frame;
21505 gcc_unreachable ();
21510 gcc_unreachable ();
21514 /* Generate the function's prologue. */
21517 thumb1_expand_prologue (void)
21521 HOST_WIDE_INT amount;
21522 arm_stack_offsets *offsets;
21523 unsigned long func_type;
21525 unsigned long live_regs_mask;
21526 unsigned long l_mask;
21527 unsigned high_regs_pushed = 0;
21529 func_type = arm_current_func_type ();
21531 /* Naked functions don't have prologues. */
21532 if (IS_NAKED (func_type))
21535 if (IS_INTERRUPT (func_type))
21537 error ("interrupt Service Routines cannot be coded in Thumb mode");
21541 if (is_called_in_ARM_mode (current_function_decl))
21542 emit_insn (gen_prologue_thumb1_interwork ());
21544 offsets = arm_get_frame_offsets ();
21545 live_regs_mask = offsets->saved_regs_mask;
21547 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21548 l_mask = live_regs_mask & 0x40ff;
21549 /* Then count how many other high registers will need to be pushed. */
21550 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21552 if (crtl->args.pretend_args_size)
21554 rtx x = GEN_INT (-crtl->args.pretend_args_size);
21556 if (cfun->machine->uses_anonymous_args)
21558 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21559 unsigned long mask;
21561 mask = 1ul << (LAST_ARG_REGNUM + 1);
21562 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
21564 insn = thumb1_emit_multi_reg_push (mask, 0);
21568 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21569 stack_pointer_rtx, x));
21571 RTX_FRAME_RELATED_P (insn) = 1;
21574 if (TARGET_BACKTRACE)
21576 HOST_WIDE_INT offset = 0;
21577 unsigned work_register;
21578 rtx work_reg, x, arm_hfp_rtx;
21580 /* We have been asked to create a stack backtrace structure.
21581 The code looks like this:
21585 0 sub SP, #16 Reserve space for 4 registers.
21586 2 push {R7} Push low registers.
21587 4 add R7, SP, #20 Get the stack pointer before the push.
21588 6 str R7, [SP, #8] Store the stack pointer
21589 (before reserving the space).
21590 8 mov R7, PC Get hold of the start of this code + 12.
21591 10 str R7, [SP, #16] Store it.
21592 12 mov R7, FP Get hold of the current frame pointer.
21593 14 str R7, [SP, #4] Store it.
21594 16 mov R7, LR Get hold of the current return address.
21595 18 str R7, [SP, #12] Store it.
21596 20 add R7, SP, #16 Point at the start of the
21597 backtrace structure.
21598 22 mov FP, R7 Put this value into the frame pointer. */
21600 work_register = thumb_find_work_register (live_regs_mask);
21601 work_reg = gen_rtx_REG (SImode, work_register);
21602 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
21604 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21605 stack_pointer_rtx, GEN_INT (-16)));
21606 RTX_FRAME_RELATED_P (insn) = 1;
21610 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
21611 RTX_FRAME_RELATED_P (insn) = 1;
21613 offset = bit_count (l_mask) * UNITS_PER_WORD;
21616 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
21617 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21619 x = plus_constant (stack_pointer_rtx, offset + 4);
21620 x = gen_frame_mem (SImode, x);
21621 emit_move_insn (x, work_reg);
21623 /* Make sure that the instruction fetching the PC is in the right place
21624 to calculate "start of backtrace creation code + 12". */
21625 /* ??? The stores using the common WORK_REG ought to be enough to
21626 prevent the scheduler from doing anything weird. Failing that
21627 we could always move all of the following into an UNSPEC_VOLATILE. */
21630 x = gen_rtx_REG (SImode, PC_REGNUM);
21631 emit_move_insn (work_reg, x);
21633 x = plus_constant (stack_pointer_rtx, offset + 12);
21634 x = gen_frame_mem (SImode, x);
21635 emit_move_insn (x, work_reg);
21637 emit_move_insn (work_reg, arm_hfp_rtx);
21639 x = plus_constant (stack_pointer_rtx, offset);
21640 x = gen_frame_mem (SImode, x);
21641 emit_move_insn (x, work_reg);
21645 emit_move_insn (work_reg, arm_hfp_rtx);
21647 x = plus_constant (stack_pointer_rtx, offset);
21648 x = gen_frame_mem (SImode, x);
21649 emit_move_insn (x, work_reg);
21651 x = gen_rtx_REG (SImode, PC_REGNUM);
21652 emit_move_insn (work_reg, x);
21654 x = plus_constant (stack_pointer_rtx, offset + 12);
21655 x = gen_frame_mem (SImode, x);
21656 emit_move_insn (x, work_reg);
21659 x = gen_rtx_REG (SImode, LR_REGNUM);
21660 emit_move_insn (work_reg, x);
21662 x = plus_constant (stack_pointer_rtx, offset + 8);
21663 x = gen_frame_mem (SImode, x);
21664 emit_move_insn (x, work_reg);
21666 x = GEN_INT (offset + 12);
21667 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21669 emit_move_insn (arm_hfp_rtx, work_reg);
21671 /* Optimization: If we are not pushing any low registers but we are going
21672 to push some high registers then delay our first push. This will just
21673 be a push of LR and we can combine it with the push of the first high
21674 register. */
21675 else if ((l_mask & 0xff) != 0
21676 || (high_regs_pushed == 0 && l_mask))
21678 unsigned long mask = l_mask;
21679 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21680 insn = thumb1_emit_multi_reg_push (mask, mask);
21681 RTX_FRAME_RELATED_P (insn) = 1;
21684 if (high_regs_pushed)
21686 unsigned pushable_regs;
21687 unsigned next_hi_reg;
21689 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21690 if (live_regs_mask & (1 << next_hi_reg))
21693 pushable_regs = l_mask & 0xff;
21695 if (pushable_regs == 0)
21696 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21698 while (high_regs_pushed > 0)
21700 unsigned long real_regs_mask = 0;
21702 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21704 if (pushable_regs & (1 << regno))
21706 emit_move_insn (gen_rtx_REG (SImode, regno),
21707 gen_rtx_REG (SImode, next_hi_reg));
21709 high_regs_pushed --;
21710 real_regs_mask |= (1 << next_hi_reg);
21712 if (high_regs_pushed)
21714 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21716 if (live_regs_mask & (1 << next_hi_reg))
21721 pushable_regs &= ~((1 << regno) - 1);
21727 /* If we had to find a work register and we have not yet
21728 saved the LR then add it to the list of regs to push. */
21729 if (l_mask == (1 << LR_REGNUM))
21731 pushable_regs |= l_mask;
21732 real_regs_mask |= l_mask;
21736 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
21737 RTX_FRAME_RELATED_P (insn) = 1;
21741 /* Load the pic register before setting the frame pointer,
21742 so we can use r7 as a temporary work register. */
21743 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21744 arm_load_pic_register (live_regs_mask);
21746 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21747 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
21748 stack_pointer_rtx);
21750 if (flag_stack_usage_info)
21751 current_function_static_stack_size
21752 = offsets->outgoing_args - offsets->saved_args;
21754 amount = offsets->outgoing_args - offsets->saved_regs;
21755 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
21760 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21761 GEN_INT (- amount)));
21762 RTX_FRAME_RELATED_P (insn) = 1;
21768 /* The stack decrement is too big for an immediate value in a single
21769 insn. In theory we could issue multiple subtracts, but after
21770 three of them it becomes more space efficient to place the full
21771 value in the constant pool and load into a register. (Also the
21772 ARM debugger really likes to see only one stack decrement per
21773 function). So instead we look for a scratch register into which
21774 we can load the decrement, and then we subtract this from the
21775 stack pointer. Unfortunately on the thumb the only available
21776 scratch registers are the argument registers, and we cannot use
21777 these as they may hold arguments to the function. Instead we
21778 attempt to locate a call preserved register which is used by this
21779 function. If we can find one, then we know that it will have
21780 been pushed at the start of the prologue and so we can corrupt
21781 it now. */
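/* For illustration only (a sketch, not verbatim output; the register
   chosen depends on which call-preserved low registers this function
   actually saves), assuming r4 is such a register the sequence
   emitted below is along the lines of:

	ldr	r4, .Lpool_entry	@ pool entry holds -<amount>
	add	sp, sp, r4  */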
21782 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
21783 if (live_regs_mask & (1 << regno))
21786 gcc_assert(regno <= LAST_LO_REGNUM);
21788 reg = gen_rtx_REG (SImode, regno);
21790 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
21792 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21793 stack_pointer_rtx, reg));
21795 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21796 plus_constant (stack_pointer_rtx,
21798 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21799 RTX_FRAME_RELATED_P (insn) = 1;
21803 if (frame_pointer_needed)
21804 thumb_set_frame_pointer (offsets);
21806 /* If we are profiling, make sure no instructions are scheduled before
21807 the call to mcount. Similarly if the user has requested no
21808 scheduling in the prolog. Similarly if we want non-call exceptions
21809 using the EABI unwinder, to prevent faulting instructions from being
21810 swapped with a stack adjustment. */
21811 if (crtl->profile || !TARGET_SCHED_PROLOG
21812 || (arm_except_unwind_info (&global_options) == UI_TARGET
21813 && cfun->can_throw_non_call_exceptions))
21814 emit_insn (gen_blockage ());
21816 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
21817 if (live_regs_mask & 0xff)
21818 cfun->machine->lr_save_eliminated = 0;
21823 thumb1_expand_epilogue (void)
21825 HOST_WIDE_INT amount;
21826 arm_stack_offsets *offsets;
21829 /* Naked functions don't have prologues. */
21830 if (IS_NAKED (arm_current_func_type ()))
21833 offsets = arm_get_frame_offsets ();
21834 amount = offsets->outgoing_args - offsets->saved_regs;
21836 if (frame_pointer_needed)
21838 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
21839 amount = offsets->locals_base - offsets->saved_regs;
21841 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
21843 gcc_assert (amount >= 0);
21847 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21848 GEN_INT (amount)));
21851 /* r3 is always free in the epilogue. */
21852 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
21854 emit_insn (gen_movsi (reg, GEN_INT (amount)));
21855 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
21859 /* Emit a USE (stack_pointer_rtx), so that
21860 the stack adjustment will not be deleted. */
21861 emit_insn (gen_prologue_use (stack_pointer_rtx));
21863 if (crtl->profile || !TARGET_SCHED_PROLOG)
21864 emit_insn (gen_blockage ());
21866 /* Emit a clobber for each insn that will be restored in the epilogue,
21867 so that flow2 will get register lifetimes correct. */
21868 for (regno = 0; regno < 13; regno++)
21869 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
21870 emit_clobber (gen_rtx_REG (SImode, regno));
21872 if (! df_regs_ever_live_p (LR_REGNUM))
21873 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
21876 /* Implementation of insn prologue_thumb1_interwork. This is the first
21877 "instruction" of a function called in ARM mode. Swap to thumb mode. */
21880 thumb1_output_interwork (void)
21883 FILE *f = asm_out_file;
21885 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
21886 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
21888 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
21890 /* Generate code sequence to switch us into Thumb mode. */
21891 /* The .code 32 directive has already been emitted by
21892 ASM_DECLARE_FUNCTION_NAME. */
21893 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21894 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21896 /* Generate a label, so that the debugger will notice the
21897 change in instruction sets. This label is also used by
21898 the assembler to bypass the ARM code when this function
21899 is called from a Thumb encoded function elsewhere in the
21900 same file. Hence the definition of STUB_NAME here must
21901 agree with the definition in gas/config/tc-arm.c. */
21903 #define STUB_NAME ".real_start_of"
21905 fprintf (f, "\t.code\t16\n");
21907 if (arm_dllexport_name_p (name))
21908 name = arm_strip_name_encoding (name);
21910 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21911 fprintf (f, "\t.thumb_func\n");
21912 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
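/* Taken together, the output above yields a stub of roughly this
   shape (illustrative only; "foo" stands for the function's
   assembler name and the label spelling follows STUB_NAME and the
   target's user label prefix):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of_foo
	.thumb_func
   .real_start_of_foo:  */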
21917 /* Handle the case of a double word load into a low register from
21918 a computed memory address. The computed address may involve a
21919 register which is overwritten by the load. */
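/* For example (a sketch, not verbatim compiler output): loading a
   DImode value at [r0] into r0/r1 must fetch the high word first,
   because the low-word load destroys the address register:

	ldr	r1, [r0, #4]	@ high word; r0 still holds the address
	ldr	r0, [r0]	@ low word; r0 is now clobbered

   When the destination does not overlap the address register, the
   low word is loaded first instead.  */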
21921 thumb_load_double_from_address (rtx *operands)
21929 gcc_assert (GET_CODE (operands[0]) == REG);
21930 gcc_assert (GET_CODE (operands[1]) == MEM);
21932 /* Get the memory address. */
21933 addr = XEXP (operands[1], 0);
21935 /* Work out how the memory address is computed. */
21936 switch (GET_CODE (addr))
21939 operands[2] = adjust_address (operands[1], SImode, 4);
21941 if (REGNO (operands[0]) == REGNO (addr))
21943 output_asm_insn ("ldr\t%H0, %2", operands);
21944 output_asm_insn ("ldr\t%0, %1", operands);
21948 output_asm_insn ("ldr\t%0, %1", operands);
21949 output_asm_insn ("ldr\t%H0, %2", operands);
21954 /* Compute <address> + 4 for the high order load. */
21955 operands[2] = adjust_address (operands[1], SImode, 4);
21957 output_asm_insn ("ldr\t%0, %1", operands);
21958 output_asm_insn ("ldr\t%H0, %2", operands);
21962 arg1 = XEXP (addr, 0);
21963 arg2 = XEXP (addr, 1);
21965 if (CONSTANT_P (arg1))
21966 base = arg2, offset = arg1;
21968 base = arg1, offset = arg2;
21970 gcc_assert (GET_CODE (base) == REG);
21972 /* Catch the case of <address> = <reg> + <reg> */
21973 if (GET_CODE (offset) == REG)
21975 int reg_offset = REGNO (offset);
21976 int reg_base = REGNO (base);
21977 int reg_dest = REGNO (operands[0]);
21979 /* Add the base and offset registers together into the
21980 higher destination register. */
21981 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21982 reg_dest + 1, reg_base, reg_offset);
21984 /* Load the lower destination register from the address in
21985 the higher destination register. */
21986 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21987 reg_dest, reg_dest + 1);
21989 /* Load the higher destination register from its own address
21990 plus 4. */
21991 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21992 reg_dest + 1, reg_dest + 1);
21996 /* Compute <address> + 4 for the high order load. */
21997 operands[2] = adjust_address (operands[1], SImode, 4);
21999 /* If the computed address is held in the low order register
22000 then load the high order register first, otherwise always
22001 load the low order register first. */
22002 if (REGNO (operands[0]) == REGNO (base))
22004 output_asm_insn ("ldr\t%H0, %2", operands);
22005 output_asm_insn ("ldr\t%0, %1", operands);
22009 output_asm_insn ("ldr\t%0, %1", operands);
22010 output_asm_insn ("ldr\t%H0, %2", operands);
22016 /* With no registers to worry about we can just load the value
22017 directly. */
22018 operands[2] = adjust_address (operands[1], SImode, 4);
22020 output_asm_insn ("ldr\t%H0, %2", operands);
22021 output_asm_insn ("ldr\t%0, %1", operands);
22025 gcc_unreachable ();
22032 thumb_output_move_mem_multiple (int n, rtx *operands)
22039 if (REGNO (operands[4]) > REGNO (operands[5]))
22042 operands[4] = operands[5];
22045 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22046 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22050 if (REGNO (operands[4]) > REGNO (operands[5]))
22053 operands[4] = operands[5];
22056 if (REGNO (operands[5]) > REGNO (operands[6]))
22059 operands[5] = operands[6];
22062 if (REGNO (operands[4]) > REGNO (operands[5]))
22065 operands[4] = operands[5];
22069 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22070 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22074 gcc_unreachable ();
22080 /* Output a call-via instruction for thumb state. */
22082 thumb_call_via_reg (rtx reg)
22084 int regno = REGNO (reg);
22087 gcc_assert (regno < LR_REGNUM);
22089 /* If we are in the normal text section we can use a single instance
22090 per compilation unit. If we are doing function sections, then we need
22091 an entry per section, since we can't rely on reachability. */
22092 if (in_section == text_section)
22094 thumb_call_reg_needed = 1;
22096 if (thumb_call_via_label[regno] == NULL)
22097 thumb_call_via_label[regno] = gen_label_rtx ();
22098 labelp = thumb_call_via_label + regno;
22102 if (cfun->machine->call_via[regno] == NULL)
22103 cfun->machine->call_via[regno] = gen_label_rtx ();
22104 labelp = cfun->machine->call_via + regno;
22107 output_asm_insn ("bl\t%a0", labelp);
22111 /* Routines for generating rtl. */
22113 thumb_expand_movmemqi (rtx *operands)
22115 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22116 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22117 HOST_WIDE_INT len = INTVAL (operands[2]);
22118 HOST_WIDE_INT offset = 0;
22122 emit_insn (gen_movmem12b (out, in, out, in));
22128 emit_insn (gen_movmem8b (out, in, out, in));
22134 rtx reg = gen_reg_rtx (SImode);
22135 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22136 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22143 rtx reg = gen_reg_rtx (HImode);
22144 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22145 plus_constant (in, offset))));
22146 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22154 rtx reg = gen_reg_rtx (QImode);
22155 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22156 plus_constant (in, offset))));
22157 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22163 thumb_reload_out_hi (rtx *operands)
22165 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22168 /* Handle reading a half-word from memory during reload. */
22170 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22172 gcc_unreachable ();
22175 /* Return the length of a function name prefix
22176 that starts with the character 'c'. */
22178 arm_get_strip_length (int c)
22182 ARM_NAME_ENCODING_LENGTHS
22187 /* Return a pointer to a function's name with any
22188 and all prefix encodings stripped from it. */
22190 arm_strip_name_encoding (const char *name)
22194 while ((skip = arm_get_strip_length (* name)))
22200 /* If there is a '*' anywhere in the name's prefix, then
22201 emit the stripped name verbatim, otherwise prepend an
22202 underscore if leading underscores are being used. */
22204 arm_asm_output_labelref (FILE *stream, const char *name)
22209 while ((skip = arm_get_strip_length (* name)))
22211 verbatim |= (*name == '*');
22216 fputs (name, stream);
22218 asm_fprintf (stream, "%U%s", name);
22222 arm_file_start (void)
22226 if (TARGET_UNIFIED_ASM)
22227 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22231 const char *fpu_name;
22232 if (arm_selected_arch)
22233 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22234 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22235 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22237 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22239 if (TARGET_SOFT_FLOAT)
22242 fpu_name = "softvfp";
22244 fpu_name = "softfpa";
22248 fpu_name = arm_fpu_desc->name;
22249 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22251 if (TARGET_HARD_FLOAT)
22252 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
22253 if (TARGET_HARD_FLOAT_ABI)
22254 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
22257 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22259 /* Some of these attributes only apply when the corresponding features
22260 are used. However we don't have any easy way of figuring this out.
22261 Conservatively record the setting that would have been used. */
22263 /* Tag_ABI_FP_rounding. */
22264 if (flag_rounding_math)
22265 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
22266 if (!flag_unsafe_math_optimizations)
22268 /* Tag_ABI_FP_denormal. */
22269 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
22270 /* Tag_ABI_FP_exceptions. */
22271 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
22273 /* Tag_ABI_FP_user_exceptions. */
22274 if (flag_signaling_nans)
22275 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
22276 /* Tag_ABI_FP_number_model. */
22277 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
22278 flag_finite_math_only ? 1 : 3);
22280 /* Tag_ABI_align8_needed. */
22281 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
22282 /* Tag_ABI_align8_preserved. */
22283 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
22284 /* Tag_ABI_enum_size. */
22285 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
22286 flag_short_enums ? 1 : 2);
22288 /* Tag_ABI_optimization_goals. */
22291 else if (optimize >= 2)
22297 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
22299 /* Tag_CPU_unaligned_access. */
22300 asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
22303 /* Tag_ABI_FP_16bit_format. */
22304 if (arm_fp16_format)
22305 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
22306 (int)arm_fp16_format);
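  /* By way of example (illustrative only, assuming a VFP target at -O2
     with default math options; the exact values depend on the command
     line), the directives emitted above take a form along these lines:

	.fpu	vfpv3
	.eabi_attribute 27, 3
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2  */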
22308 if (arm_lang_output_object_attributes_hook)
22309 arm_lang_output_object_attributes_hook();
22311 default_file_start();
22315 arm_file_end (void)
22319 if (NEED_INDICATE_EXEC_STACK)
22320 /* Add .note.GNU-stack. */
22321 file_end_indicate_exec_stack ();
22323 if (! thumb_call_reg_needed)
22326 switch_to_section (text_section);
22327 asm_fprintf (asm_out_file, "\t.code 16\n");
22328 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22330 for (regno = 0; regno < LR_REGNUM; regno++)
22332 rtx label = thumb_call_via_label[regno];
22336 targetm.asm_out.internal_label (asm_out_file, "L",
22337 CODE_LABEL_NUMBER (label));
22338 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22344 /* Symbols in the text segment can be accessed without indirecting via the
22345 constant pool; it may take an extra binary operation, but this is still
22346 faster than indirecting via memory. Don't do this when not optimizing,
22347 since we won't be calculating all of the offsets necessary to do this
22348 optimization. */
22351 arm_encode_section_info (tree decl, rtx rtl, int first)
22353 if (optimize > 0 && TREE_CONSTANT (decl))
22354 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22356 default_encode_section_info (decl, rtl, first);
22358 #endif /* !ARM_PE */
22361 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22363 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22364 && !strcmp (prefix, "L"))
22366 arm_ccfsm_state = 0;
22367 arm_target_insn = NULL;
22369 default_internal_label (stream, prefix, labelno);
22372 /* Output code to add DELTA to the first argument, and then jump
22373 to FUNCTION. Used for C++ multiple inheritance. */
22375 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22376 HOST_WIDE_INT delta,
22377 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22380 static int thunk_label = 0;
22383 int mi_delta = delta;
22384 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22386 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22389 mi_delta = - mi_delta;
22393 int labelno = thunk_label++;
22394 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22395 /* Thunks are entered in ARM mode when available. */
22396 if (TARGET_THUMB1_ONLY)
22398 /* push r3 so we can use it as a temporary. */
22399 /* TODO: Omit this save if r3 is not used. */
22400 fputs ("\tpush {r3}\n", file);
22401 fputs ("\tldr\tr3, ", file);
22405 fputs ("\tldr\tr12, ", file);
22407 assemble_name (file, label);
22408 fputc ('\n', file);
22411 /* If we are generating PIC, the ldr instruction below loads
22412 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22413 the address of the add + 8, so we have:
22415 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22416 = target + 1.
22418 Note that we have "+ 1" because some versions of GNU ld
22419 don't set the low bit of the result for R_ARM_REL32
22420 relocations against thumb function symbols.
22421 On ARMv6M this is +4, not +8. */
22422 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22423 assemble_name (file, labelpc);
22424 fputs (":\n", file);
22425 if (TARGET_THUMB1_ONLY)
22427 /* This is 2 insns after the start of the thunk, so we know it
22428 is 4-byte aligned. */
22429 fputs ("\tadd\tr3, pc, r3\n", file);
22430 fputs ("\tmov r12, r3\n", file);
22433 fputs ("\tadd\tr12, pc, r12\n", file);
22435 else if (TARGET_THUMB1_ONLY)
22436 fputs ("\tmov r12, r3\n", file);
22438 if (TARGET_THUMB1_ONLY)
22440 if (mi_delta > 255)
22442 fputs ("\tldr\tr3, ", file);
22443 assemble_name (file, label);
22444 fputs ("+4\n", file);
22445 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22446 mi_op, this_regno, this_regno);
22448 else if (mi_delta != 0)
22450 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22451 mi_op, this_regno, this_regno,
22457 /* TODO: Use movw/movt for large constants when available. */
22458 while (mi_delta != 0)
22460 if ((mi_delta & (3 << shift)) == 0)
22464 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22465 mi_op, this_regno, this_regno,
22466 mi_delta & (0xff << shift));
22467 mi_delta &= ~(0xff << shift);
22474 if (TARGET_THUMB1_ONLY)
22475 fputs ("\tpop\t{r3}\n", file);
22477 fprintf (file, "\tbx\tr12\n");
22478 ASM_OUTPUT_ALIGN (file, 2);
22479 assemble_name (file, label);
22480 fputs (":\n", file);
22483 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22484 rtx tem = XEXP (DECL_RTL (function), 0);
22485 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22486 tem = gen_rtx_MINUS (GET_MODE (tem),
22488 gen_rtx_SYMBOL_REF (Pmode,
22489 ggc_strdup (labelpc)));
22490 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22493 /* Output ".word .LTHUNKn". */
22494 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
22496 if (TARGET_THUMB1_ONLY && mi_delta > 255)
22497 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
22501 fputs ("\tb\t", file);
22502 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
22503 if (NEED_PLT_RELOC)
22504 fputs ("(PLT)", file);
22505 fputc ('\n', file);
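/* As a concrete (purely illustrative) example: a simple non-PIC
   ARM-mode thunk that adds 8 to `this' (in r0) and tail-calls foo
   comes out as roughly

	add	r0, r0, #8
	b	foo(PLT)

   where the (PLT) suffix appears only when NEED_PLT_RELOC holds.  */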
22510 arm_emit_vector_const (FILE *file, rtx x)
22513 const char * pattern;
22515 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22517 switch (GET_MODE (x))
22519 case V2SImode: pattern = "%08x"; break;
22520 case V4HImode: pattern = "%04x"; break;
22521 case V8QImode: pattern = "%02x"; break;
22522 default: gcc_unreachable ();
22525 fprintf (file, "0x");
22526 for (i = CONST_VECTOR_NUNITS (x); i--;)
22530 element = CONST_VECTOR_ELT (x, i);
22531 fprintf (file, pattern, INTVAL (element));
22537 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
22538 HFmode constant pool entries are actually loaded with ldr. */
22540 arm_emit_fp16_const (rtx c)
22545 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
22546 bits = real_to_target (NULL, &r, HFmode);
22547 if (WORDS_BIG_ENDIAN)
22548 assemble_zeros (2);
22549 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
22550 if (!WORDS_BIG_ENDIAN)
22551 assemble_zeros (2);
22555 arm_output_load_gr (rtx *operands)
22562 if (GET_CODE (operands [1]) != MEM
22563 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
22564 || GET_CODE (reg = XEXP (sum, 0)) != REG
22565 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
22566 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
22567 return "wldrw%?\t%0, %1";
22569 /* Fix up an out-of-range load of a GR register. */
22570 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
22571 wcgr = operands[0];
22573 output_asm_insn ("ldr%?\t%0, %1", operands);
22575 operands[0] = wcgr;
22577 output_asm_insn ("tmcr%?\t%0, %1", operands);
22578 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
22583 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
22585 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
22586 named arg and all anonymous args onto the stack.
22587 XXX I know the prologue shouldn't be pushing registers, but it is faster
22588 that way. */
22591 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
22592 enum machine_mode mode,
22595 int second_time ATTRIBUTE_UNUSED)
22597 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
22600 cfun->machine->uses_anonymous_args = 1;
22601 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
22603 nregs = pcum->aapcs_ncrn;
22604 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
22608 nregs = pcum->nregs;
22610 if (nregs < NUM_ARG_REGS)
22611 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
22614 /* Return nonzero if the CONSUMER instruction (a store) does not need
22615 PRODUCER's value to calculate the address. */
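/* For instance (a sketch of the RTL shapes involved, not actual
   compiler state): with PRODUCER (set (reg r1) (plus ...)) and
   CONSUMER (set (mem (reg r2)) (reg r1)), the address (reg r2) does
   not mention r1, so the store does not need r1 early and this
   returns nonzero.  */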
22618 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
22620 rtx value = PATTERN (producer);
22621 rtx addr = PATTERN (consumer);
22623 if (GET_CODE (value) == COND_EXEC)
22624 value = COND_EXEC_CODE (value);
22625 if (GET_CODE (value) == PARALLEL)
22626 value = XVECEXP (value, 0, 0);
22627 value = XEXP (value, 0);
22628 if (GET_CODE (addr) == COND_EXEC)
22629 addr = COND_EXEC_CODE (addr);
22630 if (GET_CODE (addr) == PARALLEL)
22631 addr = XVECEXP (addr, 0, 0);
22632 addr = XEXP (addr, 0);
22634 return !reg_overlap_mentioned_p (value, addr);
22637 /* Return nonzero if the CONSUMER instruction (a store) does need
22638 PRODUCER's value to calculate the address. */
22641 arm_early_store_addr_dep (rtx producer, rtx consumer)
22643 return !arm_no_early_store_addr_dep (producer, consumer);
22646 /* Return nonzero if the CONSUMER instruction (a load) does need
22647 PRODUCER's value to calculate the address. */
22650 arm_early_load_addr_dep (rtx producer, rtx consumer)
22652 rtx value = PATTERN (producer);
22653 rtx addr = PATTERN (consumer);
22655 if (GET_CODE (value) == COND_EXEC)
22656 value = COND_EXEC_CODE (value);
22657 if (GET_CODE (value) == PARALLEL)
22658 value = XVECEXP (value, 0, 0);
22659 value = XEXP (value, 0);
22660 if (GET_CODE (addr) == COND_EXEC)
22661 addr = COND_EXEC_CODE (addr);
22662 if (GET_CODE (addr) == PARALLEL)
22663 addr = XVECEXP (addr, 0, 0);
22664 addr = XEXP (addr, 1);
22666 return reg_overlap_mentioned_p (value, addr);
22669 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22670 have an early register shift value or amount dependency on the
22671 result of PRODUCER. */
22674 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
22676 rtx value = PATTERN (producer);
22677 rtx op = PATTERN (consumer);
22680 if (GET_CODE (value) == COND_EXEC)
22681 value = COND_EXEC_CODE (value);
22682 if (GET_CODE (value) == PARALLEL)
22683 value = XVECEXP (value, 0, 0);
22684 value = XEXP (value, 0);
22685 if (GET_CODE (op) == COND_EXEC)
22686 op = COND_EXEC_CODE (op);
22687 if (GET_CODE (op) == PARALLEL)
22688 op = XVECEXP (op, 0, 0);
22691 early_op = XEXP (op, 0);
22692 /* This is either an actual independent shift, or a shift applied to
22693 the first operand of another operation. We want the whole shift
22694 operation. */
22695 if (GET_CODE (early_op) == REG)
22698 return !reg_overlap_mentioned_p (value, early_op);
22701 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22702 have an early register shift value dependency on the result of
22703 PRODUCER. */
22706 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22708 rtx value = PATTERN (producer);
22709 rtx op = PATTERN (consumer);
22712 if (GET_CODE (value) == COND_EXEC)
22713 value = COND_EXEC_CODE (value);
22714 if (GET_CODE (value) == PARALLEL)
22715 value = XVECEXP (value, 0, 0);
22716 value = XEXP (value, 0);
22717 if (GET_CODE (op) == COND_EXEC)
22718 op = COND_EXEC_CODE (op);
22719 if (GET_CODE (op) == PARALLEL)
22720 op = XVECEXP (op, 0, 0);
22723 early_op = XEXP (op, 0);
22725 /* This is either an actual independent shift, or a shift applied to
22726 the first operand of another operation. We want the value being
22727 shifted, in either case. */
22728 if (GET_CODE (early_op) != REG)
22729 early_op = XEXP (early_op, 0);
22731 return !reg_overlap_mentioned_p (value, early_op);
22734 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22735 have an early register mult dependency on the result of
22736 PRODUCER. */
22739 arm_no_early_mul_dep (rtx producer, rtx consumer)
22741 rtx value = PATTERN (producer);
22742 rtx op = PATTERN (consumer);
22744 if (GET_CODE (value) == COND_EXEC)
22745 value = COND_EXEC_CODE (value);
22746 if (GET_CODE (value) == PARALLEL)
22747 value = XVECEXP (value, 0, 0);
22748 value = XEXP (value, 0);
22749 if (GET_CODE (op) == COND_EXEC)
22750 op = COND_EXEC_CODE (op);
22751 if (GET_CODE (op) == PARALLEL)
22752 op = XVECEXP (op, 0, 0);
22755 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22757 if (GET_CODE (XEXP (op, 0)) == MULT)
22758 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22760 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22766 /* We can't rely on the caller doing the proper promotion when
22767 using APCS or ATPCS. */
22770 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22772 return !TARGET_AAPCS_BASED;
22775 static enum machine_mode
22776 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22777 enum machine_mode mode,
22778 int *punsignedp ATTRIBUTE_UNUSED,
22779 const_tree fntype ATTRIBUTE_UNUSED,
22780 int for_return ATTRIBUTE_UNUSED)
22782 if (GET_MODE_CLASS (mode) == MODE_INT
22783 && GET_MODE_SIZE (mode) < 4)
22789 /* AAPCS based ABIs use short enums by default. */
22792 arm_default_short_enums (void)
22794 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22798 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22801 arm_align_anon_bitfield (void)
22803 return TARGET_AAPCS_BASED;
22807 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22810 arm_cxx_guard_type (void)
22812 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22815 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22816 has an accumulator dependency on the result of the producer (a
22817 multiplication instruction) and no other dependency on that result. */
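/* E.g. (illustrative): "mul r4, r1, r2" followed by
   "mla r5, r3, r6, r4" satisfies this test, since r4 feeds only the
   accumulator operand of the mla and neither multiplicand.  */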
22819 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22821 rtx mul = PATTERN (producer);
22822 rtx mac = PATTERN (consumer);
22824 rtx mac_op0, mac_op1, mac_acc;
22826 if (GET_CODE (mul) == COND_EXEC)
22827 mul = COND_EXEC_CODE (mul);
22828 if (GET_CODE (mac) == COND_EXEC)
22829 mac = COND_EXEC_CODE (mac);
22831 /* Check that mul is of the form (set (...) (mult ...))
22832 and mla is of the form (set (...) (plus (mult ...) (...))). */
22833 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22834 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22835 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22838 mul_result = XEXP (mul, 0);
22839 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22840 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22841 mac_acc = XEXP (XEXP (mac, 1), 1);
22843 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22844 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22845 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22849 /* The EABI says test the least significant bit of a guard variable. */
22852 arm_cxx_guard_mask_bit (void)
22854 return TARGET_AAPCS_BASED;
22858 /* The EABI specifies that all array cookies are 8 bytes long. */
22861 arm_get_cookie_size (tree type)
22865 if (!TARGET_AAPCS_BASED)
22866 return default_cxx_get_cookie_size (type);
22868 size = build_int_cst (sizetype, 8);
22873 /* The EABI says that array cookies should also contain the element size. */
22876 arm_cookie_has_size (void)
22878 return TARGET_AAPCS_BASED;
22882 /* The EABI says constructors and destructors should return a pointer to
22883 the object constructed/destroyed. */
22886 arm_cxx_cdtor_returns_this (void)
22888 return TARGET_AAPCS_BASED;
22891 /* The EABI says that an inline function may never be the key
22892 method. */
22895 arm_cxx_key_method_may_be_inline (void)
22897 return !TARGET_AAPCS_BASED;
22901 arm_cxx_determine_class_data_visibility (tree decl)
22903 if (!TARGET_AAPCS_BASED
22904 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22907 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22908 is exported. However, on systems without dynamic vague linkage,
22909 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22910 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22911 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22913 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22914 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22918 arm_cxx_class_data_always_comdat (void)
22920 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22921 vague linkage if the class has no key function. */
22922 return !TARGET_AAPCS_BASED;
22926 /* The EABI says __aeabi_atexit should be used to register static
22927 destructors. */
22930 arm_cxx_use_aeabi_atexit (void)
22932 return TARGET_AAPCS_BASED;
22937 arm_set_return_address (rtx source, rtx scratch)
22939 arm_stack_offsets *offsets;
22940 HOST_WIDE_INT delta;
22942 unsigned long saved_regs;
22944 offsets = arm_get_frame_offsets ();
22945 saved_regs = offsets->saved_regs_mask;
22947 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22948 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22951 if (frame_pointer_needed)
22952 addr = plus_constant(hard_frame_pointer_rtx, -4);
22955 /* LR will be the first saved register. */
22956 delta = offsets->outgoing_args - (offsets->frame + 4);
22961 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22962 GEN_INT (delta & ~4095)));
22967 addr = stack_pointer_rtx;
22969 addr = plus_constant (addr, delta);
22971 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22977 thumb_set_return_address (rtx source, rtx scratch)
22979 arm_stack_offsets *offsets;
22980 HOST_WIDE_INT delta;
22981 HOST_WIDE_INT limit;
22984 unsigned long mask;
22988 offsets = arm_get_frame_offsets ();
22989 mask = offsets->saved_regs_mask;
22990 if (mask & (1 << LR_REGNUM))
22993 /* Find the saved regs. */
22994 if (frame_pointer_needed)
22996 delta = offsets->soft_frame - offsets->saved_args;
22997 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23003 delta = offsets->outgoing_args - offsets->saved_args;
23006 /* Allow for the stack frame. */
23007 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23009 /* The link register is always the first saved register. */
23012 /* Construct the address. */
23013 addr = gen_rtx_REG (SImode, reg);
23016 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23017 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23021 addr = plus_constant (addr, delta);
23023 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23026 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23029 /* Implements target hook vector_mode_supported_p. */
23031 arm_vector_mode_supported_p (enum machine_mode mode)
23033 /* Neon also supports V2SImode, etc. listed in the clause below. */
23034 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23035 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23038 if ((TARGET_NEON || TARGET_IWMMXT)
23039 && ((mode == V2SImode)
23040 || (mode == V4HImode)
23041 || (mode == V8QImode)))
23044 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23045 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23046 || mode == V2HAmode))
23052 /* Implements target hook array_mode_supported_p. */
23055 arm_array_mode_supported_p (enum machine_mode mode,
23056 unsigned HOST_WIDE_INT nelems)
23059 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23060 && (nelems >= 2 && nelems <= 4))
23066 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23067 registers when autovectorizing for Neon, at least until multiple vector
23068 widths are supported properly by the middle-end. */
23070 static enum machine_mode
23071 arm_preferred_simd_mode (enum machine_mode mode)
23077 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23079 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23081 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23083 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23085 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23092 if (TARGET_REALLY_IWMMXT)
23108 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23110 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23111 using r0-r4 for function arguments, r7 for the stack frame and don't have
23112 enough left over to do doubleword arithmetic. For Thumb-2 all the
23113 potentially problematic instructions accept high registers so this is not
23114 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23115 that require many low registers. */
23117 arm_class_likely_spilled_p (reg_class_t rclass)
23119 if ((TARGET_THUMB1 && rclass == LO_REGS)
23120 || rclass == CC_REG)
23126 /* Implements target hook small_register_classes_for_mode_p. */
23128 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23130 return TARGET_THUMB1;
23133 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23134 ARM insns and therefore guarantee that the shift count is modulo 256.
23135 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
23136 guarantee no particular behavior for out-of-range counts. */
23138 static unsigned HOST_WIDE_INT
23139 arm_shift_truncation_mask (enum machine_mode mode)
23141 return mode == SImode ? 255 : 0;
23145 /* Map internal gcc register numbers to DWARF2 register numbers. */
23148 arm_dbx_register_number (unsigned int regno)
23153 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23154 compatibility. The EABI defines them as registers 96-103. */
23155 if (IS_FPA_REGNUM (regno))
23156 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23158 if (IS_VFP_REGNUM (regno))
23160 /* See comment in arm_dwarf_register_span. */
23161 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23162 return 64 + regno - FIRST_VFP_REGNUM;
23164 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23167 if (IS_IWMMXT_GR_REGNUM (regno))
23168 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23170 if (IS_IWMMXT_REGNUM (regno))
23171 return 112 + regno - FIRST_IWMMXT_REGNUM;
23173 gcc_unreachable ();
23176 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23177 GCC models them as 64 32-bit registers, so we need to describe this to
23178 the DWARF generation code. Other registers can use the default. */
23180 arm_dwarf_register_span (rtx rtl)
23187 regno = REGNO (rtl);
23188 if (!IS_VFP_REGNUM (regno))
23191 /* XXX FIXME: The EABI defines two VFP register ranges:
23192 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23193 256-287: D0-D31
23194 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23195 corresponding D register. Until GDB supports this, we shall use the
23196 legacy encodings. We also use these encodings for D0-D15 for
23197 compatibility with older debuggers. */
23198 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23201 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23202 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23203 regno = (regno - FIRST_VFP_REGNUM) / 2;
23204 for (i = 0; i < nregs; i++)
23205 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
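  /* For example (illustrative): a 16-byte value held in d16/d17 is
     described as (parallel [(reg:DI 272) (reg:DI 273)]), using the
     256 + N numbering computed above.  */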
23210 #if ARM_UNWIND_INFO
23211 /* Emit unwind directives for a store-multiple instruction or stack pointer
23212 push during alignment.
23213 These should only ever be generated by the function prologue code, so
23214 expect them to have a particular form. */
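/* For example (illustrative): a prologue "push {r4, r5, lr}" is
   annotated as

	.save	{r4, r5, lr}

   while in a push that also includes the pc, that extra slot is
   turned into a ".pad #4" as described below.  */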
23217 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23220 HOST_WIDE_INT offset;
23221 HOST_WIDE_INT nregs;
23227 e = XVECEXP (p, 0, 0);
23228 if (GET_CODE (e) != SET)
23231 /* First insn will adjust the stack pointer. */
23232 if (GET_CODE (e) != SET
23233 || GET_CODE (XEXP (e, 0)) != REG
23234 || REGNO (XEXP (e, 0)) != SP_REGNUM
23235 || GET_CODE (XEXP (e, 1)) != PLUS)
23238 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23239 nregs = XVECLEN (p, 0) - 1;
23241 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23244 /* The function prologue may also push pc, but not annotate it as it is
23245 never restored. We turn this into a stack pointer adjustment. */
23246 if (nregs * 4 == offset - 4)
23248 fprintf (asm_out_file, "\t.pad #4\n");
23252 fprintf (asm_out_file, "\t.save {");
23254 else if (IS_VFP_REGNUM (reg))
23257 fprintf (asm_out_file, "\t.vsave {");
23259 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23261 /* FPA registers are done differently. */
23262 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23266 /* Unknown register type. */
23269 /* If the stack increment doesn't match the size of the saved registers,
23270 something has gone horribly wrong. */
23271 if (offset != nregs * reg_size)
23276 /* The remaining insns will describe the stores. */
23277 for (i = 1; i <= nregs; i++)
23279 /* Expect (set (mem <addr>) (reg)).
23280 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23281 e = XVECEXP (p, 0, i);
23282 if (GET_CODE (e) != SET
23283 || GET_CODE (XEXP (e, 0)) != MEM
23284 || GET_CODE (XEXP (e, 1)) != REG)
23287 reg = REGNO (XEXP (e, 1));
23292 fprintf (asm_out_file, ", ");
23293 /* We can't use %r for vfp because we need to use the
23294 double precision register names. */
23295 if (IS_VFP_REGNUM (reg))
23296 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23298 asm_fprintf (asm_out_file, "%r", reg);
23300 #ifdef ENABLE_CHECKING
23301 /* Check that the addresses are consecutive. */
23302 e = XEXP (XEXP (e, 0), 0);
23303 if (GET_CODE (e) == PLUS)
23305 offset += reg_size;
23306 if (GET_CODE (XEXP (e, 0)) != REG
23307 || REGNO (XEXP (e, 0)) != SP_REGNUM
23308 || GET_CODE (XEXP (e, 1)) != CONST_INT
23309 || offset != INTVAL (XEXP (e, 1)))
23313 || GET_CODE (e) != REG
23314 || REGNO (e) != SP_REGNUM)
23318 fprintf (asm_out_file, "}\n");
23321 /* Emit unwind directives for a SET. */
23324 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23332 switch (GET_CODE (e0))
23335 /* Pushing a single register. */
23336 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23337 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23338 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23341 asm_fprintf (asm_out_file, "\t.save ");
23342 if (IS_VFP_REGNUM (REGNO (e1)))
23343 asm_fprintf(asm_out_file, "{d%d}\n",
23344 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23346 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23350 if (REGNO (e0) == SP_REGNUM)
23352 /* A stack increment. */
23353 if (GET_CODE (e1) != PLUS
23354 || GET_CODE (XEXP (e1, 0)) != REG
23355 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23356 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23359 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23360 -INTVAL (XEXP (e1, 1)));
23362 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23364 HOST_WIDE_INT offset;
23366 if (GET_CODE (e1) == PLUS)
23368 if (GET_CODE (XEXP (e1, 0)) != REG
23369 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23371 reg = REGNO (XEXP (e1, 0));
23372 offset = INTVAL (XEXP (e1, 1));
23373 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23374 HARD_FRAME_POINTER_REGNUM, reg,
23377 else if (GET_CODE (e1) == REG)
23380 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23381 HARD_FRAME_POINTER_REGNUM, reg);
23386 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23388 /* Move from sp to reg. */
23389 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23391 else if (GET_CODE (e1) == PLUS
23392 && GET_CODE (XEXP (e1, 0)) == REG
23393 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23394 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23396 /* Set reg to offset from sp. */
23397 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23398 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
23410 /* Emit unwind directives for the given insn. */
23413 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23416 bool handled_one = false;
23418 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23421 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23422 && (TREE_NOTHROW (current_function_decl)
23423 || crtl->all_throwers_are_sibcalls))
23426 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23429 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23431 pat = XEXP (note, 0);
23432 switch (REG_NOTE_KIND (note))
23434 case REG_FRAME_RELATED_EXPR:
23437 case REG_CFA_REGISTER:
23440 pat = PATTERN (insn);
23441 if (GET_CODE (pat) == PARALLEL)
23442 pat = XVECEXP (pat, 0, 0);
23445 /* Only emitted for IS_STACKALIGN re-alignment. */
23450 src = SET_SRC (pat);
23451 dest = SET_DEST (pat);
23453 gcc_assert (src == stack_pointer_rtx);
23454 reg = REGNO (dest);
23455 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23458 handled_one = true;
23461 case REG_CFA_DEF_CFA:
23462 case REG_CFA_EXPRESSION:
23463 case REG_CFA_ADJUST_CFA:
23464 case REG_CFA_OFFSET:
23465 /* ??? Only handling here what we actually emit. */
23466 gcc_unreachable ();
23474 pat = PATTERN (insn);
23477 switch (GET_CODE (pat))
23480 arm_unwind_emit_set (asm_out_file, pat);
23484 /* Store multiple. */
23485 arm_unwind_emit_sequence (asm_out_file, pat);
23494 /* Output a reference from a function exception table to the type_info
23495 object X. The EABI specifies that the symbol should be relocated by
23496 an R_ARM_TARGET2 relocation. */
23499 arm_output_ttype (rtx x)
23501 fputs ("\t.word\t", asm_out_file);
23502 output_addr_const (asm_out_file, x);
23503 /* Use special relocations for symbol references. */
23504 if (GET_CODE (x) != CONST_INT)
23505 fputs ("(TARGET2)", asm_out_file);
23506 fputc ('\n', asm_out_file);
23511 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23514 arm_asm_emit_except_personality (rtx personality)
23516 fputs ("\t.personality\t", asm_out_file);
23517 output_addr_const (asm_out_file, personality);
23518 fputc ('\n', asm_out_file);
23521 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23524 arm_asm_init_sections (void)
23526 exception_section = get_unnamed_section (0, output_section_asm_op,
23529 #endif /* ARM_UNWIND_INFO */
23531 /* Output unwind directives for the start/end of a function. */
23534 arm_output_fn_unwind (FILE * f, bool prologue)
23536 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23540 fputs ("\t.fnstart\n", f);
23543 /* If this function will never be unwound, then mark it as such.
23544 The same condition is used in arm_unwind_emit to suppress
23545 the frame annotations. */
23546 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23547 && (TREE_NOTHROW (current_function_decl)
23548 || crtl->all_throwers_are_sibcalls))
23549 fputs("\t.cantunwind\n", f);
23551 fputs ("\t.fnend\n", f);
23556 arm_emit_tls_decoration (FILE *fp, rtx x)
23558 enum tls_reloc reloc;
23561 val = XVECEXP (x, 0, 0);
23562 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
23564 output_addr_const (fp, val);
23569 fputs ("(tlsgd)", fp);
23572 fputs ("(tlsldm)", fp);
23575 fputs ("(tlsldo)", fp);
23578 fputs ("(gottpoff)", fp);
23581 fputs ("(tpoff)", fp);
23584 fputs ("(tlsdesc)", fp);
23587 gcc_unreachable ();
23596 fputs (" + (. - ", fp);
23597 output_addr_const (fp, XVECEXP (x, 0, 2));
23598 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
23599 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
23600 output_addr_const (fp, XVECEXP (x, 0, 3));
23610 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
23613 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
23615 gcc_assert (size == 4);
23616 fputs ("\t.word\t", file);
23617 output_addr_const (file, x);
23618 fputs ("(tlsldo)", file);
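/* The result is a directive of the form (illustrative, for some TLS
   symbol x):

	.word	x(tlsldo)  */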
23621 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
23624 arm_output_addr_const_extra (FILE *fp, rtx x)
23626 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
23627 return arm_emit_tls_decoration (fp, x);
23628 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
23631 int labelno = INTVAL (XVECEXP (x, 0, 0));
23633 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
23634 assemble_name_raw (fp, label);
23638 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
23640 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
23644 output_addr_const (fp, XVECEXP (x, 0, 0));
23648 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
23650 output_addr_const (fp, XVECEXP (x, 0, 0));
23654 output_addr_const (fp, XVECEXP (x, 0, 1));
23658 else if (GET_CODE (x) == CONST_VECTOR)
23659 return arm_emit_vector_const (fp, x);
23664 /* Output assembly for a shift instruction.
23665 SET_FLAGS determines how the instruction modifies the condition codes.
23666 0 - Do not set condition codes.
23667 1 - Set condition codes.
23668 2 - Use smallest instruction. */
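/* E.g. (illustrative): for a left shift by a constant under unified
   syntax, the code below builds an output template such as
   "lsl%?\t%0, %1, %2", where the flag character chosen from
   flag_chars selects between the plain, flag-setting and
   smallest-encoding forms.  */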
23670 arm_output_shift(rtx * operands, int set_flags)
23673 static const char flag_chars[3] = {'?', '.', '!'};
23678 c = flag_chars[set_flags];
23679 if (TARGET_UNIFIED_ASM)
23681 shift = shift_op(operands[3], &val);
23685 operands[2] = GEN_INT(val);
23686 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
23689 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
23692 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
23693 output_asm_insn (pattern, operands);
23697 /* Output a Thumb-1 casesi dispatch sequence. */
23699 thumb1_output_casesi (rtx *operands)
23701 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23703 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23705 switch (GET_MODE(diff_vec))
23708 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23709 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23711 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23712 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23714 return "bl\t%___gnu_thumb1_case_si";
23716 gcc_unreachable ();
23720 /* Output a Thumb-2 casesi instruction. */
23722 thumb2_output_casesi (rtx *operands)
23724 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23726 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23728 output_asm_insn ("cmp\t%0, %1", operands);
23729 output_asm_insn ("bhi\t%l3", operands);
23730 switch (GET_MODE(diff_vec))
23733 return "tbb\t[%|pc, %0]";
23735 return "tbh\t[%|pc, %0, lsl #1]";
23739 output_asm_insn ("adr\t%4, %l2", operands);
23740 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23741 output_asm_insn ("add\t%4, %4, %5", operands);
23746 output_asm_insn ("adr\t%4, %l2", operands);
23747 return "ldr\t%|pc, [%4, %0, lsl #2]";
23750 gcc_unreachable ();
23754 /* Most ARM cores are single issue, but some newer ones can dual issue.
23755 The scheduler descriptions rely on this being correct. */
23757 arm_issue_rate (void)
23776 /* A table and a function to perform ARM-specific name mangling for
23777 NEON vector types in order to conform to the AAPCS (see "Procedure
23778 Call Standard for the ARM Architecture", Appendix A). To qualify
23779 for emission with the mangled names defined in that document, a
23780 vector type must not only be of the correct mode but also be
23781 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23784 enum machine_mode mode;
23785 const char *element_type_name;
23786 const char *aapcs_name;
23787 } arm_mangle_map_entry;
23789 static arm_mangle_map_entry arm_mangle_map[] = {
23790 /* 64-bit containerized types. */
23791 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23792 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23793 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23794 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23795 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23796 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23797 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23798 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23799 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23800 /* 128-bit containerized types. */
23801 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23802 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23803 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23804 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23805 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23806 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23807 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23808 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23809 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23810 { VOIDmode, NULL, NULL }
23814 arm_mangle_type (const_tree type)
23816 arm_mangle_map_entry *pos = arm_mangle_map;
23818 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23819 has to be mangled as if it is in the "std" namespace. */
23820 if (TARGET_AAPCS_BASED
23821 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23823 static bool warned;
23824 if (!warned && warn_psabi && !in_system_header)
23827 inform (input_location,
23828 "the mangling of %<va_list%> has changed in GCC 4.4");
23830 return "St9__va_list";
23833 /* Half-precision float. */
23834 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23837 if (TREE_CODE (type) != VECTOR_TYPE)
23840 /* Check the mode of the vector type, and the name of the vector
23841 element type, against the table. */
23842 while (pos->mode != VOIDmode)
23844 tree elt_type = TREE_TYPE (type);
23846 if (pos->mode == TYPE_MODE (type)
23847 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23848 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23849 pos->element_type_name))
23850 return pos->aapcs_name;
23855 /* Use the default mangling for unrecognized (possibly user-defined)
23856 vector types. */
23860 /* Order of allocation of core registers for Thumb: this allocation is
23861 written over the corresponding initial entries of the array
23862 initialized with REG_ALLOC_ORDER. We allocate all low registers
23863 first. Saving and restoring a low register is usually cheaper than
23864 using a call-clobbered high register. */
23866 static const int thumb_core_reg_alloc_order[] =
23868 3, 2, 1, 0, 4, 5, 6, 7,
23869 14, 12, 8, 9, 10, 11, 13, 15
23872 /* Adjust register allocation order when compiling for Thumb. */
23875 arm_order_regs_for_local_alloc (void)
23877 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23878 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23880 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23881 sizeof (thumb_core_reg_alloc_order));
23884 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23887 arm_frame_pointer_required (void)
23889 return (cfun->has_nonlocal_label
23890 || SUBTARGET_FRAME_POINTER_REQUIRED
23891 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23894 /* Only thumb1 can't support conditional execution, so return true if
23895 the target is not thumb1. */
23897 arm_have_conditional_execution (void)
23899 return !TARGET_THUMB1;
23902 /* Legitimize a memory reference for sync primitive implemented using
23903 ldrex / strex. We currently force the form of the reference to be
23904 indirect without offset. We do not yet support the indirect offset
23905 addressing supported by some ARM targets for these
23906 semaphores. */
23908 arm_legitimize_sync_memory (rtx memory)
23910 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23911 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23913 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23914 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23915 return legitimate_memory;
23918 /* An instruction emitter. */
23919 typedef void (* emit_f) (int label, const char *, rtx *);
23921 /* An instruction emitter that emits via the conventional
23922 output_asm_insn. */
23924 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23926 output_asm_insn (pattern, operands);
23929 /* Count the number of emitted synchronization instructions. */
23930 static unsigned arm_insn_count;
23932 /* An emitter that counts emitted instructions but does not actually
23933 emit instruction into the instruction stream. */
23935 arm_count (int label,
23936 const char *pattern ATTRIBUTE_UNUSED,
23937 rtx *operands ATTRIBUTE_UNUSED)
23943 /* Construct a pattern using conventional output formatting and feed
23944 it to output_asm_insn. Provides a mechanism to construct the
23945 output pattern on the fly. Note the hard limit on the pattern
23946 buffer size. */
23947 static void ATTRIBUTE_PRINTF_4
23948 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23949 const char *pattern, ...)
23954 va_start (ap, pattern);
23955 vsprintf (buffer, pattern, ap);
23957 emit (label, buffer, operands);
23960 /* Emit the memory barrier instruction, if any, provided by this
23961 target to a specified emitter. */
23963 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23965 if (TARGET_HAVE_DMB)
23967 /* Note we issue a system level barrier. We should consider
23968 issuing an inner shareability zone barrier here instead, i.e.
23969 "DMB ISH". */
23970 emit (0, "dmb\tsy", operands);
23974 if (TARGET_HAVE_DMB_MCR)
23976 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23980 gcc_unreachable ();
23983 /* Emit the memory barrier instruction, if any, provided by this
23984 target. */
23986 arm_output_memory_barrier (rtx *operands)
23988 arm_process_output_memory_barrier (arm_emit, operands);
23992 /* Helper to figure out the instruction suffix required on ldrex/strex
23993 for operations on an object of the specified mode. */
23994 static const char *
23995 arm_ldrex_suffix (enum machine_mode mode)
23999 case QImode: return "b";
24000 case HImode: return "h";
24001 case SImode: return "";
24002 case DImode: return "d";
24004 gcc_unreachable ();
24009 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
24010 mode. */
24012 arm_output_ldrex (emit_f emit,
24013 enum machine_mode mode,
24017 const char *suffix = arm_ldrex_suffix (mode);
24020 operands[0] = target;
24021 operands[1] = memory;
24022 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
24025 /* Emit a strex{b,h,d, } instruction appropriate for the specified
24026 mode. */
24028 arm_output_strex (emit_f emit,
24029 enum machine_mode mode,
24035 const char *suffix = arm_ldrex_suffix (mode);
24038 operands[0] = result;
24039 operands[1] = value;
24040 operands[2] = memory;
24041 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
24045 /* Helper to emit a two operand instruction. */
24047 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
24053 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
24056 /* Helper to emit a three operand instruction. */
24058 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
24065 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
24068 /* Emit a load store exclusive synchronization loop.
24070    do
24071      old_value = [mem]
24072      if old_value != required_value
24073        break;
24074      t1 = sync_op (old_value, new_value)
24075      [mem] = t1, t2 = [0|1]
24076    while ! t2
24078    Note:
24079      t1 == t2 is not permitted
24080      t1 == old_value is permitted
24082    required_value:
24084    RTX register or const_int representing the required old_value for
24085    the modify to continue; if NULL, no comparison is performed. */
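/* For a word-sized fetch-and-add, the loop generated below looks
   roughly like this (illustrative only; the label and register
   choices vary with the operands supplied):

	dmb	sy
   1:	ldrex	r0, [r2]
	add	r1, r0, r3
	strex	r4, r1, [r2]
	teq	r4, #0
	bne	1b
	dmb	sy  */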
24087 arm_output_sync_loop (emit_f emit,
24088 enum machine_mode mode,
24091 rtx required_value,
24095 enum attr_sync_op sync_op,
24096 int early_barrier_required)
24100 gcc_assert (t1 != t2);
24102 if (early_barrier_required)
24103 arm_process_output_memory_barrier (emit, NULL);
24105 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
24107 arm_output_ldrex (emit, mode, old_value, memory);
24109 if (required_value)
24113 operands[0] = old_value;
24114 operands[1] = required_value;
24115 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
24116 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
24122 arm_output_op3 (emit, "add", t1, old_value, new_value);
24126 arm_output_op3 (emit, "sub", t1, old_value, new_value);
24130 arm_output_op3 (emit, "orr", t1, old_value, new_value);
24134 arm_output_op3 (emit, "eor", t1, old_value, new_value);
24138 arm_output_op3 (emit,"and", t1, old_value, new_value);
24142 arm_output_op3 (emit, "and", t1, old_value, new_value);
24143 arm_output_op2 (emit, "mvn", t1, t1);
24153 arm_output_strex (emit, mode, "", t2, t1, memory);
24155 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24156 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24157 LOCAL_LABEL_PREFIX);
24161 /* Use old_value for the return value because for some operations
24162 the old_value can easily be restored. This saves one register. */
24163 arm_output_strex (emit, mode, "", old_value, t1, memory);
24164 operands[0] = old_value;
24165 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24166 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24167 LOCAL_LABEL_PREFIX);
24172 arm_output_op3 (emit, "sub", old_value, t1, new_value);
24176 arm_output_op3 (emit, "add", old_value, t1, new_value);
24180 arm_output_op3 (emit, "eor", old_value, t1, new_value);
24184 arm_output_op2 (emit, "mov", old_value, required_value);
24188 gcc_unreachable ();
24192 arm_process_output_memory_barrier (emit, NULL);
24193 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
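/* For example, FETCH_SYNC_OPERAND(result, 0) expands to

     arm_get_sync_operand (operands, (int) get_attr_sync_result (insn), 0);

   so a "sync_result" attribute value of N selects operands[N - 1],
   while 0 means "no such operand" and yields the default.  */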
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND(result, 0);
  memory = FETCH_SYNC_OPERAND(memory, 0);
  required_value = FETCH_SYNC_OPERAND(required_value, 0);
  new_value = FETCH_SYNC_OPERAND(new_value, 0);
  t1 = FETCH_SYNC_OPERAND(t1, 0);
  t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
                        new_value, t1, t2, sync_op, early_barrier);
}
/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}

/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions; it just counts them, being careful not to count
   labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
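/* A sync pattern in the machine description can then size itself with
   a length attribute along the lines of

     (set (attr "length")
          (symbol_ref "arm_sync_loop_insns (insn, operands) * 4"))

   (a sketch of the intended use; the exact attribute expressions live
   in the sync patterns of the machine description).  */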
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
                    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}
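/* A caller fills in the generator with the appropriate gen_* function
   before passing it down, e.g. (sketch only; the generator function
   name is hypothetical):

     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
     generator.u.omn = gen_arm_sync_new_addsi;

   arm_call_generator then invokes it with the right operand subset.  */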
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
                 struct arm_sync_generator *generator,
                 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
        required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
                                     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    {
      emit_insn (arm_call_generator (generator, target, memory, required_value,
                                     new_value));
    }
}
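/* For illustration, a compare-and-swap expander might call this as
   (sketch only; the generator function name and operand layout are
   assumptions, not taken from the machine description):

     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omrn;
     generator.u.omrn = gen_arm_sync_compare_and_swapsi;
     arm_expand_sync (SImode, &generator, operands[0], operands[1],
                      operands[2], operands[3]);

   For sub-word modes the helper widens the values to SImode and
   narrows the result back with gen_lowpart.  */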
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
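/* The return value is a bitmask of candidate vector sizes in bytes:
   16 | 8 lets the vectorizer try quad-word NEON vectors and fall back
   to double-word ones, while 0 means only the default vector size is
   used (here, when double-word-only NEON vectorization is requested).  */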
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
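/* For example, with 4-byte elements (align == 4) a known misalignment
   of 4 or 8 bytes is supported, a misalignment of 2 is rejected, and an
   unknown misalignment (-1) is accepted for non-packed accesses
   (illustrative numbers only).  */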
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
           regno <= LAST_FPA_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM;
           regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
        {
          for (regno = FIRST_FPA_REGNUM;
               regno <= LAST_FPA_REGNUM; ++ regno)
            fixed_regs[regno] = call_used_regs[regno] = 1;
          for (regno = FIRST_CIRRUS_FP_REGNUM;
               regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
            }
        }
      if (TARGET_VFP)
        {
          /* VFPv3 registers are disabled when earlier VFP
             versions are selected due to the definition of
             LAST_VFP_REGNUM.  */
          for (regno = FIRST_VFP_REGNUM;
               regno <= LAST_VFP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
                || regno >= FIRST_VFP_REGNUM + 32;
            }
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we therefore
     prefer LO_REGS so that code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
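/* For example (illustrative): in Thumb-2, "push {r0-r7, lr}" uses only
   low registers and LR, so it fits the 16-bit encoding (length 2),
   whereas "push {r0, r8}" involves a high register other than LR and
   needs the 32-bit encoding (length 4).  */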
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;

  output_move_double (operands, false, &count);
  return count;
}

#include "gt-arm.h"