/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
#include "params.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;
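
/* Minipools are blocks of constants placed in the instruction stream,
   within range of the PC-relative loads that reference them; a
   minipool_node describes one pooled constant and a minipool_fixup
   describes an instruction that needs one.  */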

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
				      const_tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static unsigned int arm_units_per_simd_word (enum machine_mode);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
#define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD arm_units_per_simd_word

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX
                                         technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)

#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M     (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7      ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM    (FL_FOR_ARCH7M | FL_ARCH7EM)

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
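/* Unified assembler syntax (UAL) spells the left-shift mnemonic "lsl";
   the older divided syntax spells it "asl".  */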
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
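/* That is: the set of low registers (r0-r7) available as work registers,
   excluding the Thumb hard frame pointer and, where one is in use, the
   PIC register.  */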

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
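  /* An illustrative arm-cores.def entry such as
       ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
     would expand, via the macro above, to
       {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
        &arm_9e_tune},  */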
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};

struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
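
/* For example, emit_set_insn (reg, GEN_INT (42)) emits
   (set (reg) (const_int 42)) as a new insn in the instruction stream.  */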

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
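
/* This is Kernighan's method: each iteration clears the lowest set bit,
   so e.g. value = 0x29 goes 0x29 -> 0x28 -> 0x20 -> 0, giving a count
   of 3 in three iterations.  */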

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
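
  /* A call to __aeabi_idivmod, for example, returns the quotient in r0
     and the remainder in r1; a caller that wants only the quotient
     simply ignores r1.  */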

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Lookup NAME in SEL.  */

static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
	return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}

static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
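      /* (The bit counts above are effective byte-offset ranges: the
	 load offsets are 5-bit immediates scaled by the access size,
	 so word loads reach byte offsets 0-124, within the 0-127
	 anchor range chosen here.)  */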
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
	{
	  arm_fpu_desc = &all_fpus[i];
	  break;
	}
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }
1759 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1760 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1761 will ever exist. GCC makes no attempt to support this combination. */
1762 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1763 sorry ("iWMMXt and hardware floating point");
1765 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1766 if (TARGET_THUMB2 && TARGET_IWMMXT)
1767 sorry ("Thumb-2 iWMMXt");
1769 /* __fp16 support currently assumes the core has ldrh. */
1770 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1771 sorry ("__fp16 and no ldrh");
1773 /* If soft-float is specified then don't use FPU. */
1774 if (TARGET_SOFT_FLOAT)
1775 arm_fpu_attr = FPU_NONE;
1777 if (TARGET_AAPCS_BASED)
1779 if (arm_abi == ARM_ABI_IWMMXT)
1780 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1781 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1782 && TARGET_HARD_FLOAT
1784 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1786 arm_pcs_default = ARM_PCS_AAPCS;
1790 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1791 sorry ("-mfloat-abi=hard and VFP");
1793 if (arm_abi == ARM_ABI_APCS)
1794 arm_pcs_default = ARM_PCS_APCS;
1796 arm_pcs_default = ARM_PCS_ATPCS;
1799 /* For arm2/3 there is no need to do any scheduling if there is only
1800 a floating point emulator, or we are doing software floating-point. */
1801 if ((TARGET_SOFT_FLOAT
1802 || (TARGET_FPA && arm_fpu_desc->rev))
1803 && (tune_flags & FL_MODE32) == 0)
1804 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1806 if (target_thread_switch)
1808 if (strcmp (target_thread_switch, "soft") == 0)
1809 target_thread_pointer = TP_SOFT;
1810 else if (strcmp (target_thread_switch, "auto") == 0)
1811 target_thread_pointer = TP_AUTO;
1812 else if (strcmp (target_thread_switch, "cp15") == 0)
1813 target_thread_pointer = TP_CP15;
1815 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1818 /* Use the cp15 method if it is available. */
1819 if (target_thread_pointer == TP_AUTO)
1821 if (arm_arch6k && !TARGET_THUMB1)
1822 target_thread_pointer = TP_CP15;
1824 target_thread_pointer = TP_SOFT;
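/* Editor's illustration (not part of GCC): the cp15 method reads the
   ARMv6K TPIDRURO register, roughly as in this GNU C sketch, whereas
   TP_SOFT makes generated code call the __aeabi_read_tp helper instead.

     static inline void *
     demo_read_thread_pointer (void)
     {
       void *tp;
       __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tp));
       return tp;
     }
*/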
1827 if (TARGET_HARD_TP && TARGET_THUMB1)
1828 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1830 /* Override the default structure alignment for AAPCS ABI. */
1831 if (TARGET_AAPCS_BASED)
1832 arm_structure_size_boundary = 8;
1834 if (structure_size_string != NULL)
1836 int size = strtol (structure_size_string, NULL, 0);
1838 if (size == 8 || size == 32
1839 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1840 arm_structure_size_boundary = size;
1842 warning (0, "structure size boundary can only be set to %s",
1843 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1846 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1848 error ("RTP PIC is incompatible with Thumb");
1852 /* If stack checking is disabled, we can use r10 as the PIC register,
1853 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1854 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1856 if (TARGET_VXWORKS_RTP)
1857 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1858 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1861 if (flag_pic && TARGET_VXWORKS_RTP)
1862 arm_pic_register = 9;
1864 if (arm_pic_register_string != NULL)
1866 int pic_register = decode_reg_name (arm_pic_register_string);
1869 warning (0, "-mpic-register= is useless without -fpic");
1871 /* Prevent the user from choosing an obviously stupid PIC register. */
1872 else if (pic_register < 0 || call_used_regs[pic_register]
1873 || pic_register == HARD_FRAME_POINTER_REGNUM
1874 || pic_register == STACK_POINTER_REGNUM
1875 || pic_register >= PC_REGNUM
1876 || (TARGET_VXWORKS_RTP
1877 && (unsigned int) pic_register != arm_pic_register))
1878 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1880 arm_pic_register = pic_register;
1883 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1884 if (fix_cm3_ldrd == 2)
1886 if (arm_selected_cpu->core == cortexm3)
1892 if (TARGET_THUMB1 && flag_schedule_insns)
1894 /* Don't warn since it's on by default in -O2. */
1895 flag_schedule_insns = 0;
1900 /* If optimizing for size, bump the number of instructions that we
1901 are prepared to conditionally execute (even on a StrongARM). */
1902 max_insns_skipped = 6;
1906 /* StrongARM has early execution of branches, so a sequence
1907 that is worth skipping is shorter. */
1908 if (arm_tune_strongarm)
1909 max_insns_skipped = 3;
1912 /* Hot/Cold partitioning is not currently supported, since we can't
1913 handle literal pool placement in that case. */
1914 if (flag_reorder_blocks_and_partition)
1916 inform (input_location,
1917 "-freorder-blocks-and-partition not supported on this architecture");
1918 flag_reorder_blocks_and_partition = 0;
1919 flag_reorder_blocks = 1;
1922 if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
1924 /* Hoisting PIC address calculations more aggressively provides a small,
1925 but measurable, size reduction for PIC code. Therefore, we decrease
1926 the bar for unrestricted expression hoisting to the cost of PIC address
1927 calculation, which is 2 instructions. */
1928 set_param_value ("gcse-unrestricted-cost", 2);
1930 /* Register global variables with the garbage collector. */
1931 arm_add_gc_roots ();
1935 arm_add_gc_roots (void)
1937 gcc_obstack_init(&minipool_obstack);
1938 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1941 /* A table of known ARM exception types.
1942 For use with the interrupt function attribute. */
1946 const char *const arg;
1947 const unsigned long return_value;
1951 static const isr_attribute_arg isr_attribute_args [] =
1953 { "IRQ", ARM_FT_ISR },
1954 { "irq", ARM_FT_ISR },
1955 { "FIQ", ARM_FT_FIQ },
1956 { "fiq", ARM_FT_FIQ },
1957 { "ABORT", ARM_FT_ISR },
1958 { "abort", ARM_FT_ISR },
1961 { "UNDEF", ARM_FT_EXCEPTION },
1962 { "undef", ARM_FT_EXCEPTION },
1963 { "SWI", ARM_FT_EXCEPTION },
1964 { "swi", ARM_FT_EXCEPTION },
1965 { NULL, ARM_FT_NORMAL }
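/* Editor's illustration: a handler selects one of the rows above via
   the GCC attribute syntax, e.g.

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   which arm_isr_value below maps to ARM_FT_FIQ.  */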
1968 /* Returns the (interrupt) function type of the current
1969 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1971 static unsigned long
1972 arm_isr_value (tree argument)
1974 const isr_attribute_arg * ptr;
1978 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1980 /* No argument - default to IRQ. */
1981 if (argument == NULL_TREE)
1984 /* Get the value of the argument. */
1985 if (TREE_VALUE (argument) == NULL_TREE
1986 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1987 return ARM_FT_UNKNOWN;
1989 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1991 /* Check it against the list of known arguments. */
1992 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1993 if (streq (arg, ptr->arg))
1994 return ptr->return_value;
1996 /* An unrecognized interrupt type. */
1997 return ARM_FT_UNKNOWN;
2000 /* Computes the type of the current function. */
2002 static unsigned long
2003 arm_compute_func_type (void)
2005 unsigned long type = ARM_FT_UNKNOWN;
2009 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2011 /* Decide if the current function is volatile. Such functions
2012 never return, and many memory cycles can be saved by not storing
2013 register values that will never be needed again. This optimization
2014 was added to speed up context switching in a kernel application. */
2016 && (TREE_NOTHROW (current_function_decl)
2017 || !(flag_unwind_tables
2018 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
2019 && TREE_THIS_VOLATILE (current_function_decl))
2020 type |= ARM_FT_VOLATILE;
2022 if (cfun->static_chain_decl != NULL)
2023 type |= ARM_FT_NESTED;
2025 attr = DECL_ATTRIBUTES (current_function_decl);
2027 a = lookup_attribute ("naked", attr);
2029 type |= ARM_FT_NAKED;
2031 a = lookup_attribute ("isr", attr);
2033 a = lookup_attribute ("interrupt", attr);
2036 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2038 type |= arm_isr_value (TREE_VALUE (a));
2043 /* Returns the type of the current function. */
2046 arm_current_func_type (void)
2048 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2049 cfun->machine->func_type = arm_compute_func_type ();
2051 return cfun->machine->func_type;
2055 arm_allocate_stack_slots_for_args (void)
2057 /* Naked functions should not allocate stack slots for arguments. */
2058 return !IS_NAKED (arm_current_func_type ());
2062 /* Output assembler code for a block containing the constant parts
2063 of a trampoline, leaving space for the variable parts.
2065 On the ARM, (if r8 is the static chain regnum, and remembering that
2066 referencing pc adds an offset of 8) the trampoline looks like:
2067 ldr r8, [pc, #0]
2068 ldr pc, [pc]
2069 .word static chain value
2070 .word function's address
2071 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2074 arm_asm_trampoline_template (FILE *f)
2078 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2079 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2081 else if (TARGET_THUMB2)
2083 /* The Thumb-2 trampoline is similar to the ARM implementation.
2084 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2085 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2086 STATIC_CHAIN_REGNUM, PC_REGNUM);
2087 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2091 ASM_OUTPUT_ALIGN (f, 2);
2092 fprintf (f, "\t.code\t16\n");
2093 fprintf (f, ".Ltrampoline_start:\n");
2094 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2095 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2096 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2097 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2098 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2099 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2101 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2102 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2105 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2108 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2110 rtx fnaddr, mem, a_tramp;
2112 emit_block_move (m_tramp, assemble_trampoline_template (),
2113 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2115 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2116 emit_move_insn (mem, chain_value);
2118 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2119 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2120 emit_move_insn (mem, fnaddr);
2122 a_tramp = XEXP (m_tramp, 0);
2123 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2124 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2125 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2128 /* Thumb trampolines should be entered in thumb mode, so set
2129 the bottom bit of the address. */
2132 arm_trampoline_adjust_address (rtx addr)
2135 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2136 NULL, 0, OPTAB_LIB_WIDEN);
2140 /* Return 1 if it is possible to return using a single instruction.
2141 If SIBLING is non-null, this is a test for a return before a sibling
2142 call. SIBLING is the call insn, so we can examine its register usage. */
2145 use_return_insn (int iscond, rtx sibling)
2148 unsigned int func_type;
2149 unsigned long saved_int_regs;
2150 unsigned HOST_WIDE_INT stack_adjust;
2151 arm_stack_offsets *offsets;
2153 /* Never use a return instruction before reload has run. */
2154 if (!reload_completed)
2157 func_type = arm_current_func_type ();
2159 /* Naked, volatile and stack alignment functions need special
2160 consideration. */
2161 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2164 /* So do interrupt functions that use the frame pointer and Thumb
2165 interrupt functions. */
2166 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2169 offsets = arm_get_frame_offsets ();
2170 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2172 /* As do variadic functions. */
2173 if (crtl->args.pretend_args_size
2174 || cfun->machine->uses_anonymous_args
2175 /* Or if the function calls __builtin_eh_return () */
2176 || crtl->calls_eh_return
2177 /* Or if the function calls alloca */
2178 || cfun->calls_alloca
2179 /* Or if there is a stack adjustment. However, if the stack pointer
2180 is saved on the stack, we can use a pre-incrementing stack load. */
2181 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2182 && stack_adjust == 4)))
2185 saved_int_regs = offsets->saved_regs_mask;
2187 /* Unfortunately, the insn
2189 ldmib sp, {..., sp, ...}
2191 triggers a bug on most SA-110 based devices, such that the stack
2192 pointer won't be correctly restored if the instruction takes a
2193 page fault. We work around this problem by popping r3 along with
2194 the other registers, since that is never slower than executing
2195 another instruction.
2197 We test for !arm_arch5 here, because code for any architecture
2198 less than this could potentially be run on one of the buggy
2199 chips. */
2200 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2202 /* Validate that r3 is a call-clobbered register (always true in
2203 the default ABI) ... */
2204 if (!call_used_regs[3])
2207 /* ... that it isn't being used for a return value ... */
2208 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2211 /* ... or for a tail-call argument ... */
2214 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2216 if (find_regno_fusage (sibling, USE, 3))
2220 /* ... and that there are no call-saved registers in r0-r2
2221 (always true in the default ABI). */
2222 if (saved_int_regs & 0x7)
2226 /* Can't be done if interworking with Thumb, and any registers have been
2228 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2231 /* On StrongARM, conditional returns are expensive if they aren't
2232 taken and multiple registers have been stacked. */
2233 if (iscond && arm_tune_strongarm)
2235 /* Conditional return when just the LR is stored is a simple
2236 conditional-load instruction, that's not expensive. */
2237 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2241 && arm_pic_register != INVALID_REGNUM
2242 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2246 /* If there are saved registers but the LR isn't saved, then we need
2247 two instructions for the return. */
2248 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2251 /* Can't be done if any of the FPA regs are pushed,
2252 since this also requires an insn. */
2253 if (TARGET_HARD_FLOAT && TARGET_FPA)
2254 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2255 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2258 /* Likewise VFP regs. */
2259 if (TARGET_HARD_FLOAT && TARGET_VFP)
2260 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2261 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2264 if (TARGET_REALLY_IWMMXT)
2265 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2266 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2272 /* Return TRUE if int I is a valid immediate ARM constant. */
2275 const_ok_for_arm (HOST_WIDE_INT i)
2279 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2280 be all zero, or all one. */
2281 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2282 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2283 != ((~(unsigned HOST_WIDE_INT) 0)
2284 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2287 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2289 /* Fast return for 0 and small values. We must do this for zero, since
2290 the code below can't handle that one case. */
2291 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2294 /* Get the number of trailing zeros. */
2295 lowbit = ffs((int) i) - 1;
2297 /* Only even shifts are allowed in ARM mode so round down to the
2298 nearest even number. */
2302 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2307 /* Allow rotated constants in ARM mode. */
2309 && ((i & ~0xc000003f) == 0
2310 || (i & ~0xf000000f) == 0
2311 || (i & ~0xfc000003) == 0))
2318 /* Allow repeated pattern. */
2321 if (i == v || i == (v | (v << 8)))
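/* Editor's demo (not part of GCC; guarded by #if 0 so the build is
   unaffected): a self-contained restatement of the ARM-mode rule
   implemented above -- an immediate is valid iff some even rotation
   of it fits in 8 bits.  */
#if 0
#include <stdio.h>

static unsigned int
demo_rotate_left (unsigned int v, int n)
{
  return n ? ((v << n) | (v >> (32 - n))) : v;
}

static int
demo_const_ok_for_arm (unsigned int x)
{
  int rot;

  /* ARM encodes an 8-bit constant rotated right by an even amount,
     so some even left-rotation of a valid value fits in 8 bits.  */
  for (rot = 0; rot < 32; rot += 2)
    if (demo_rotate_left (x, rot) <= 0xff)
      return 1;
  return 0;
}

int
main (void)
{
  /* 0xff000000 is 0xff rotated right by 8: valid.
     0x000003fc is 0xff rotated right by 30: valid.
     0x00000101 spans nine bits and never fits: invalid.  */
  printf ("%d %d %d\n",
          demo_const_ok_for_arm (0xff000000u),
          demo_const_ok_for_arm (0x000003fcu),
          demo_const_ok_for_arm (0x00000101u));
  return 0;
}
#endif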
2328 /* Return true if I is a valid constant for the operation CODE. */
2330 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2332 if (const_ok_for_arm (i))
2356 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2358 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2364 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2368 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2375 /* Emit a sequence of insns to handle a large constant.
2376 CODE is the code of the operation required, it can be any of SET, PLUS,
2377 IOR, AND, XOR, MINUS;
2378 MODE is the mode in which the operation is being performed;
2379 VAL is the integer to operate on;
2380 SOURCE is the other operand (a register, or a null-pointer for SET);
2381 SUBTARGETS means it is safe to create scratch registers if that will
2382 either produce a simpler sequence, or we will want to cse the values.
2383 Return value is the number of insns emitted. */
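/* Editor's worked example: 0x12345678 is not a rotatable 8-bit
   immediate, and neither are its negation and inverse, so on cores
   without movw/movt it is synthesized from four rotated bytes:

     mov  rd, #0x12000000
     orr  rd, rd, #0x00340000
     orr  rd, rd, #0x00005600
     orr  rd, rd, #0x00000078

   matching the four-instruction worst case noted in arm_gen_constant
   below.  */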
2385 /* ??? Tweak this for thumb2. */
2387 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2388 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2392 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2393 cond = COND_EXEC_TEST (PATTERN (insn));
2397 if (subtargets || code == SET
2398 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2399 && REGNO (target) != REGNO (source)))
2401 /* After arm_reorg has been called, we can't fix up expensive
2402 constants by pushing them into memory so we must synthesize
2403 them in-line, regardless of the cost. This is only likely to
2404 be more costly on chips that have load delay slots and we are
2405 compiling without running the scheduler (so no splitting
2406 occurred before the final instruction emission).
2408 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2410 if (!after_arm_reorg
2412 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2414 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2419 /* Currently SET is the only monadic value for CODE; all
2420 the rest are dyadic. */
2421 if (TARGET_USE_MOVT)
2422 arm_emit_movpair (target, GEN_INT (val));
2424 emit_set_insn (target, GEN_INT (val));
2430 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2432 if (TARGET_USE_MOVT)
2433 arm_emit_movpair (temp, GEN_INT (val));
2435 emit_set_insn (temp, GEN_INT (val));
2437 /* For MINUS, the value is subtracted from, since we never
2438 have subtraction of a constant. */
2440 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2442 emit_set_insn (target,
2443 gen_rtx_fmt_ee (code, mode, source, temp));
2449 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2453 /* Return the number of instructions required to synthesize the given
2454 constant, if we start emitting them from bit-position I. */
2456 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2458 HOST_WIDE_INT temp1;
2459 int step_size = TARGET_ARM ? 2 : 1;
2462 gcc_assert (TARGET_ARM || i == 0);
2470 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2475 temp1 = remainder & ((0x0ff << end)
2476 | ((i < end) ? (0xff >> (32 - end)) : 0));
2477 remainder &= ~temp1;
2482 } while (remainder);
2487 find_best_start (unsigned HOST_WIDE_INT remainder)
2489 int best_consecutive_zeros = 0;
2493 /* If we aren't targeting ARM, the best place to start is always at
2494 zero. */
2498 for (i = 0; i < 32; i += 2)
2500 int consecutive_zeros = 0;
2502 if (!(remainder & (3 << i)))
2504 while ((i < 32) && !(remainder & (3 << i)))
2506 consecutive_zeros += 2;
2509 if (consecutive_zeros > best_consecutive_zeros)
2511 best_consecutive_zeros = consecutive_zeros;
2512 best_start = i - consecutive_zeros;
2518 /* So long as it won't require any more insns to do so, it's
2519 desirable to emit a small constant (in bits 0...9) in the last
2520 insn. This way there is more chance that it can be combined with
2521 a later addressing insn to form a pre-indexed load or store
2522 operation. Consider:
2524 *((volatile int *)0xe0000100) = 1;
2525 *((volatile int *)0xe0000110) = 2;
2527 We want this to wind up as:
2531 str rB, [rA, #0x100]
2533 str rB, [rA, #0x110]
2535 rather than having to synthesize both large constants from scratch.
2537 Therefore, we calculate how many insns would be required to emit
2538 the constant starting from `best_start', and also starting from
2539 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2540 yield a shorter sequence, we may as well use zero. */
2542 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2543 && (count_insns_for_constant (remainder, 0) <=
2544 count_insns_for_constant (remainder, best_start)))
2550 /* Emit an instruction with the indicated PATTERN. If COND is
2551 non-NULL, conditionalize the execution of the instruction on COND
2552 being true. */
2555 emit_constant_insn (rtx cond, rtx pattern)
2558 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2559 emit_insn (pattern);
2562 /* As above, but extra parameter GENERATE which, if clear, suppresses
2563 RTL generation. */
2564 /* ??? This needs more work for thumb2. */
2567 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2568 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2573 int final_invert = 0;
2574 int can_negate_initial = 0;
2576 int num_bits_set = 0;
2577 int set_sign_bit_copies = 0;
2578 int clear_sign_bit_copies = 0;
2579 int clear_zero_bit_copies = 0;
2580 int set_zero_bit_copies = 0;
2582 unsigned HOST_WIDE_INT temp1, temp2;
2583 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2584 int step_size = TARGET_ARM ? 2 : 1;
2586 /* Find out which operations are safe for a given CODE. Also do a quick
2587 check for degenerate cases; these can occur when DImode operations
2588 are split. */
2598 can_negate_initial = 1;
2602 if (remainder == 0xffffffff)
2605 emit_constant_insn (cond,
2606 gen_rtx_SET (VOIDmode, target,
2607 GEN_INT (ARM_SIGN_EXTEND (val))));
2613 if (reload_completed && rtx_equal_p (target, source))
2617 emit_constant_insn (cond,
2618 gen_rtx_SET (VOIDmode, target, source));
2630 emit_constant_insn (cond,
2631 gen_rtx_SET (VOIDmode, target, const0_rtx));
2634 if (remainder == 0xffffffff)
2636 if (reload_completed && rtx_equal_p (target, source))
2639 emit_constant_insn (cond,
2640 gen_rtx_SET (VOIDmode, target, source));
2649 if (reload_completed && rtx_equal_p (target, source))
2652 emit_constant_insn (cond,
2653 gen_rtx_SET (VOIDmode, target, source));
2657 if (remainder == 0xffffffff)
2660 emit_constant_insn (cond,
2661 gen_rtx_SET (VOIDmode, target,
2662 gen_rtx_NOT (mode, source)));
2668 /* We treat MINUS as (val - source), since (source - val) is always
2669 passed as (source + (-val)). */
2673 emit_constant_insn (cond,
2674 gen_rtx_SET (VOIDmode, target,
2675 gen_rtx_NEG (mode, source)));
2678 if (const_ok_for_arm (val))
2681 emit_constant_insn (cond,
2682 gen_rtx_SET (VOIDmode, target,
2683 gen_rtx_MINUS (mode, GEN_INT (val),
2695 /* If we can do it in one insn get out quickly. */
2696 if (const_ok_for_arm (val)
2697 || (can_negate_initial && const_ok_for_arm (-val))
2698 || (can_invert && const_ok_for_arm (~val)))
2701 emit_constant_insn (cond,
2702 gen_rtx_SET (VOIDmode, target,
2704 ? gen_rtx_fmt_ee (code, mode, source,
2710 /* Calculate a few attributes that may be useful for specific
2711 machine implementations. */
2712 /* Count number of leading zeros. */
2713 for (i = 31; i >= 0; i--)
2715 if ((remainder & (1 << i)) == 0)
2716 clear_sign_bit_copies++;
2721 /* Count number of leading 1's. */
2722 for (i = 31; i >= 0; i--)
2724 if ((remainder & (1 << i)) != 0)
2725 set_sign_bit_copies++;
2730 /* Count number of trailing zeros. */
2731 for (i = 0; i <= 31; i++)
2733 if ((remainder & (1 << i)) == 0)
2734 clear_zero_bit_copies++;
2739 /* Count number of trailing 1's. */
2740 for (i = 0; i <= 31; i++)
2742 if ((remainder & (1 << i)) != 0)
2743 set_zero_bit_copies++;
2751 /* See if we can use movw. */
2752 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2755 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2760 /* See if we can do this by sign_extending a constant that is known
2761 to be negative. This is a good way of doing it, since the shift
2762 may well merge into a subsequent insn. */
2763 if (set_sign_bit_copies > 1)
2765 if (const_ok_for_arm
2766 (temp1 = ARM_SIGN_EXTEND (remainder
2767 << (set_sign_bit_copies - 1))))
2771 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2772 emit_constant_insn (cond,
2773 gen_rtx_SET (VOIDmode, new_src,
2775 emit_constant_insn (cond,
2776 gen_ashrsi3 (target, new_src,
2777 GEN_INT (set_sign_bit_copies - 1)));
2781 /* For an inverted constant, we will need to set the low bits;
2782 these will be shifted out of harm's way. */
2783 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2784 if (const_ok_for_arm (~temp1))
2788 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2789 emit_constant_insn (cond,
2790 gen_rtx_SET (VOIDmode, new_src,
2792 emit_constant_insn (cond,
2793 gen_ashrsi3 (target, new_src,
2794 GEN_INT (set_sign_bit_copies - 1)));
2800 /* See if we can calculate the value as the difference between two
2801 valid immediates. */
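/* Editor's worked example: for remainder = 0x00ffff00 the code below
   computes temp1 = 0x01000000 and temp2 = 0x00000100, both valid
   immediates, so the value is rebuilt as 0x01000000 - 0x100: a mov of
   temp1 followed by an add of -temp2.  */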
2802 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2804 int topshift = clear_sign_bit_copies & ~1;
2806 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2807 & (0xff000000 >> topshift));
2809 /* If temp1 is zero, then that means the 9 most significant
2810 bits of remainder were 1 and we've caused it to overflow.
2811 When topshift is 0 we don't need to do anything since we
2812 can borrow from 'bit 32'. */
2813 if (temp1 == 0 && topshift != 0)
2814 temp1 = 0x80000000 >> (topshift - 1);
2816 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2818 if (const_ok_for_arm (temp2))
2822 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2823 emit_constant_insn (cond,
2824 gen_rtx_SET (VOIDmode, new_src,
2826 emit_constant_insn (cond,
2827 gen_addsi3 (target, new_src,
2835 /* See if we can generate this by setting the bottom (or the top)
2836 16 bits, and then shifting these into the other half of the
2837 word. We only look for the simplest cases, to do more would cost
2838 too much. Be careful, however, not to generate this when the
2839 alternative would take fewer insns. */
2840 if (val & 0xffff0000)
2842 temp1 = remainder & 0xffff0000;
2843 temp2 = remainder & 0x0000ffff;
2845 /* Overlaps outside this range are best done using other methods. */
2846 for (i = 9; i < 24; i++)
2848 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2849 && !const_ok_for_arm (temp2))
2851 rtx new_src = (subtargets
2852 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2854 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2855 source, subtargets, generate);
2863 gen_rtx_ASHIFT (mode, source,
2870 /* Don't duplicate cases already considered. */
2871 for (i = 17; i < 24; i++)
2873 if (((temp1 | (temp1 >> i)) == remainder)
2874 && !const_ok_for_arm (temp1))
2876 rtx new_src = (subtargets
2877 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2879 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2880 source, subtargets, generate);
2885 gen_rtx_SET (VOIDmode, target,
2888 gen_rtx_LSHIFTRT (mode, source,
2899 /* If we have IOR or XOR, and the constant can be loaded in a
2900 single instruction, and we can find a temporary to put it in,
2901 then this can be done in two instructions instead of 3-4. */
2903 /* TARGET can't be NULL if SUBTARGETS is 0 */
2904 || (reload_completed && !reg_mentioned_p (target, source)))
2906 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2912 emit_constant_insn (cond,
2913 gen_rtx_SET (VOIDmode, sub,
2915 emit_constant_insn (cond,
2916 gen_rtx_SET (VOIDmode, target,
2917 gen_rtx_fmt_ee (code, mode,
2928 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2929 and the remainder 0s, e.g. 0xfff00000)
2930 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2932 This can be done in 2 instructions by using shifts with mov or mvn.
2933 E.g. for
2934 x = x | 0xfff00000;
2935 we generate:
2936 mvn r0, r0, asl #12
2937 mvn r0, r0, lsr #12 */
2938 if (set_sign_bit_copies > 8
2939 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2943 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2944 rtx shift = GEN_INT (set_sign_bit_copies);
2948 gen_rtx_SET (VOIDmode, sub,
2950 gen_rtx_ASHIFT (mode,
2955 gen_rtx_SET (VOIDmode, target,
2957 gen_rtx_LSHIFTRT (mode, sub,
2964 x = y | constant (which has set_zero_bit_copies number of trailing ones)
2965 to
2966 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2968 E.g. for r0 = r0 | 0xfff we generate:
2969 mvn r0, r0, lsr #12
2970 mvn r0, r0, asl #12
2971 */
2973 if (set_zero_bit_copies > 8
2974 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2978 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2979 rtx shift = GEN_INT (set_zero_bit_copies);
2983 gen_rtx_SET (VOIDmode, sub,
2985 gen_rtx_LSHIFTRT (mode,
2990 gen_rtx_SET (VOIDmode, target,
2992 gen_rtx_ASHIFT (mode, sub,
2998 /* This will never be reached for Thumb2 because orn is a valid
2999 instruction. This is for Thumb1 and the ARM 32 bit cases.
3001 x = y | constant (such that ~constant is a valid constant)
3002 Transform this to
3003 x = ~(~y & ~constant). */
3005 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3009 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3010 emit_constant_insn (cond,
3011 gen_rtx_SET (VOIDmode, sub,
3012 gen_rtx_NOT (mode, source)));
3015 sub = gen_reg_rtx (mode);
3016 emit_constant_insn (cond,
3017 gen_rtx_SET (VOIDmode, sub,
3018 gen_rtx_AND (mode, source,
3020 emit_constant_insn (cond,
3021 gen_rtx_SET (VOIDmode, target,
3022 gen_rtx_NOT (mode, sub)));
3029 /* See if two shifts will do 2 or more insns' worth of work. */
3030 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3032 HOST_WIDE_INT shift_mask = ((0xffffffff
3033 << (32 - clear_sign_bit_copies))
3036 if ((remainder | shift_mask) != 0xffffffff)
3040 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3041 insns = arm_gen_constant (AND, mode, cond,
3042 remainder | shift_mask,
3043 new_src, source, subtargets, 1);
3048 rtx targ = subtargets ? NULL_RTX : target;
3049 insns = arm_gen_constant (AND, mode, cond,
3050 remainder | shift_mask,
3051 targ, source, subtargets, 0);
3057 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3058 rtx shift = GEN_INT (clear_sign_bit_copies);
3060 emit_insn (gen_ashlsi3 (new_src, source, shift));
3061 emit_insn (gen_lshrsi3 (target, new_src, shift));
3067 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3069 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3071 if ((remainder | shift_mask) != 0xffffffff)
3075 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3077 insns = arm_gen_constant (AND, mode, cond,
3078 remainder | shift_mask,
3079 new_src, source, subtargets, 1);
3084 rtx targ = subtargets ? NULL_RTX : target;
3086 insns = arm_gen_constant (AND, mode, cond,
3087 remainder | shift_mask,
3088 targ, source, subtargets, 0);
3094 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3095 rtx shift = GEN_INT (clear_zero_bit_copies);
3097 emit_insn (gen_lshrsi3 (new_src, source, shift));
3098 emit_insn (gen_ashlsi3 (target, new_src, shift));
3110 for (i = 0; i < 32; i++)
3111 if (remainder & (1 << i))
3115 || (code != IOR && can_invert && num_bits_set > 16))
3116 remainder ^= 0xffffffff;
3117 else if (code == PLUS && num_bits_set > 16)
3118 remainder = (-remainder) & 0xffffffff;
3120 /* For XOR, if more than half the bits are set and there's a sequence
3121 of more than 8 consecutive ones in the pattern then we can XOR by the
3122 inverted constant and then invert the final result; this may save an
3123 instruction and might also lead to the final mvn being merged with
3124 some other operation. */
3125 else if (code == XOR && num_bits_set > 16
3126 && (count_insns_for_constant (remainder ^ 0xffffffff,
3128 (remainder ^ 0xffffffff))
3129 < count_insns_for_constant (remainder,
3130 find_best_start (remainder))))
3132 remainder ^= 0xffffffff;
3141 /* Now try to find a way of doing the job in either two or three
3142 instructions.
3143 We start by looking for the largest block of zeros that are aligned on
3144 a 2-bit boundary, we then fill up the temps, wrapping around to the
3145 top of the word when we drop off the bottom.
3146 In the worst case this code should produce no more than four insns.
3147 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3148 best place to start. */
3150 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3151 the same. */
3153 /* Now start emitting the insns. */
3154 i = find_best_start (remainder);
3161 if (remainder & (3 << (i - 2)))
3166 temp1 = remainder & ((0x0ff << end)
3167 | ((i < end) ? (0xff >> (32 - end)) : 0));
3168 remainder &= ~temp1;
3172 rtx new_src, temp1_rtx;
3174 if (code == SET || code == MINUS)
3176 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3177 if (can_invert && code != MINUS)
3182 if ((final_invert || remainder) && subtargets)
3183 new_src = gen_reg_rtx (mode);
3188 else if (can_negate)
3192 temp1 = trunc_int_for_mode (temp1, mode);
3193 temp1_rtx = GEN_INT (temp1);
3197 else if (code == MINUS)
3198 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3200 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3202 emit_constant_insn (cond,
3203 gen_rtx_SET (VOIDmode, new_src,
3213 else if (code == MINUS)
3219 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3220 shifts. */
3229 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3230 gen_rtx_NOT (mode, source)));
3237 /* Canonicalize a comparison so that we are more likely to recognize it.
3238 This can be done for a few constant compares, where we can make the
3239 immediate value easier to load. */
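/* Editor's illustration: "x > 1023" would need the constant 1023
   (0x3ff), which no single rotated byte encodes, but the equivalent
   "x >= 1024" uses 0x400, a valid immediate, so the comparison stays
   a single cmp instruction.  */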
3242 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3244 enum machine_mode mode;
3245 unsigned HOST_WIDE_INT i, maxval;
3247 mode = GET_MODE (*op0);
3248 if (mode == VOIDmode)
3249 mode = GET_MODE (*op1);
3251 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3253 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3254 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3255 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3256 for GTU/LEU in Thumb mode. */
3261 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3262 available. */
3263 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3266 if (code == GT || code == LE
3267 || (!TARGET_ARM && (code == GTU || code == LEU)))
3269 /* Missing comparison. First try to use an available
3270 comparison. */
3271 if (GET_CODE (*op1) == CONST_INT)
3279 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3281 *op1 = GEN_INT (i + 1);
3282 return code == GT ? GE : LT;
3287 if (i != ~((unsigned HOST_WIDE_INT) 0)
3288 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3290 *op1 = GEN_INT (i + 1);
3291 return code == GTU ? GEU : LTU;
3299 /* If that did not work, reverse the condition. */
3303 return swap_condition (code);
3309 /* Comparisons smaller than DImode. Only adjust comparisons against
3310 an out-of-range constant. */
3311 if (GET_CODE (*op1) != CONST_INT
3312 || const_ok_for_arm (INTVAL (*op1))
3313 || const_ok_for_arm (- INTVAL (*op1)))
3327 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3329 *op1 = GEN_INT (i + 1);
3330 return code == GT ? GE : LT;
3337 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3339 *op1 = GEN_INT (i - 1);
3340 return code == GE ? GT : LE;
3346 if (i != ~((unsigned HOST_WIDE_INT) 0)
3347 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3349 *op1 = GEN_INT (i + 1);
3350 return code == GTU ? GEU : LTU;
3357 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3359 *op1 = GEN_INT (i - 1);
3360 return code == GEU ? GTU : LEU;
3372 /* Define how to find the value returned by a function. */
3375 arm_function_value(const_tree type, const_tree func,
3376 bool outgoing ATTRIBUTE_UNUSED)
3378 enum machine_mode mode;
3379 int unsignedp ATTRIBUTE_UNUSED;
3380 rtx r ATTRIBUTE_UNUSED;
3382 mode = TYPE_MODE (type);
3384 if (TARGET_AAPCS_BASED)
3385 return aapcs_allocate_return_reg (mode, type, func);
3387 /* Promote integer types. */
3388 if (INTEGRAL_TYPE_P (type))
3389 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3391 /* Promotes small structs returned in a register to full-word size
3392 for big-endian AAPCS. */
3393 if (arm_return_in_msb (type))
3395 HOST_WIDE_INT size = int_size_in_bytes (type);
3396 if (size % UNITS_PER_WORD != 0)
3398 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3399 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3403 return LIBCALL_VALUE (mode);
3407 libcall_eq (const void *p1, const void *p2)
3409 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3413 libcall_hash (const void *p1)
3415 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3419 add_libcall (htab_t htab, rtx libcall)
3421 *htab_find_slot (htab, libcall, INSERT) = libcall;
3425 arm_libcall_uses_aapcs_base (const_rtx libcall)
3427 static bool init_done = false;
3428 static htab_t libcall_htab;
3434 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3436 add_libcall (libcall_htab,
3437 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3438 add_libcall (libcall_htab,
3439 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3440 add_libcall (libcall_htab,
3441 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3442 add_libcall (libcall_htab,
3443 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3445 add_libcall (libcall_htab,
3446 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3447 add_libcall (libcall_htab,
3448 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3449 add_libcall (libcall_htab,
3450 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3451 add_libcall (libcall_htab,
3452 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3454 add_libcall (libcall_htab,
3455 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3456 add_libcall (libcall_htab,
3457 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3458 add_libcall (libcall_htab,
3459 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3460 add_libcall (libcall_htab,
3461 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3462 add_libcall (libcall_htab,
3463 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3464 add_libcall (libcall_htab,
3465 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3468 return libcall && htab_find (libcall_htab, libcall) != NULL;
3472 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3474 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3475 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3477 /* The following libcalls return their result in integer registers,
3478 even though they return a floating point value. */
3479 if (arm_libcall_uses_aapcs_base (libcall))
3480 return gen_rtx_REG (mode, ARG_REGISTER(1));
3484 return LIBCALL_VALUE (mode);
3487 /* Determine the amount of memory needed to store the possible return
3488 registers of an untyped call. */
3490 arm_apply_result_size (void)
3496 if (TARGET_HARD_FLOAT_ABI)
3502 if (TARGET_MAVERICK)
3505 if (TARGET_IWMMXT_ABI)
3512 /* Decide whether TYPE should be returned in memory (true)
3513 or in a register (false). FNTYPE is the type of the function making
3514 the call. */
3516 arm_return_in_memory (const_tree type, const_tree fntype)
3520 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3522 if (TARGET_AAPCS_BASED)
3524 /* Simple, non-aggregate types (i.e. not including vectors and
3525 complex) are always returned in a register (or registers).
3526 We don't care about which register here, so we can short-cut
3527 some of the detail. */
3528 if (!AGGREGATE_TYPE_P (type)
3529 && TREE_CODE (type) != VECTOR_TYPE
3530 && TREE_CODE (type) != COMPLEX_TYPE)
3533 /* Any return value that is no larger than one word can be
3534 returned in r0. */
3535 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3538 /* Check any available co-processors to see if they accept the
3539 type as a register candidate (VFP, for example, can return
3540 some aggregates in consecutive registers). These aren't
3541 available if the call is variadic. */
3542 if (aapcs_select_return_coproc (type, fntype) >= 0)
3545 /* Vector values should be returned using ARM registers, not
3546 memory (unless they're over 16 bytes, which will break since
3547 we only have four call-clobbered registers to play with). */
3548 if (TREE_CODE (type) == VECTOR_TYPE)
3549 return (size < 0 || size > (4 * UNITS_PER_WORD));
3551 /* The rest go in memory. */
3555 if (TREE_CODE (type) == VECTOR_TYPE)
3556 return (size < 0 || size > (4 * UNITS_PER_WORD));
3558 if (!AGGREGATE_TYPE_P (type) &&
3559 (TREE_CODE (type) != VECTOR_TYPE))
3560 /* All simple types are returned in registers. */
3563 if (arm_abi != ARM_ABI_APCS)
3565 /* ATPCS and later return aggregate types in memory only if they are
3566 larger than a word (or are variable size). */
3567 return (size < 0 || size > UNITS_PER_WORD);
3570 /* For the arm-wince targets we choose to be compatible with Microsoft's
3571 ARM and Thumb compilers, which always return aggregates in memory. */
3573 /* All structures/unions bigger than one word are returned in memory.
3574 Also catch the case where int_size_in_bytes returns -1. In this case
3575 the aggregate is either huge or of variable size, and in either case
3576 we will want to return it via memory and not in a register. */
3577 if (size < 0 || size > UNITS_PER_WORD)
3580 if (TREE_CODE (type) == RECORD_TYPE)
3584 /* For a struct the APCS says that we only return in a register
3585 if the type is 'integer like' and every addressable element
3586 has an offset of zero. For practical purposes this means
3587 that the structure can have at most one non bit-field element
3588 and that this element must be the first one in the structure. */
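/* Editor's illustration (assumed examples): under these APCS rules

     struct ok  { int a; };
     struct bad { float f; };

   "ok" is integer-like and comes back in r0, while "bad" trips the
   float check below and is returned in memory.  */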
3590 /* Find the first field, ignoring non FIELD_DECL things which will
3591 have been created by C++. */
3592 for (field = TYPE_FIELDS (type);
3593 field && TREE_CODE (field) != FIELD_DECL;
3594 field = DECL_CHAIN (field))
3598 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3600 /* Check that the first field is valid for returning in a register. */
3602 /* ... Floats are not allowed */
3603 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3606 /* ... Aggregates that are not themselves valid for returning in
3607 a register are not allowed. */
3608 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3611 /* Now check the remaining fields, if any. Only bitfields are allowed,
3612 since they are not addressable. */
3613 for (field = DECL_CHAIN (field);
3615 field = DECL_CHAIN (field))
3617 if (TREE_CODE (field) != FIELD_DECL)
3620 if (!DECL_BIT_FIELD_TYPE (field))
3627 if (TREE_CODE (type) == UNION_TYPE)
3631 /* Unions can be returned in registers if every element is
3632 integral, or can be returned in an integer register. */
3633 for (field = TYPE_FIELDS (type);
3635 field = DECL_CHAIN (field))
3637 if (TREE_CODE (field) != FIELD_DECL)
3640 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3643 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3649 #endif /* not ARM_WINCE */
3651 /* Return all other types in memory. */
3655 /* Indicate whether or not words of a double are in big-endian order. */
3658 arm_float_words_big_endian (void)
3660 if (TARGET_MAVERICK)
3663 /* For FPA, float words are always big-endian. For VFP, float words
3664 follow the memory system mode. */
3672 return (TARGET_BIG_END ? 1 : 0);
3677 const struct pcs_attribute_arg
3681 } pcs_attribute_args[] =
3683 {"aapcs", ARM_PCS_AAPCS},
3684 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3686 /* We could recognize these, but changes would be needed elsewhere
3687 * to implement them. */
3688 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3689 {"atpcs", ARM_PCS_ATPCS},
3690 {"apcs", ARM_PCS_APCS},
3692 {NULL, ARM_PCS_UNKNOWN}
3696 arm_pcs_from_attribute (tree attr)
3698 const struct pcs_attribute_arg *ptr;
3701 /* Get the value of the argument. */
3702 if (TREE_VALUE (attr) == NULL_TREE
3703 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3704 return ARM_PCS_UNKNOWN;
3706 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3708 /* Check it against the list of known arguments. */
3709 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3710 if (streq (arg, ptr->arg))
3713 /* An unrecognized PCS variant. */
3714 return ARM_PCS_UNKNOWN;
3717 /* Get the PCS variant to use for this call. TYPE is the function's type
3718 specification, DECL is the specific declaration. DECL may be null if
3719 the call could be indirect or if this is a library call. */
3721 arm_get_pcs_model (const_tree type, const_tree decl)
3723 bool user_convention = false;
3724 enum arm_pcs user_pcs = arm_pcs_default;
3729 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3732 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3733 user_convention = true;
3736 if (TARGET_AAPCS_BASED)
3738 /* Detect varargs functions. These always use the base rules
3739 (no argument is ever a candidate for a co-processor
3740 register). */
3741 bool base_rules = stdarg_p (type);
3743 if (user_convention)
3745 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3746 sorry ("Non-AAPCS derived PCS variant");
3747 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3748 error ("Variadic functions must use the base AAPCS variant");
3752 return ARM_PCS_AAPCS;
3753 else if (user_convention)
3755 else if (decl && flag_unit_at_a_time)
3757 /* Local functions never leak outside this compilation unit,
3758 so we are free to use whatever conventions are
3759 appropriate. */
3760 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3761 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3763 return ARM_PCS_AAPCS_LOCAL;
3766 else if (user_convention && user_pcs != arm_pcs_default)
3767 sorry ("PCS variant");
3769 /* For everything else we use the target's default. */
3770 return arm_pcs_default;
3775 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3776 const_tree fntype ATTRIBUTE_UNUSED,
3777 rtx libcall ATTRIBUTE_UNUSED,
3778 const_tree fndecl ATTRIBUTE_UNUSED)
3780 /* Record the unallocated VFP registers. */
3781 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3782 pcum->aapcs_vfp_reg_alloc = 0;
3785 /* Walk down the type tree of TYPE counting consecutive base elements.
3786 If *MODEP is VOIDmode, then set it to the first valid floating point
3787 type. If a non-floating point type is found, or if a floating point
3788 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3789 otherwise return the count in the sub-tree. */
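/* Editor's illustration (assumed example): for

     struct vec3 { float x, y, z; };

   the walk sets *MODEP to SFmode and returns 3, so the struct is a
   homogeneous floating-point aggregate eligible for s0-s2 under the
   VFP variant; adding an "int w" member would make it return -1.  */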
3791 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3793 enum machine_mode mode;
3796 switch (TREE_CODE (type))
3799 mode = TYPE_MODE (type);
3800 if (mode != DFmode && mode != SFmode)
3803 if (*modep == VOIDmode)
3812 mode = TYPE_MODE (TREE_TYPE (type));
3813 if (mode != DFmode && mode != SFmode)
3816 if (*modep == VOIDmode)
3825 /* Use V2SImode and V4SImode as representatives of all 64-bit
3826 and 128-bit vector types, whether or not those modes are
3827 supported with the present options. */
3828 size = int_size_in_bytes (type);
3841 if (*modep == VOIDmode)
3844 /* Vector modes are considered to be opaque: two vectors are
3845 equivalent for the purposes of being homogeneous aggregates
3846 if they are the same size. */
3855 tree index = TYPE_DOMAIN (type);
3857 /* Can't handle incomplete types. */
3858 if (!COMPLETE_TYPE_P(type))
3861 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3864 || !TYPE_MAX_VALUE (index)
3865 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3866 || !TYPE_MIN_VALUE (index)
3867 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3871 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3872 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3874 /* There must be no padding. */
3875 if (!host_integerp (TYPE_SIZE (type), 1)
3876 || (tree_low_cst (TYPE_SIZE (type), 1)
3877 != count * GET_MODE_BITSIZE (*modep)))
3889 /* Can't handle incomplete types. */
3890 if (!COMPLETE_TYPE_P(type))
3893 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3895 if (TREE_CODE (field) != FIELD_DECL)
3898 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3904 /* There must be no padding. */
3905 if (!host_integerp (TYPE_SIZE (type), 1)
3906 || (tree_low_cst (TYPE_SIZE (type), 1)
3907 != count * GET_MODE_BITSIZE (*modep)))
3914 case QUAL_UNION_TYPE:
3916 /* These aren't very interesting except in a degenerate case. */
3921 /* Can't handle incomplete types. */
3922 if (!COMPLETE_TYPE_P(type))
3925 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3927 if (TREE_CODE (field) != FIELD_DECL)
3930 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3933 count = count > sub_count ? count : sub_count;
3936 /* There must be no padding. */
3937 if (!host_integerp (TYPE_SIZE (type), 1)
3938 || (tree_low_cst (TYPE_SIZE (type), 1)
3939 != count * GET_MODE_BITSIZE (*modep)))
3952 /* Return true if PCS_VARIANT should use VFP registers. */
3954 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3956 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3958 static bool seen_thumb1_vfp = false;
3960 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3962 sorry ("Thumb-1 hard-float VFP ABI");
3963 /* sorry() is not immediately fatal, so only display this once. */
3964 seen_thumb1_vfp = true;
3970 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3973 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3974 (TARGET_VFP_DOUBLE || !is_double));
3978 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3979 enum machine_mode mode, const_tree type,
3980 enum machine_mode *base_mode, int *count)
3982 enum machine_mode new_mode = VOIDmode;
3984 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3985 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3986 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3991 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3994 new_mode = (mode == DCmode ? DFmode : SFmode);
3996 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3998 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4000 if (ag_count > 0 && ag_count <= 4)
4009 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4012 *base_mode = new_mode;
4017 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4018 enum machine_mode mode, const_tree type)
4020 int count ATTRIBUTE_UNUSED;
4021 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4023 if (!use_vfp_abi (pcs_variant, false))
4025 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4030 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4033 if (!use_vfp_abi (pcum->pcs_variant, false))
4036 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4037 &pcum->aapcs_vfp_rmode,
4038 &pcum->aapcs_vfp_rcount);
4042 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4043 const_tree type ATTRIBUTE_UNUSED)
4045 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4046 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4049 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4050 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4052 pcum->aapcs_vfp_reg_alloc = mask << regno;
4053 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4056 int rcount = pcum->aapcs_vfp_rcount;
4058 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4062 /* Avoid using unsupported vector modes. */
4063 if (rmode == V2SImode)
4065 else if (rmode == V4SImode)
4072 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4073 for (i = 0; i < rcount; i++)
4075 rtx tmp = gen_rtx_REG (rmode,
4076 FIRST_VFP_REGNUM + regno + i * rshift);
4077 tmp = gen_rtx_EXPR_LIST
4079 GEN_INT (i * GET_MODE_SIZE (rmode)));
4080 XVECEXP (par, 0, i) = tmp;
4083 pcum->aapcs_reg = par;
4086 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4093 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4094 enum machine_mode mode,
4095 const_tree type ATTRIBUTE_UNUSED)
4097 if (!use_vfp_abi (pcs_variant, false))
4100 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4103 enum machine_mode ag_mode;
4108 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4113 if (ag_mode == V2SImode)
4115 else if (ag_mode == V4SImode)
4121 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4122 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4123 for (i = 0; i < count; i++)
4125 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4126 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4127 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4128 XVECEXP (par, 0, i) = tmp;
4134 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4138 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4139 enum machine_mode mode ATTRIBUTE_UNUSED,
4140 const_tree type ATTRIBUTE_UNUSED)
4142 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4143 pcum->aapcs_vfp_reg_alloc = 0;
4147 #define AAPCS_CP(X) \
4149 aapcs_ ## X ## _cum_init, \
4150 aapcs_ ## X ## _is_call_candidate, \
4151 aapcs_ ## X ## _allocate, \
4152 aapcs_ ## X ## _is_return_candidate, \
4153 aapcs_ ## X ## _allocate_return_reg, \
4154 aapcs_ ## X ## _advance \
4157 /* Table of co-processors that can be used to pass arguments in
4158 registers. Ideally no argument should be a candidate for more than
4159 one co-processor table entry, but the table is processed in order
4160 and stops after the first match. If that entry then fails to put
4161 the argument into a co-processor register, the argument will go on
4165 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4166 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4168 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4169 BLKmode) is a candidate for this co-processor's registers; this
4170 function should ignore any position-dependent state in
4171 CUMULATIVE_ARGS and only use call-type dependent information. */
4172 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4174 /* Return true if the argument does get a co-processor register; it
4175 should set aapcs_reg to an RTX of the register allocated as is
4176 required for a return from FUNCTION_ARG. */
4177 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4179 /* Return true if a result of mode MODE (or type TYPE if MODE is
4180 BLKmode) can be returned in this co-processor's registers. */
4181 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4183 /* Allocate and return an RTX element to hold the return type of a
4184 call; this routine must not fail and will only be called if
4185 is_return_candidate returned true with the same parameters. */
4186 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4188 /* Finish processing this argument and prepare to start processing
4189 the next one. */
4190 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4191 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4199 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4204 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4205 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4212 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4214 /* We aren't passed a decl, so we can't check that a call is local.
4215 However, it isn't clear that that would be a win anyway, since it
4216 might limit some tail-calling opportunities. */
4217 enum arm_pcs pcs_variant;
4221 const_tree fndecl = NULL_TREE;
4223 if (TREE_CODE (fntype) == FUNCTION_DECL)
4226 fntype = TREE_TYPE (fntype);
4229 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4232 pcs_variant = arm_pcs_default;
4234 if (pcs_variant != ARM_PCS_AAPCS)
4238 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4239 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4248 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4251 /* We aren't passed a decl, so we can't check that a call is local.
4252 However, it isn't clear that that would be a win anyway, since it
4253 might limit some tail-calling opportunities. */
4254 enum arm_pcs pcs_variant;
4255 int unsignedp ATTRIBUTE_UNUSED;
4259 const_tree fndecl = NULL_TREE;
4261 if (TREE_CODE (fntype) == FUNCTION_DECL)
4264 fntype = TREE_TYPE (fntype);
4267 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4270 pcs_variant = arm_pcs_default;
4272 /* Promote integer types. */
4273 if (type && INTEGRAL_TYPE_P (type))
4274 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4276 if (pcs_variant != ARM_PCS_AAPCS)
4280 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4281 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4283 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4287 /* Promotes small structs returned in a register to full-word size
4288 for big-endian AAPCS. */
4289 if (type && arm_return_in_msb (type))
4291 HOST_WIDE_INT size = int_size_in_bytes (type);
4292 if (size % UNITS_PER_WORD != 0)
4294 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4295 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4299 return gen_rtx_REG (mode, R0_REGNUM);
4303 aapcs_libcall_value (enum machine_mode mode)
4305 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
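/* For example (a sketch): aapcs_libcall_value (DFmode) under the base
   AAPCS returns (reg:DF r0), i.e. the value comes back in r0/r1; under
   the VFP variant, aapcs_allocate_return_reg's co-processor loop would
   instead hand back the first VFP register via that slot's
   allocate_return_reg hook. */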
4308 /* Lay out a function argument using the AAPCS rules. The rule
4309 numbers referred to here are those in the AAPCS. */
4311 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4312 const_tree type, bool named)
4317 /* We only need to do this once per argument. */
4318 if (pcum->aapcs_arg_processed)
4321 pcum->aapcs_arg_processed = true;
4323 /* Special case: if named is false then we are handling an incoming
4324 anonymous argument which is on the stack. */
4328 /* Is this a potential co-processor register candidate? */
4329 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4331 int slot = aapcs_select_call_coproc (pcum, mode, type);
4332 pcum->aapcs_cprc_slot = slot;
4334 /* We don't have to apply any of the rules from part B of the
4335 preparation phase, these are handled elsewhere in the
4340 /* A co-processor register candidate goes either in its own
4341 class of registers or on the stack. */
4342 if (!pcum->aapcs_cprc_failed[slot])
4344 /* C1.cp - Try to allocate the argument to co-processor
registers. */
4346 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4349 /* C2.cp - Put the argument on the stack and note that we
4350 can't assign any more candidates in this slot. We also
4351 need to note that we have allocated stack space, so that
4352 we won't later try to split a non-cprc candidate between
4353 core registers and the stack. */
4354 pcum->aapcs_cprc_failed[slot] = true;
4355 pcum->can_split = false;
4358 /* We didn't get a register, so this argument goes on the
stack. */
4360 gcc_assert (pcum->can_split == false);
4365 /* C3 - For double-word aligned arguments, round the NCRN up to the
4366 next even number. */
4367 ncrn = pcum->aapcs_ncrn;
4368 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4371 nregs = ARM_NUM_REGS2 (mode, type);
4373 /* Sigh, this test should really assert that nregs > 0, but a GCC
4374 extension allows empty structs and then gives them empty size; it
4375 then allows such a structure to be passed by value. For some of
4376 the code below we have to pretend that such an argument has
4377 non-zero size so that we 'locate' it correctly either in
4378 registers or on the stack. */
4379 gcc_assert (nregs >= 0);
4381 nregs2 = nregs ? nregs : 1;
4383 /* C4 - Argument fits entirely in core registers. */
4384 if (ncrn + nregs2 <= NUM_ARG_REGS)
4386 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4387 pcum->aapcs_next_ncrn = ncrn + nregs;
4391 /* C5 - Some core registers left and there are no arguments already
4392 on the stack: split this argument between the remaining core
4393 registers and the stack. */
4394 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4396 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4397 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4398 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4402 /* C6 - NCRN is set to 4. */
4403 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4405 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
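/* A worked example of the rules above (illustrative, assuming the base
   AAPCS and no co-processor candidates):

       void f (int a, long long b, int c);

   A takes r0 (ncrn 0 -> 1). B needs doubleword alignment, so rule C3
   rounds the NCRN up from 1 to 2 and B takes r2/r3 (ncrn -> 4). For C
   the NCRN is already 4, so rules C6-C8 apply and C goes on the stack. */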
4409 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4410 for a call to a function whose data type is FNTYPE.
4411 For a library call, FNTYPE is NULL. */
4413 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4415 tree fndecl ATTRIBUTE_UNUSED)
4417 /* Long call handling. */
4419 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4421 pcum->pcs_variant = arm_pcs_default;
4423 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4425 if (arm_libcall_uses_aapcs_base (libname))
4426 pcum->pcs_variant = ARM_PCS_AAPCS;
4428 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4429 pcum->aapcs_reg = NULL_RTX;
4430 pcum->aapcs_partial = 0;
4431 pcum->aapcs_arg_processed = false;
4432 pcum->aapcs_cprc_slot = -1;
4433 pcum->can_split = true;
4435 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4439 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4441 pcum->aapcs_cprc_failed[i] = false;
4442 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4450 /* On the ARM, the offset starts at 0. */
4452 pcum->iwmmxt_nregs = 0;
4453 pcum->can_split = true;
4455 /* Varargs vectors are treated the same as long long.
4456 named_count avoids having to change the way arm handles 'named'. */
4457 pcum->named_count = 0;
4460 if (TARGET_REALLY_IWMMXT && fntype)
4464 for (fn_arg = TYPE_ARG_TYPES (fntype);
4466 fn_arg = TREE_CHAIN (fn_arg))
4467 pcum->named_count += 1;
4469 if (! pcum->named_count)
4470 pcum->named_count = INT_MAX;
4475 /* Return true if mode/type need doubleword alignment. */
4477 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4479 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4480 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
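/* For example (illustrative): PARM_BOUNDARY is 32 on ARM, so a DImode
   argument (64-bit alignment) answers true here while an SImode one
   answers false; the former is what triggers rule C3's even-register
   rounding in aapcs_layout_arg. */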
4484 /* Determine where to put an argument to a function.
4485 Value is zero to push the argument on the stack,
4486 or a hard register in which to store the argument.
4488 MODE is the argument's machine mode.
4489 TYPE is the data type of the argument (as a tree).
4490 This is null for libcalls where that information may
not be available.
4492 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4493 the preceding args and about the function being called.
4494 NAMED is nonzero if this argument is a named parameter
4495 (otherwise it is an extra parameter matching an ellipsis).
4497 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4498 other arguments are passed on the stack. If (NAMED == 0) (which happens
4499 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4500 defined), say it is passed on the stack (function_prologue will
4501 indeed pass it on the stack if necessary). */
4504 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4505 const_tree type, bool named)
4509 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4510 a call insn (op3 of a call_value insn). */
4511 if (mode == VOIDmode)
4514 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4516 aapcs_layout_arg (pcum, mode, type, named);
4517 return pcum->aapcs_reg;
4520 /* Varargs vectors are treated the same as long long.
4521 named_count avoids having to change the way arm handles 'named'. */
4522 if (TARGET_IWMMXT_ABI
4523 && arm_vector_mode_supported_p (mode)
4524 && pcum->named_count > pcum->nargs + 1)
4526 if (pcum->iwmmxt_nregs <= 9)
4527 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4530 pcum->can_split = false;
4535 /* Put doubleword-aligned quantities in even register pairs. */
4537 && ARM_DOUBLEWORD_ALIGN
4538 && arm_needs_doubleword_align (mode, type))
4541 /* Only allow splitting an arg between regs and memory if all preceding
4542 args were allocated to regs. For args passed by reference we only count
4543 the reference pointer. */
4544 if (pcum->can_split)
4547 nregs = ARM_NUM_REGS2 (mode, type);
4549 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4552 return gen_rtx_REG (mode, pcum->nregs);
4556 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4557 tree type, bool named)
4559 int nregs = pcum->nregs;
4561 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4563 aapcs_layout_arg (pcum, mode, type, named);
4564 return pcum->aapcs_partial;
4567 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4570 if (NUM_ARG_REGS > nregs
4571 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4573 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4578 /* Update the data in PCUM to advance over an argument
4579 of mode MODE and data type TYPE.
4580 (TYPE is null for libcalls where that information may not be available.) */
4583 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4584 const_tree type, bool named)
4586 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4588 aapcs_layout_arg (pcum, mode, type, named);
4590 if (pcum->aapcs_cprc_slot >= 0)
4592 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4594 pcum->aapcs_cprc_slot = -1;
4597 /* Generic stuff. */
4598 pcum->aapcs_arg_processed = false;
4599 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4600 pcum->aapcs_reg = NULL_RTX;
4601 pcum->aapcs_partial = 0;
4606 if (arm_vector_mode_supported_p (mode)
4607 && pcum->named_count > pcum->nargs
4608 && TARGET_IWMMXT_ABI)
4609 pcum->iwmmxt_nregs += 1;
4611 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4615 /* Variable-sized types are passed by reference. This is a GCC
4616 extension to the ARM ABI. */
4619 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4620 enum machine_mode mode ATTRIBUTE_UNUSED,
4621 const_tree type, bool named ATTRIBUTE_UNUSED)
4623 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4626 /* Encode the current state of the #pragma [no_]long_calls. */
4629 OFF, /* No #pragma [no_]long_calls is in effect. */
4630 LONG, /* #pragma long_calls is in effect. */
4631 SHORT /* #pragma no_long_calls is in effect. */
4634 static arm_pragma_enum arm_pragma_long_calls = OFF;
4637 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4639 arm_pragma_long_calls = LONG;
4643 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4645 arm_pragma_long_calls = SHORT;
4649 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4651 arm_pragma_long_calls = OFF;
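/* Illustrative usage of the pragmas handled above (not from the
   original sources):

       #pragma long_calls
       extern void far_away (void);   -- declarations now get long_call
       #pragma long_calls_off         -- back to the default behaviour  */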
4654 /* Handle an attribute requiring a FUNCTION_DECL;
4655 arguments as in struct attribute_spec.handler. */
4657 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4658 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4660 if (TREE_CODE (*node) != FUNCTION_DECL)
4662 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4664 *no_add_attrs = true;
4670 /* Handle an "interrupt" or "isr" attribute;
4671 arguments as in struct attribute_spec.handler. */
4673 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4678 if (TREE_CODE (*node) != FUNCTION_DECL)
4680 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4682 *no_add_attrs = true;
4684 /* FIXME: the argument, if any, is checked for type attributes;
4685 should it be checked for decl ones? */
4689 if (TREE_CODE (*node) == FUNCTION_TYPE
4690 || TREE_CODE (*node) == METHOD_TYPE)
4692 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4694 warning (OPT_Wattributes, "%qE attribute ignored",
4696 *no_add_attrs = true;
4699 else if (TREE_CODE (*node) == POINTER_TYPE
4700 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4701 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4702 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4704 *node = build_variant_type_copy (*node);
4705 TREE_TYPE (*node) = build_type_attribute_variant
4707 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4708 *no_add_attrs = true;
4712 /* Possibly pass this attribute on from the type to a decl. */
4713 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4714 | (int) ATTR_FLAG_FUNCTION_NEXT
4715 | (int) ATTR_FLAG_ARRAY_NEXT))
4717 *no_add_attrs = true;
4718 return tree_cons (name, args, NULL_TREE);
4722 warning (OPT_Wattributes, "%qE attribute ignored",
4731 /* Handle a "pcs" attribute; arguments as in struct
4732 attribute_spec.handler. */
4734 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4735 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4737 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4739 warning (OPT_Wattributes, "%qE attribute ignored", name);
4740 *no_add_attrs = true;
4745 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4746 /* Handle the "notshared" attribute. This attribute is another way of
4747 requesting hidden visibility. ARM's compiler supports
4748 "__declspec(notshared)"; we support the same thing via an
4752 arm_handle_notshared_attribute (tree *node,
4753 tree name ATTRIBUTE_UNUSED,
4754 tree args ATTRIBUTE_UNUSED,
4755 int flags ATTRIBUTE_UNUSED,
4758 tree decl = TYPE_NAME (*node);
4762 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4763 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4764 *no_add_attrs = false;
4770 /* Return 0 if the attributes for two types are incompatible, 1 if they
4771 are compatible, and 2 if they are nearly compatible (which causes a
4772 warning to be generated). */
4774 arm_comp_type_attributes (const_tree type1, const_tree type2)
4778 /* Check for mismatch of non-default calling convention. */
4779 if (TREE_CODE (type1) != FUNCTION_TYPE)
4782 /* Check for mismatched call attributes. */
4783 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4784 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4785 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4786 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4788 /* Only bother to check if an attribute is defined. */
4789 if (l1 | l2 | s1 | s2)
4791 /* If one type has an attribute, the other must have the same attribute. */
4792 if ((l1 != l2) || (s1 != s2))
4795 /* Disallow mixed attributes. */
4796 if ((l1 & s2) || (l2 & s1))
4800 /* Check for mismatched ISR attribute. */
4801 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4803 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4804 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4806 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4813 /* Assigns default attributes to a newly defined type. This is used to
4814 set short_call/long_call attributes for function types of
4815 functions defined inside corresponding #pragma scopes. */
4817 arm_set_default_type_attributes (tree type)
4819 /* Add __attribute__ ((long_call)) to all functions when inside
4820 #pragma long_calls, or __attribute__ ((short_call)) when inside
4821 #pragma no_long_calls. */
4822 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4824 tree type_attr_list, attr_name;
4825 type_attr_list = TYPE_ATTRIBUTES (type);
4827 if (arm_pragma_long_calls == LONG)
4828 attr_name = get_identifier ("long_call");
4829 else if (arm_pragma_long_calls == SHORT)
4830 attr_name = get_identifier ("short_call");
4834 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4835 TYPE_ATTRIBUTES (type) = type_attr_list;
4839 /* Return true if DECL is known to be linked into section SECTION. */
4842 arm_function_in_section_p (tree decl, section *section)
4844 /* We can only be certain about functions defined in the same
4845 compilation unit. */
4846 if (!TREE_STATIC (decl))
4849 /* Make sure that DECL always binds to the definition in this
4850 compilation unit. */
4851 if (!targetm.binds_local_p (decl))
4854 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4855 if (!DECL_SECTION_NAME (decl))
4857 /* Make sure that we will not create a unique section for DECL. */
4858 if (flag_function_sections || DECL_ONE_ONLY (decl))
4862 return function_section (decl) == section;
4865 /* Return nonzero if a 32-bit "long_call" should be generated for
4866 a call from the current function to DECL. We generate a long_call
if the function:
4869 a. has an __attribute__ ((long_call))
4870 or b. is within the scope of a #pragma long_calls
4871 or c. the -mlong-calls command line switch has been specified
4873 However we do not generate a long call if the function:
4875 d. has an __attribute__ ((short_call))
4876 or e. is inside the scope of a #pragma no_long_calls
4877 or f. is defined in the same section as the current function. */
4880 arm_is_long_call_p (tree decl)
4885 return TARGET_LONG_CALLS;
4887 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4888 if (lookup_attribute ("short_call", attrs))
4891 /* For "f", be conservative, and only cater for cases in which the
4892 whole of the current function is placed in the same section. */
4893 if (!flag_reorder_blocks_and_partition
4894 && TREE_CODE (decl) == FUNCTION_DECL
4895 && arm_function_in_section_p (decl, current_function_section ()))
4898 if (lookup_attribute ("long_call", attrs))
4901 return TARGET_LONG_CALLS;
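/* For example (illustrative): with -mlong-calls in effect,

       extern void ext_fn (void);
       void __attribute__ ((short_call)) near_fn (void);

   a call to ext_fn is emitted as a long call, while near_fn keeps the
   plain BL form because rule "d" overrides the command-line switch. */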
4904 /* Return nonzero if it is ok to make a tail-call to DECL. */
4906 arm_function_ok_for_sibcall (tree decl, tree exp)
4908 unsigned long func_type;
4910 if (cfun->machine->sibcall_blocked)
4913 /* Never tailcall something for which we have no decl, or if we
4914 are generating code for Thumb-1. */
4915 if (decl == NULL || TARGET_THUMB1)
4918 /* The PIC register is live on entry to VxWorks PLT entries, so we
4919 must make the call before restoring the PIC register. */
4920 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4923 /* Cannot tail-call to long calls, since these are out of range of
4924 a branch instruction. */
4925 if (arm_is_long_call_p (decl))
4928 /* If we are interworking and the function is not declared static
4929 then we can't tail-call it unless we know that it exists in this
4930 compilation unit (since it might be a Thumb routine). */
4931 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4934 func_type = arm_current_func_type ();
4935 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4936 if (IS_INTERRUPT (func_type))
4939 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4941 /* Check that the return value locations are the same. For
4942 example, that we aren't returning a value from the sibling in
4943 a VFP register but then need to transfer it to a core
register. */
4947 a = arm_function_value (TREE_TYPE (exp), decl, false);
4948 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4950 if (!rtx_equal_p (a, b))
4954 /* Never tailcall if the function may be called with a misaligned SP. */
4955 if (IS_STACKALIGN (func_type))
4958 /* Everything else is ok. */
4963 /* Addressing mode support functions. */
4965 /* Return nonzero if X is a legitimate immediate operand when compiling
4966 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4968 legitimate_pic_operand_p (rtx x)
4970 if (GET_CODE (x) == SYMBOL_REF
4971 || (GET_CODE (x) == CONST
4972 && GET_CODE (XEXP (x, 0)) == PLUS
4973 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4979 /* Record that the current function needs a PIC register. Initialize
4980 cfun->machine->pic_reg if we have not already done so. */
4983 require_pic_register (void)
4985 /* A lot of the logic here is made obscure by the fact that this
4986 routine gets called as part of the rtx cost estimation process.
4987 We don't want those calls to affect any assumptions about the real
4988 function; and further, we can't call entry_of_function() until we
4989 start the real expansion process. */
4990 if (!crtl->uses_pic_offset_table)
4992 gcc_assert (can_create_pseudo_p ());
4993 if (arm_pic_register != INVALID_REGNUM)
4995 if (!cfun->machine->pic_reg)
4996 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4998 /* Play games to avoid marking the function as needing pic
4999 if we are being called as part of the cost-estimation
process. */
5001 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5002 crtl->uses_pic_offset_table = 1;
5008 if (!cfun->machine->pic_reg)
5009 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5011 /* Play games to avoid marking the function as needing pic
5012 if we are being called as part of the cost-estimation
process. */
5014 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5016 crtl->uses_pic_offset_table = 1;
5019 arm_load_pic_register (0UL);
5023 /* We can be called during expansion of PHI nodes, where
5024 we can't yet emit instructions directly in the final
5025 insn stream. Queue the insns on the entry edge; they will
5026 be committed after everything else is expanded. */
5027 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5034 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5036 if (GET_CODE (orig) == SYMBOL_REF
5037 || GET_CODE (orig) == LABEL_REF)
5043 gcc_assert (can_create_pseudo_p ());
5044 reg = gen_reg_rtx (Pmode);
5047 /* VxWorks does not impose a fixed gap between segments; the run-time
5048 gap can be different from the object-file gap. We therefore can't
5049 use GOTOFF unless we are absolutely sure that the symbol is in the
5050 same segment as the GOT. Unfortunately, the flexibility of linker
5051 scripts means that we can't be sure of that in general, so assume
5052 that GOTOFF is never valid on VxWorks. */
5053 if ((GET_CODE (orig) == LABEL_REF
5054 || (GET_CODE (orig) == SYMBOL_REF &&
5055 SYMBOL_REF_LOCAL_P (orig)))
5057 && !TARGET_VXWORKS_RTP)
5058 insn = arm_pic_static_addr (orig, reg);
5064 /* If this function doesn't have a pic register, create one now. */
5065 require_pic_register ();
5067 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5069 /* Make the MEM as close to a constant as possible. */
5070 mem = SET_SRC (pat);
5071 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5072 MEM_READONLY_P (mem) = 1;
5073 MEM_NOTRAP_P (mem) = 1;
5075 insn = emit_insn (pat);
5078 /* Put a REG_EQUAL note on this insn, so that it can be optimized
by loop. */
5080 set_unique_reg_note (insn, REG_EQUAL, orig);
5084 else if (GET_CODE (orig) == CONST)
5088 if (GET_CODE (XEXP (orig, 0)) == PLUS
5089 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5092 /* Handle the case where we have: const (UNSPEC_TLS). */
5093 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5094 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5097 /* Handle the case where we have:
5098 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
CONST_INT. */
5100 if (GET_CODE (XEXP (orig, 0)) == PLUS
5101 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5102 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5104 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5110 gcc_assert (can_create_pseudo_p ());
5111 reg = gen_reg_rtx (Pmode);
5114 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5116 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5117 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5118 base == reg ? 0 : reg);
5120 if (GET_CODE (offset) == CONST_INT)
5122 /* The base register doesn't really matter; we only want to
5123 test the index for the appropriate mode. */
5124 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5126 gcc_assert (can_create_pseudo_p ());
5127 offset = force_reg (Pmode, offset);
5130 if (GET_CODE (offset) == CONST_INT)
5131 return plus_constant (base, INTVAL (offset));
5134 if (GET_MODE_SIZE (mode) > 4
5135 && (GET_MODE_CLASS (mode) == MODE_INT
5136 || TARGET_SOFT_FLOAT))
5138 emit_insn (gen_addsi3 (reg, base, offset));
5142 return gen_rtx_PLUS (Pmode, base, offset);
5149 /* Find a spare register to use during the prologue of a function. */
5152 thumb_find_work_register (unsigned long pushed_regs_mask)
5156 /* Check the argument registers first as these are call-used. The
5157 register allocation order means that sometimes r3 might be used
5158 but earlier argument registers might not, so check them all. */
5159 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5160 if (!df_regs_ever_live_p (reg))
5163 /* Before going on to check the call-saved registers we can try a couple
5164 more ways of deducing that r3 is available. The first is when we are
5165 pushing anonymous arguments onto the stack and we have fewer than 4
5166 registers' worth of fixed arguments (*). In this case r3 will be part of
5167 the variable argument list and so we can be sure that it will be
5168 pushed right at the start of the function. Hence it will be available
5169 for the rest of the prologue.
5170 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5171 if (cfun->machine->uses_anonymous_args
5172 && crtl->args.pretend_args_size > 0)
5173 return LAST_ARG_REGNUM;
5175 /* The other case is when we have fixed arguments but fewer than 4 registers'
5176 worth. In this case r3 might be used in the body of the function, but
5177 it is not being used to convey an argument into the function. In theory
5178 we could just check crtl->args.size to see how many bytes are
5179 being passed in argument registers, but it seems that it is unreliable.
5180 Sometimes it will have the value 0 when in fact arguments are being
5181 passed. (See testcase execute/20021111-1.c for an example). So we also
5182 check the args_info.nregs field. The problem with this field is
5183 that it makes no allowances for arguments that are passed to the
5184 function but which are not used. Hence we could miss an opportunity
5185 when a function has an unused argument in r3. But it is better to be
5186 safe than to be sorry. */
5187 if (! cfun->machine->uses_anonymous_args
5188 && crtl->args.size >= 0
5189 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5190 && crtl->args.info.nregs < 4)
5191 return LAST_ARG_REGNUM;
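/* Illustrative: for void h (int a) { ... } only r0 carries an argument,
   so args_info.nregs is 1 and the test above concludes that r3 is not
   conveying an argument and can serve as the work register. */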
5193 /* Otherwise look for a call-saved register that is going to be pushed. */
5194 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5195 if (pushed_regs_mask & (1 << reg))
5200 /* Thumb-2 can use high regs. */
5201 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5202 if (pushed_regs_mask & (1 << reg))
5205 /* Something went wrong - thumb_compute_save_reg_mask()
5206 should have arranged for a suitable register to be pushed. */
5210 static GTY(()) int pic_labelno;
5212 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
scratch register. */
5216 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5218 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5220 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5223 gcc_assert (flag_pic);
5225 pic_reg = cfun->machine->pic_reg;
5226 if (TARGET_VXWORKS_RTP)
5228 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5229 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5230 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5232 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5234 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5235 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5239 /* We use an UNSPEC rather than a LABEL_REF because this label
5240 never appears in the code stream. */
5242 labelno = GEN_INT (pic_labelno++);
5243 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5244 l1 = gen_rtx_CONST (VOIDmode, l1);
5246 /* On the ARM the PC register contains 'dot + 8' at the time of the
5247 addition; on the Thumb it is 'dot + 4'. */
5248 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5249 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5251 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5255 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5257 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5259 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5261 else /* TARGET_THUMB1 */
5263 if (arm_pic_register != INVALID_REGNUM
5264 && REGNO (pic_reg) > LAST_LO_REGNUM)
5266 /* We will have pushed the pic register, so we should always be
5267 able to find a work register. */
5268 pic_tmp = gen_rtx_REG (SImode,
5269 thumb_find_work_register (saved_regs));
5270 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5271 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5274 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5275 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
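/* The sequences emitted above boil down to something along these lines
   in ARM state (a sketch; register choice and literal placement vary):

       ldr     r4, .LCPIC          @ offset to the GOT
   .LPIC0:
       add     r4, pc, r4          @ pc reads as .LPIC0 + 8 here

   which is why 'dot + 8' (ARM) versus 'dot + 4' (Thumb) is folded into
   the literal. */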
5279 /* Need to emit this whether or not we obey regdecls,
5280 since setjmp/longjmp can cause life info to screw up. */
5284 /* Generate code to load the address of a static var when flag_pic is set. */
5286 arm_pic_static_addr (rtx orig, rtx reg)
5288 rtx l1, labelno, offset_rtx, insn;
5290 gcc_assert (flag_pic);
5292 /* We use an UNSPEC rather than a LABEL_REF because this label
5293 never appears in the code stream. */
5294 labelno = GEN_INT (pic_labelno++);
5295 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5296 l1 = gen_rtx_CONST (VOIDmode, l1);
5298 /* On the ARM the PC register contains 'dot + 8' at the time of the
5299 addition; on the Thumb it is 'dot + 4'. */
5300 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5301 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5302 UNSPEC_SYMBOL_OFFSET);
5303 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5307 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5309 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5311 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5313 else /* TARGET_THUMB1 */
5315 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5316 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5322 /* Return nonzero if X is valid as an ARM state addressing register. */
5324 arm_address_register_rtx_p (rtx x, int strict_p)
5328 if (GET_CODE (x) != REG)
5334 return ARM_REGNO_OK_FOR_BASE_P (regno);
5336 return (regno <= LAST_ARM_REGNUM
5337 || regno >= FIRST_PSEUDO_REGISTER
5338 || regno == FRAME_POINTER_REGNUM
5339 || regno == ARG_POINTER_REGNUM);
5342 /* Return TRUE if this rtx is the difference of a symbol and a label,
5343 and will reduce to a PC-relative relocation in the object file.
5344 Expressions like this can be left alone when generating PIC, rather
5345 than forced through the GOT. */
5347 pcrel_constant_p (rtx x)
5349 if (GET_CODE (x) == MINUS)
5350 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5355 /* Return true if X will surely end up in an index register after next
splitting pass. */
5358 will_be_in_index_register (const_rtx x)
5360 /* arm.md: calculate_pic_address will split this into a register. */
5361 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5364 /* Return nonzero if X is a valid ARM state address operand. */
5366 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5370 enum rtx_code code = GET_CODE (x);
5372 if (arm_address_register_rtx_p (x, strict_p))
5375 use_ldrd = (TARGET_LDRD
5377 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5379 if (code == POST_INC || code == PRE_DEC
5380 || ((code == PRE_INC || code == POST_DEC)
5381 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5382 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5384 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5385 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5386 && GET_CODE (XEXP (x, 1)) == PLUS
5387 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5389 rtx addend = XEXP (XEXP (x, 1), 1);
5391 /* Don't allow ldrd post-increment by register because it's hard
5392 to fix up invalid register choices. */
5394 && GET_CODE (x) == POST_MODIFY
5395 && GET_CODE (addend) == REG)
5398 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5399 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5402 /* After reload, constants split into minipools will have addresses
5403 from a LABEL_REF. */
5404 else if (reload_completed
5405 && (code == LABEL_REF
5407 && GET_CODE (XEXP (x, 0)) == PLUS
5408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5409 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5412 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5415 else if (code == PLUS)
5417 rtx xop0 = XEXP (x, 0);
5418 rtx xop1 = XEXP (x, 1);
5420 return ((arm_address_register_rtx_p (xop0, strict_p)
5421 && ((GET_CODE(xop1) == CONST_INT
5422 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5423 || (!strict_p && will_be_in_index_register (xop1))))
5424 || (arm_address_register_rtx_p (xop1, strict_p)
5425 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5429 /* Reload currently can't handle MINUS, so disable this for now. */
5430 else if (GET_CODE (x) == MINUS)
5432 rtx xop0 = XEXP (x, 0);
5433 rtx xop1 = XEXP (x, 1);
5435 return (arm_address_register_rtx_p (xop0, strict_p)
5436 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5440 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5441 && code == SYMBOL_REF
5442 && CONSTANT_POOL_ADDRESS_P (x)
5444 && symbol_mentioned_p (get_pool_constant (x))
5445 && ! pcrel_constant_p (get_pool_constant (x))))
5451 /* Return nonzero if X is a valid Thumb-2 address operand. */
5453 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5456 enum rtx_code code = GET_CODE (x);
5458 if (arm_address_register_rtx_p (x, strict_p))
5461 use_ldrd = (TARGET_LDRD
5463 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5465 if (code == POST_INC || code == PRE_DEC
5466 || ((code == PRE_INC || code == POST_DEC)
5467 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5468 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5470 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5471 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5472 && GET_CODE (XEXP (x, 1)) == PLUS
5473 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5475 /* Thumb-2 only has autoincrement by constant. */
5476 rtx addend = XEXP (XEXP (x, 1), 1);
5477 HOST_WIDE_INT offset;
5479 if (GET_CODE (addend) != CONST_INT)
5482 offset = INTVAL (addend);
5483 if (GET_MODE_SIZE (mode) <= 4)
5484 return (offset > -256 && offset < 256);
5486 return (use_ldrd && offset > -1024 && offset < 1024
5487 && (offset & 3) == 0);
5490 /* After reload, constants split into minipools will have addresses
5491 from a LABEL_REF. */
5492 else if (reload_completed
5493 && (code == LABEL_REF
5495 && GET_CODE (XEXP (x, 0)) == PLUS
5496 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5497 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5500 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5503 else if (code == PLUS)
5505 rtx xop0 = XEXP (x, 0);
5506 rtx xop1 = XEXP (x, 1);
5508 return ((arm_address_register_rtx_p (xop0, strict_p)
5509 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5510 || (!strict_p && will_be_in_index_register (xop1))))
5511 || (arm_address_register_rtx_p (xop1, strict_p)
5512 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5515 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5516 && code == SYMBOL_REF
5517 && CONSTANT_POOL_ADDRESS_P (x)
5519 && symbol_mentioned_p (get_pool_constant (x))
5520 && ! pcrel_constant_p (get_pool_constant (x))))
5526 /* Return nonzero if INDEX is valid for an address index operand in
ARM state. */
5529 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5532 HOST_WIDE_INT range;
5533 enum rtx_code code = GET_CODE (index);
5535 /* Standard coprocessor addressing modes. */
5536 if (TARGET_HARD_FLOAT
5537 && (TARGET_FPA || TARGET_MAVERICK)
5538 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5539 || (TARGET_MAVERICK && mode == DImode)))
5540 return (code == CONST_INT && INTVAL (index) < 1024
5541 && INTVAL (index) > -1024
5542 && (INTVAL (index) & 3) == 0);
5545 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5546 return (code == CONST_INT
5547 && INTVAL (index) < 1016
5548 && INTVAL (index) > -1024
5549 && (INTVAL (index) & 3) == 0);
5551 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5552 return (code == CONST_INT
5553 && INTVAL (index) < 1024
5554 && INTVAL (index) > -1024
5555 && (INTVAL (index) & 3) == 0);
5557 if (arm_address_register_rtx_p (index, strict_p)
5558 && (GET_MODE_SIZE (mode) <= 4))
5561 if (mode == DImode || mode == DFmode)
5563 if (code == CONST_INT)
5565 HOST_WIDE_INT val = INTVAL (index);
5568 return val > -256 && val < 256;
5570 return val > -4096 && val < 4092;
5573 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5576 if (GET_MODE_SIZE (mode) <= 4
5580 || (mode == QImode && outer == SIGN_EXTEND))))
5584 rtx xiop0 = XEXP (index, 0);
5585 rtx xiop1 = XEXP (index, 1);
5587 return ((arm_address_register_rtx_p (xiop0, strict_p)
5588 && power_of_two_operand (xiop1, SImode))
5589 || (arm_address_register_rtx_p (xiop1, strict_p)
5590 && power_of_two_operand (xiop0, SImode)));
5592 else if (code == LSHIFTRT || code == ASHIFTRT
5593 || code == ASHIFT || code == ROTATERT)
5595 rtx op = XEXP (index, 1);
5597 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5598 && GET_CODE (op) == CONST_INT
5600 && INTVAL (op) <= 31);
5604 /* For ARM v4 we may be doing a sign-extend operation during the
load. */
5610 || (outer == SIGN_EXTEND && mode == QImode))
5616 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5618 return (code == CONST_INT
5619 && INTVAL (index) < range
5620 && INTVAL (index) > -range);
5623 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5624 index operand, i.e. 1, 2, 4 or 8. */
5626 thumb2_index_mul_operand (rtx op)
5630 if (GET_CODE (op) != CONST_INT)
5634 return (val == 1 || val == 2 || val == 4 || val == 8);
5637 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5639 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5641 enum rtx_code code = GET_CODE (index);
5643 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5644 /* Standard coprocessor addressing modes. */
5645 if (TARGET_HARD_FLOAT
5646 && (TARGET_FPA || TARGET_MAVERICK)
5647 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5648 || (TARGET_MAVERICK && mode == DImode)))
5649 return (code == CONST_INT && INTVAL (index) < 1024
5650 && INTVAL (index) > -1024
5651 && (INTVAL (index) & 3) == 0);
5653 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5655 /* For DImode assume values will usually live in core regs
5656 and only allow LDRD addressing modes. */
5657 if (!TARGET_LDRD || mode != DImode)
5658 return (code == CONST_INT
5659 && INTVAL (index) < 1024
5660 && INTVAL (index) > -1024
5661 && (INTVAL (index) & 3) == 0);
5665 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5666 return (code == CONST_INT
5667 && INTVAL (index) < 1016
5668 && INTVAL (index) > -1024
5669 && (INTVAL (index) & 3) == 0);
5671 if (arm_address_register_rtx_p (index, strict_p)
5672 && (GET_MODE_SIZE (mode) <= 4))
5675 if (mode == DImode || mode == DFmode)
5677 if (code == CONST_INT)
5679 HOST_WIDE_INT val = INTVAL (index);
5680 /* ??? Can we assume ldrd for thumb2? */
5681 /* Thumb-2 ldrd only has reg+const addressing modes. */
5682 /* ldrd supports offsets of +-1020.
5683 However the ldr fallback does not. */
5684 return val > -256 && val < 256 && (val & 3) == 0;
5692 rtx xiop0 = XEXP (index, 0);
5693 rtx xiop1 = XEXP (index, 1);
5695 return ((arm_address_register_rtx_p (xiop0, strict_p)
5696 && thumb2_index_mul_operand (xiop1))
5697 || (arm_address_register_rtx_p (xiop1, strict_p)
5698 && thumb2_index_mul_operand (xiop0)));
5700 else if (code == ASHIFT)
5702 rtx op = XEXP (index, 1);
5704 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5705 && GET_CODE (op) == CONST_INT
5707 && INTVAL (op) <= 3);
5710 return (code == CONST_INT
5711 && INTVAL (index) < 4096
5712 && INTVAL (index) > -256);
5715 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5717 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5721 if (GET_CODE (x) != REG)
5727 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5729 return (regno <= LAST_LO_REGNUM
5730 || regno > LAST_VIRTUAL_REGISTER
5731 || regno == FRAME_POINTER_REGNUM
5732 || (GET_MODE_SIZE (mode) >= 4
5733 && (regno == STACK_POINTER_REGNUM
5734 || regno >= FIRST_PSEUDO_REGISTER
5735 || x == hard_frame_pointer_rtx
5736 || x == arg_pointer_rtx)));
5739 /* Return nonzero if x is a legitimate index register. This is the case
5740 for any base register that can access a QImode object. */
5742 thumb1_index_register_rtx_p (rtx x, int strict_p)
5744 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5747 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5749 The AP may be eliminated to either the SP or the FP, so we use the
5750 least common denominator, e.g. SImode, and offsets from 0 to 64.
5752 ??? Verify whether the above is the right approach.
5754 ??? Also, the FP may be eliminated to the SP, so perhaps that
5755 needs special handling also.
5757 ??? Look at how the mips16 port solves this problem. It probably uses
5758 better ways to solve some of these problems.
5760 Although it is not incorrect, we don't accept QImode and HImode
5761 addresses based on the frame pointer or arg pointer until the
5762 reload pass starts. This is so that eliminating such addresses
5763 into stack based ones won't produce impossible code. */
5765 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5767 /* ??? Not clear if this is right. Experiment. */
5768 if (GET_MODE_SIZE (mode) < 4
5769 && !(reload_in_progress || reload_completed)
5770 && (reg_mentioned_p (frame_pointer_rtx, x)
5771 || reg_mentioned_p (arg_pointer_rtx, x)
5772 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5773 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5774 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5775 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5778 /* Accept any base register. SP only in SImode or larger. */
5779 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5782 /* This is PC relative data before arm_reorg runs. */
5783 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5784 && GET_CODE (x) == SYMBOL_REF
5785 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5788 /* This is PC relative data after arm_reorg runs. */
5789 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5791 && (GET_CODE (x) == LABEL_REF
5792 || (GET_CODE (x) == CONST
5793 && GET_CODE (XEXP (x, 0)) == PLUS
5794 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5795 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5798 /* Post-inc indexing only supported for SImode and larger. */
5799 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5800 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5803 else if (GET_CODE (x) == PLUS)
5805 /* REG+REG address can be any two index registers. */
5806 /* We disallow FRAME+REG addressing since we know that FRAME
5807 will be replaced with STACK, and SP relative addressing only
5808 permits SP+OFFSET. */
5809 if (GET_MODE_SIZE (mode) <= 4
5810 && XEXP (x, 0) != frame_pointer_rtx
5811 && XEXP (x, 1) != frame_pointer_rtx
5812 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5813 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5814 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5817 /* REG+const has 5-7 bit offset for non-SP registers. */
5818 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5819 || XEXP (x, 0) == arg_pointer_rtx)
5820 && GET_CODE (XEXP (x, 1)) == CONST_INT
5821 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5824 /* REG+const has 10-bit offset for SP, but only SImode and
5825 larger are supported. */
5826 /* ??? Should probably check for DI/DFmode overflow here
5827 just like GO_IF_LEGITIMATE_OFFSET does. */
5828 else if (GET_CODE (XEXP (x, 0)) == REG
5829 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5830 && GET_MODE_SIZE (mode) >= 4
5831 && GET_CODE (XEXP (x, 1)) == CONST_INT
5832 && INTVAL (XEXP (x, 1)) >= 0
5833 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5834 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5837 else if (GET_CODE (XEXP (x, 0)) == REG
5838 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5839 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5840 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5841 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5842 && GET_MODE_SIZE (mode) >= 4
5843 && GET_CODE (XEXP (x, 1)) == CONST_INT
5844 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5848 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5849 && GET_MODE_SIZE (mode) == 4
5850 && GET_CODE (x) == SYMBOL_REF
5851 && CONSTANT_POOL_ADDRESS_P (x)
5853 && symbol_mentioned_p (get_pool_constant (x))
5854 && ! pcrel_constant_p (get_pool_constant (x))))
5860 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5861 instruction of mode MODE. */
5863 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5865 switch (GET_MODE_SIZE (mode))
5868 return val >= 0 && val < 32;
5871 return val >= 0 && val < 64 && (val & 1) == 0;
5875 && (val + GET_MODE_SIZE (mode)) <= 128
5881 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5884 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5885 else if (TARGET_THUMB2)
5886 return thumb2_legitimate_address_p (mode, x, strict_p);
5887 else /* if (TARGET_THUMB1) */
5888 return thumb1_legitimate_address_p (mode, x, strict_p);
5891 /* Build the SYMBOL_REF for __tls_get_addr. */
5893 static GTY(()) rtx tls_get_addr_libfunc;
5896 get_tls_get_addr (void)
5898 if (!tls_get_addr_libfunc)
5899 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5900 return tls_get_addr_libfunc;
5904 arm_load_tp (rtx target)
5907 target = gen_reg_rtx (SImode);
5911 /* Can return in any reg. */
5912 emit_insn (gen_load_tp_hard (target));
5916 /* Always returned in r0. Immediately copy the result into a pseudo;
5917 otherwise other uses of r0 (e.g. setting up function arguments) may
5918 clobber the value. */
5922 emit_insn (gen_load_tp_soft ());
5924 tmp = gen_rtx_REG (SImode, 0);
5925 emit_move_insn (target, tmp);
5931 load_tls_operand (rtx x, rtx reg)
5935 if (reg == NULL_RTX)
5936 reg = gen_reg_rtx (SImode);
5938 tmp = gen_rtx_CONST (SImode, x);
5940 emit_move_insn (reg, tmp);
5946 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5948 rtx insns, label, labelno, sum;
5952 labelno = GEN_INT (pic_labelno++);
5953 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5954 label = gen_rtx_CONST (VOIDmode, label);
5956 sum = gen_rtx_UNSPEC (Pmode,
5957 gen_rtvec (4, x, GEN_INT (reloc), label,
5958 GEN_INT (TARGET_ARM ? 8 : 4)),
5960 reg = load_tls_operand (sum, reg);
5963 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5964 else if (TARGET_THUMB2)
5965 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5966 else /* TARGET_THUMB1 */
5967 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5969 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5970 Pmode, 1, reg, Pmode);
5972 insns = get_insns ();
5979 legitimize_tls_address (rtx x, rtx reg)
5981 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5982 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5986 case TLS_MODEL_GLOBAL_DYNAMIC:
5987 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5988 dest = gen_reg_rtx (Pmode);
5989 emit_libcall_block (insns, dest, ret, x);
5992 case TLS_MODEL_LOCAL_DYNAMIC:
5993 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5995 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5996 share the LDM result with other LD model accesses. */
5997 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5999 dest = gen_reg_rtx (Pmode);
6000 emit_libcall_block (insns, dest, ret, eqv);
6002 /* Load the addend. */
6003 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6005 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6006 return gen_rtx_PLUS (Pmode, dest, addend);
6008 case TLS_MODEL_INITIAL_EXEC:
6009 labelno = GEN_INT (pic_labelno++);
6010 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6011 label = gen_rtx_CONST (VOIDmode, label);
6012 sum = gen_rtx_UNSPEC (Pmode,
6013 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6014 GEN_INT (TARGET_ARM ? 8 : 4)),
6016 reg = load_tls_operand (sum, reg);
6019 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6020 else if (TARGET_THUMB2)
6021 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6024 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6025 emit_move_insn (reg, gen_const_mem (SImode, reg));
6028 tp = arm_load_tp (NULL_RTX);
6030 return gen_rtx_PLUS (Pmode, tp, reg);
6032 case TLS_MODEL_LOCAL_EXEC:
6033 tp = arm_load_tp (NULL_RTX);
6035 reg = gen_rtx_UNSPEC (Pmode,
6036 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6038 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6040 return gen_rtx_PLUS (Pmode, tp, reg);
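/* A rough map from the TLS models handled above to the relocation
   codes they use (drawn from the code itself):

       TLS_MODEL_GLOBAL_DYNAMIC  ->  TLS_GD32, call to __tls_get_addr
       TLS_MODEL_LOCAL_DYNAMIC   ->  TLS_LDM32 plus a TLS_LDO32 addend
       TLS_MODEL_INITIAL_EXEC    ->  TLS_IE32, offset loaded from memory
       TLS_MODEL_LOCAL_EXEC      ->  TLS_LE32, offset from thread pointer  */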
6047 /* Try machine-dependent ways of modifying an illegitimate address
6048 to be legitimate. If we find one, return the new, valid address. */
6050 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6054 /* TODO: legitimize_address for Thumb2. */
6057 return thumb_legitimize_address (x, orig_x, mode);
6060 if (arm_tls_symbol_p (x))
6061 return legitimize_tls_address (x, NULL_RTX);
6063 if (GET_CODE (x) == PLUS)
6065 rtx xop0 = XEXP (x, 0);
6066 rtx xop1 = XEXP (x, 1);
6068 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6069 xop0 = force_reg (SImode, xop0);
6071 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6072 xop1 = force_reg (SImode, xop1);
6074 if (ARM_BASE_REGISTER_RTX_P (xop0)
6075 && GET_CODE (xop1) == CONST_INT)
6077 HOST_WIDE_INT n, low_n;
6081 /* VFP addressing modes actually allow greater offsets, but for
6082 now we just stick with the lowest common denominator. */
6084 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6096 low_n = ((mode) == TImode ? 0
6097 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6101 base_reg = gen_reg_rtx (SImode);
6102 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6103 emit_move_insn (base_reg, val);
6104 x = plus_constant (base_reg, low_n);
6106 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6107 x = gen_rtx_PLUS (SImode, xop0, xop1);
6110 /* XXX We don't allow MINUS any more -- see comment in
6111 arm_legitimate_address_outer_p (). */
6112 else if (GET_CODE (x) == MINUS)
6114 rtx xop0 = XEXP (x, 0);
6115 rtx xop1 = XEXP (x, 1);
6117 if (CONSTANT_P (xop0))
6118 xop0 = force_reg (SImode, xop0);
6120 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6121 xop1 = force_reg (SImode, xop1);
6123 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6124 x = gen_rtx_MINUS (SImode, xop0, xop1);
6127 /* Make sure to take full advantage of the pre-indexed addressing mode
6128 with absolute addresses which often allows for the base register to
6129 be factored out across multiple adjacent memory references, and it might
6130 even allow for the minipool to be avoided entirely. */
6131 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6134 HOST_WIDE_INT mask, base, index;
6137 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6138 use an 8-bit index. So let's use a 12-bit index for SImode only and
6139 hope that arm_gen_constant will enable ldrb to use more bits. */
6140 bits = (mode == SImode) ? 12 : 8;
6141 mask = (1 << bits) - 1;
6142 base = INTVAL (x) & ~mask;
6143 index = INTVAL (x) & mask;
6144 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6146 /* It'll most probably be more efficient to generate the base
6147 with more bits set and use a negative index instead. */
6151 base_reg = force_reg (SImode, GEN_INT (base));
6152 x = plus_constant (base_reg, index);
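/* Worked example of the split above (illustrative): for an SImode
   access to absolute address 0x00123456, BITS is 12, so

       mask  = 0xfff
       base  = 0x00123000   (forced into base_reg)
       index = 0x456        (folded into the load/store offset)

   letting several nearby absolute references share one base register. */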
6157 /* We need to find and carefully transform any SYMBOL and LABEL
6158 references, so go back to the original address expression. */
6159 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6161 if (new_x != orig_x)
6169 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6170 to be legitimate. If we find one, return the new, valid address. */
6172 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6174 if (arm_tls_symbol_p (x))
6175 return legitimize_tls_address (x, NULL_RTX);
6177 if (GET_CODE (x) == PLUS
6178 && GET_CODE (XEXP (x, 1)) == CONST_INT
6179 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6180 || INTVAL (XEXP (x, 1)) < 0))
6182 rtx xop0 = XEXP (x, 0);
6183 rtx xop1 = XEXP (x, 1);
6184 HOST_WIDE_INT offset = INTVAL (xop1);
6186 /* Try to fold the offset into a biasing of the base register and
6187 then offsetting that. Don't do this when optimizing for space
6188 since it can cause too many CSEs. */
6189 if (optimize_size && offset >= 0
6190 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6192 HOST_WIDE_INT delta;
6195 delta = offset - (256 - GET_MODE_SIZE (mode));
6196 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6197 delta = 31 * GET_MODE_SIZE (mode);
6199 delta = offset & (~31 * GET_MODE_SIZE (mode));
6201 xop0 = force_operand (plus_constant (xop0, offset - delta),
6203 x = plus_constant (xop0, delta);
6205 else if (offset < 0 && offset > -256)
6206 /* Small negative offsets are best done with a subtract before the
6207 dereference, since forcing these into a register normally takes two
instructions. */
6209 x = force_operand (x, NULL_RTX);
6212 /* For the remaining cases, force the constant into a register. */
6213 xop1 = force_reg (SImode, xop1);
6214 x = gen_rtx_PLUS (SImode, xop0, xop1);
6217 else if (GET_CODE (x) == PLUS
6218 && s_register_operand (XEXP (x, 1), SImode)
6219 && !s_register_operand (XEXP (x, 0), SImode))
6221 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6223 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6228 /* We need to find and carefully transform any SYMBOL and LABEL
6229 references, so go back to the original address expression. */
6230 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6232 if (new_x != orig_x)
6240 thumb_legitimize_reload_address (rtx *x_p,
6241 enum machine_mode mode,
6242 int opnum, int type,
6243 int ind_levels ATTRIBUTE_UNUSED)
6247 if (GET_CODE (x) == PLUS
6248 && GET_MODE_SIZE (mode) < 4
6249 && REG_P (XEXP (x, 0))
6250 && XEXP (x, 0) == stack_pointer_rtx
6251 && GET_CODE (XEXP (x, 1)) == CONST_INT
6252 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6257 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6258 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6262 /* If both registers are hi-regs, then it's better to reload the
6263 entire expression rather than each register individually. That
6264 only requires one reload register rather than two. */
6265 if (GET_CODE (x) == PLUS
6266 && REG_P (XEXP (x, 0))
6267 && REG_P (XEXP (x, 1))
6268 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6269 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6274 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6275 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6282 /* Test for various thread-local symbols. */
6284 /* Return TRUE if X is a thread-local symbol. */
6287 arm_tls_symbol_p (rtx x)
6289 if (! TARGET_HAVE_TLS)
6292 if (GET_CODE (x) != SYMBOL_REF)
6295 return SYMBOL_REF_TLS_MODEL (x) != 0;
6298 /* Helper for arm_tls_referenced_p. */
6301 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6303 if (GET_CODE (*x) == SYMBOL_REF)
6304 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6306 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6307 TLS offsets, not real symbol references. */
6308 if (GET_CODE (*x) == UNSPEC
6309 && XINT (*x, 1) == UNSPEC_TLS)
6315 /* Return TRUE if X contains any TLS symbol references. */
6318 arm_tls_referenced_p (rtx x)
6320 if (! TARGET_HAVE_TLS)
6323 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6326 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6329 arm_cannot_force_const_mem (rtx x)
6333 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6335 split_const (x, &base, &offset);
6336 if (GET_CODE (base) == SYMBOL_REF
6337 && !offset_within_block_p (base, INTVAL (offset)))
6340 return arm_tls_referenced_p (x);
6343 #define REG_OR_SUBREG_REG(X) \
6344 (GET_CODE (X) == REG \
6345 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6347 #define REG_OR_SUBREG_RTX(X) \
6348 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6351 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6353 enum machine_mode mode = GET_MODE (x);
6367 return COSTS_N_INSNS (1);
6370 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6373 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6380 return COSTS_N_INSNS (2) + cycles;
6382 return COSTS_N_INSNS (1) + 16;
6385 return (COSTS_N_INSNS (1)
6386 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6387 + GET_CODE (SET_DEST (x)) == MEM));
6392 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6394 if (thumb_shiftable_const (INTVAL (x)))
6395 return COSTS_N_INSNS (2);
6396 return COSTS_N_INSNS (3);
6398 else if ((outer == PLUS || outer == COMPARE)
6399 && INTVAL (x) < 256 && INTVAL (x) > -256)
6401 else if ((outer == IOR || outer == XOR || outer == AND)
6402 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6403 return COSTS_N_INSNS (1);
6404 else if (outer == AND)
6405 {
6406 int i;
6407 /* This duplicates the tests in the andsi3 expander. */
6408 for (i = 9; i <= 31; i++)
6409 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6410 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6411 return COSTS_N_INSNS (2);
6412 }
6413 else if (outer == ASHIFT || outer == ASHIFTRT
6414 || outer == LSHIFTRT)
6416 return COSTS_N_INSNS (2);
6422 return COSTS_N_INSNS (3);
6440 /* XXX another guess. */
6441 /* Memory costs quite a lot for the first word, but subsequent words
6442 load at the equivalent of a single insn each. */
6443 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6444 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6445 ? 4 : 0));
6449 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6455 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6456 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6462 return total + COSTS_N_INSNS (1);
6464 /* Assume a two-shift sequence. Increase the cost slightly so
6465 we prefer actual shifts over an extend operation. */
6466 return total + 1 + COSTS_N_INSNS (2);
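/* Illustrative stand-alone sketch (not part of the original port): the
   andsi3 test duplicated above accepts masks of the form 2^i - 1 (a run
   of low set bits) or the complement of one (a run of high set bits),
   because Thumb-1 can synthesize such AND masks with a two-instruction
   shift pair instead of loading the constant.  The helper name below is
   hypothetical.  */
static int
thumb1_shift_mask_p (unsigned long long val)
{
  int i;
  for (i = 9; i <= 31; i++)
    {
      unsigned long long low_run = (1ULL << i) - 1;
      if (val == low_run || val == ((~low_run) & 0xffffffffULL))
        return 1;  /* e.g. 0x0000ffff (i == 16) or 0xffff0000.  */
    }
  return 0;
}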
6474 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6476 enum machine_mode mode = GET_MODE (x);
6477 enum rtx_code subcode;
6479 enum rtx_code code = GET_CODE (x);
6485 /* Memory costs quite a lot for the first word, but subsequent words
6486 load at the equivalent of a single insn each. */
6487 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6494 if (TARGET_HARD_FLOAT && mode == SFmode)
6495 *total = COSTS_N_INSNS (2);
6496 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6497 *total = COSTS_N_INSNS (4);
6499 *total = COSTS_N_INSNS (20);
6503 if (GET_CODE (XEXP (x, 1)) == REG)
6504 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
6505 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6506 *total = rtx_cost (XEXP (x, 1), code, speed);
6512 *total += COSTS_N_INSNS (4);
6517 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6518 *total += rtx_cost (XEXP (x, 0), code, speed);
6521 *total += COSTS_N_INSNS (3);
6525 *total += COSTS_N_INSNS (1);
6526 /* Increase the cost of complex shifts because they aren't any faster,
6527 and reduce dual issue opportunities. */
6528 if (arm_tune_cortex_a9
6529 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6537 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6538 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6539 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6541 *total += rtx_cost (XEXP (x, 1), code, speed);
6545 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6546 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6548 *total += rtx_cost (XEXP (x, 0), code, speed);
6555 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6557 if (TARGET_HARD_FLOAT
6558 && (mode == SFmode
6559 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6561 *total = COSTS_N_INSNS (1);
6562 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6563 && arm_const_double_rtx (XEXP (x, 0)))
6565 *total += rtx_cost (XEXP (x, 1), code, speed);
6569 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6570 && arm_const_double_rtx (XEXP (x, 1)))
6572 *total += rtx_cost (XEXP (x, 0), code, speed);
6578 *total = COSTS_N_INSNS (20);
6582 *total = COSTS_N_INSNS (1);
6583 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6584 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6586 *total += rtx_cost (XEXP (x, 1), code, speed);
6590 subcode = GET_CODE (XEXP (x, 1));
6591 if (subcode == ASHIFT || subcode == ASHIFTRT
6592 || subcode == LSHIFTRT
6593 || subcode == ROTATE || subcode == ROTATERT)
6595 *total += rtx_cost (XEXP (x, 0), code, speed);
6596 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6600 /* A shift as a part of RSB costs no more than RSB itself. */
6601 if (GET_CODE (XEXP (x, 0)) == MULT
6602 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6604 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6605 *total += rtx_cost (XEXP (x, 1), code, speed);
6609 if (subcode == MULT
6610 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6612 *total += rtx_cost (XEXP (x, 0), code, speed);
6613 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6617 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6618 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6620 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6621 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6622 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6623 *total += COSTS_N_INSNS (1);
6631 if (code == PLUS && arm_arch6 && mode == SImode
6632 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6633 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6635 *total = COSTS_N_INSNS (1);
6636 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6638 *total += rtx_cost (XEXP (x, 1), code, speed);
6642 /* MLA: All arguments must be registers. We filter out
6643 multiplication by a power of two, so that we fall down into
6644 the code below. */
6645 if (GET_CODE (XEXP (x, 0)) == MULT
6646 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6648 /* The cost comes from the cost of the multiply. */
6652 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6654 if (TARGET_HARD_FLOAT
6655 && (mode == SFmode
6656 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6658 *total = COSTS_N_INSNS (1);
6659 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6660 && arm_const_double_rtx (XEXP (x, 1)))
6662 *total += rtx_cost (XEXP (x, 0), code, speed);
6669 *total = COSTS_N_INSNS (20);
6673 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6674 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6676 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6677 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6678 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6679 *total += COSTS_N_INSNS (1);
6685 case AND: case XOR: case IOR:
6687 /* Normally the frame registers will be split into reg+const during
6688 reload, so it is a bad idea to combine them with other instructions,
6689 since then they might not be moved outside of loops. As a compromise
6690 we allow integration with ops that have a constant as their second
6691 operand. */
6692 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6693 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6694 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6695 *total = COSTS_N_INSNS (1);
6699 *total += COSTS_N_INSNS (2);
6700 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6701 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6703 *total += rtx_cost (XEXP (x, 0), code, speed);
6710 *total += COSTS_N_INSNS (1);
6711 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6712 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6714 *total += rtx_cost (XEXP (x, 0), code, speed);
6717 subcode = GET_CODE (XEXP (x, 0));
6718 if (subcode == ASHIFT || subcode == ASHIFTRT
6719 || subcode == LSHIFTRT
6720 || subcode == ROTATE || subcode == ROTATERT)
6722 *total += rtx_cost (XEXP (x, 1), code, speed);
6723 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6727 if (subcode == MULT
6728 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6730 *total += rtx_cost (XEXP (x, 1), code, speed);
6731 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6735 if (subcode == UMIN || subcode == UMAX
6736 || subcode == SMIN || subcode == SMAX)
6738 *total = COSTS_N_INSNS (3);
6745 /* This should have been handled by the CPU specific routines. */
6749 if (arm_arch3m && mode == SImode
6750 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6752 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6753 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6754 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6755 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6757 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6760 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6764 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6766 if (TARGET_HARD_FLOAT
6767 && (mode == SFmode
6768 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6770 *total = COSTS_N_INSNS (1);
6773 *total = COSTS_N_INSNS (2);
6779 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6780 if (mode == SImode && code == NOT)
6782 subcode = GET_CODE (XEXP (x, 0));
6783 if (subcode == ASHIFT || subcode == ASHIFTRT
6784 || subcode == LSHIFTRT
6785 || subcode == ROTATE || subcode == ROTATERT
6786 || (subcode == MULT
6787 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6789 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6790 /* Register shifts cost an extra cycle. */
6791 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6792 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6801 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6803 *total = COSTS_N_INSNS (4);
6807 operand = XEXP (x, 0);
6809 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6810 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6811 && GET_CODE (XEXP (operand, 0)) == REG
6812 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6813 *total += COSTS_N_INSNS (1);
6814 *total += (rtx_cost (XEXP (x, 1), code, speed)
6815 + rtx_cost (XEXP (x, 2), code, speed));
6819 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6821 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6827 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6828 && mode == SImode && XEXP (x, 1) == const0_rtx)
6830 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6836 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6837 && mode == SImode && XEXP (x, 1) == const0_rtx)
6839 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6859 /* SCC insns. If the comparison has already been
6860 performed, they cost 2 instructions. Otherwise they need
6861 an additional comparison before them. */
6862 *total = COSTS_N_INSNS (2);
6863 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6870 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6876 *total += COSTS_N_INSNS (1);
6877 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6878 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6880 *total += rtx_cost (XEXP (x, 0), code, speed);
6884 subcode = GET_CODE (XEXP (x, 0));
6885 if (subcode == ASHIFT || subcode == ASHIFTRT
6886 || subcode == LSHIFTRT
6887 || subcode == ROTATE || subcode == ROTATERT)
6889 *total += rtx_cost (XEXP (x, 1), code, speed);
6890 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6894 if (subcode == MULT
6895 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6897 *total += rtx_cost (XEXP (x, 1), code, speed);
6898 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6908 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6909 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6910 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6911 *total += rtx_cost (XEXP (x, 1), code, speed);
6915 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6917 if (TARGET_HARD_FLOAT
6918 && (mode == SFmode
6919 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6921 *total = COSTS_N_INSNS (1);
6924 *total = COSTS_N_INSNS (20);
6927 *total = COSTS_N_INSNS (1);
6929 *total += COSTS_N_INSNS (3);
6935 if (GET_MODE_CLASS (mode) == MODE_INT)
6937 rtx op = XEXP (x, 0);
6938 enum machine_mode opmode = GET_MODE (op);
6941 *total += COSTS_N_INSNS (1);
6943 if (opmode != SImode)
6947 /* If !arm_arch4, we use one of the extendhisi2_mem
6948 or movhi_bytes patterns for HImode. For a QImode
6949 sign extension, we first zero-extend from memory
6950 and then perform a shift sequence. */
6951 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6952 *total += COSTS_N_INSNS (2);
6955 *total += COSTS_N_INSNS (1);
6957 /* We don't have the necessary insn, so we need to perform some
6958 other operation. */
6959 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6960 /* An and with constant 255. */
6961 *total += COSTS_N_INSNS (1);
6963 /* A shift sequence. Increase costs slightly to avoid
6964 combining two shifts into an extend operation. */
6965 *total += COSTS_N_INSNS (2) + 1;
6971 switch (GET_MODE (XEXP (x, 0)))
6978 *total = COSTS_N_INSNS (1);
6988 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6992 if (const_ok_for_arm (INTVAL (x))
6993 || const_ok_for_arm (~INTVAL (x)))
6994 *total = COSTS_N_INSNS (1);
6996 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6997 INTVAL (x), NULL_RTX,
6998 NULL_RTX, 0, 0));
7004 *total = COSTS_N_INSNS (3);
7008 *total = COSTS_N_INSNS (1);
7012 *total = COSTS_N_INSNS (1);
7013 *total += rtx_cost (XEXP (x, 0), code, speed);
7017 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7018 && (mode == SFmode || !TARGET_VFP_SINGLE))
7019 *total = COSTS_N_INSNS (1);
7021 *total = COSTS_N_INSNS (4);
7025 *total = COSTS_N_INSNS (4);
7030 /* Estimates the size cost of thumb1 instructions.
7031 For now most of the code is copied from thumb1_rtx_costs. We need more
7032 fine-grained tuning when we have more related test cases.
7034 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7036 enum machine_mode mode = GET_MODE (x);
7049 return COSTS_N_INSNS (1);
7052 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7054 /* The Thumb-1 mul instruction can't operate on a constant; we must
7055 load it into a register first. */
7056 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7057 return COSTS_N_INSNS (1) + const_size;
7059 return COSTS_N_INSNS (1);
7062 return (COSTS_N_INSNS (1)
7063 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7064 + (GET_CODE (SET_DEST (x)) == MEM)));
7069 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7070 return COSTS_N_INSNS (1);
7071 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7072 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7073 return COSTS_N_INSNS (2);
7074 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7075 if (thumb_shiftable_const (INTVAL (x)))
7076 return COSTS_N_INSNS (2);
7077 return COSTS_N_INSNS (3);
7079 else if ((outer == PLUS || outer == COMPARE)
7080 && INTVAL (x) < 256 && INTVAL (x) > -256)
7081 return 0;
7082 else if ((outer == IOR || outer == XOR || outer == AND)
7083 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7084 return COSTS_N_INSNS (1);
7085 else if (outer == AND)
7086 {
7087 int i;
7088 /* This duplicates the tests in the andsi3 expander. */
7089 for (i = 9; i <= 31; i++)
7090 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7091 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7092 return COSTS_N_INSNS (2);
7093 }
7094 else if (outer == ASHIFT || outer == ASHIFTRT
7095 || outer == LSHIFTRT)
7097 return COSTS_N_INSNS (2);
7103 return COSTS_N_INSNS (3);
7121 /* XXX another guess. */
7122 /* Memory costs quite a lot for the first word, but subsequent words
7123 load at the equivalent of a single insn each. */
7124 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7125 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7126 ? 4 : 0));
7130 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7135 /* XXX still guessing. */
7136 switch (GET_MODE (XEXP (x, 0)))
7139 return (1 + (mode == DImode ? 4 : 0)
7140 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7143 return (4 + (mode == DImode ? 4 : 0)
7144 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7147 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
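/* A rough stand-alone model (mine, not from the source) of the Thumb-1
   constant-size ladder above: one insn for 0..255 (movs), two for small
   negatives (movs + negs, the "J" split) or for an 8-bit value shifted
   left (movs + lsls, the "K" split), three otherwise.  The trailing-zero
   strip mirrors thumb_shiftable_const as I read it.  */
static int
thumb1_const_size_model (long value)
{
  unsigned long v = (unsigned long) value & 0xffffffffUL;

  if (v < 256)
    return 1;			/* movs rd, #value  */
  if (value >= -255 && value <= -1)
    return 2;			/* movs rd, #-value; negs rd, rd  */
  while (v && !(v & 1))
    v >>= 1;			/* Strip trailing zeros.  */
  if (v < 256)
    return 2;			/* movs rd, #v; lsls rd, rd, #n  */
  return 3;
}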
7158 /* RTX costs when optimizing for size. */
7160 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7163 enum machine_mode mode = GET_MODE (x);
7166 *total = thumb1_size_rtx_costs (x, code, outer_code);
7170 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7174 /* A memory access costs 1 insn if the mode is small, or the address is
7175 a single register, otherwise it costs one insn per word. */
7176 if (REG_P (XEXP (x, 0)))
7177 *total = COSTS_N_INSNS (1);
7178 else if (flag_pic
7179 && GET_CODE (XEXP (x, 0)) == PLUS
7180 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7181 /* This will be split into two instructions.
7182 See arm.md:calculate_pic_address. */
7183 *total = COSTS_N_INSNS (2);
7185 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7192 /* Needs a libcall, so it costs about this. */
7193 *total = COSTS_N_INSNS (2);
7197 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7199 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7207 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7209 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7212 else if (mode == SImode)
7214 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7215 /* Slightly disparage register shifts, but not by much. */
7216 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7217 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7221 /* Needs a libcall. */
7222 *total = COSTS_N_INSNS (2);
7226 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7227 && (mode == SFmode || !TARGET_VFP_SINGLE))
7229 *total = COSTS_N_INSNS (1);
7235 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7236 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7238 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7239 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7240 || subcode1 == ROTATE || subcode1 == ROTATERT
7241 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7242 || subcode1 == ASHIFTRT)
7244 /* It's just the cost of the two operands. */
7249 *total = COSTS_N_INSNS (1);
7253 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7257 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7258 && (mode == SFmode || !TARGET_VFP_SINGLE))
7260 *total = COSTS_N_INSNS (1);
7264 /* A shift as a part of ADD costs nothing. */
7265 if (GET_CODE (XEXP (x, 0)) == MULT
7266 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7268 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7269 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7270 *total += rtx_cost (XEXP (x, 1), code, false);
7275 case AND: case XOR: case IOR:
7278 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7280 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7281 || subcode == LSHIFTRT || subcode == ASHIFTRT
7282 || (code == AND && subcode == NOT))
7284 /* It's just the cost of the two operands. */
7290 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7294 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7298 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7299 && (mode == SFmode || !TARGET_VFP_SINGLE))
7301 *total = COSTS_N_INSNS (1);
7307 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7316 if (cc_register (XEXP (x, 0), VOIDmode))
7317 *total = 0;
7318 else
7319 *total = COSTS_N_INSNS (1);
7323 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7324 && (mode == SFmode || !TARGET_VFP_SINGLE))
7325 *total = COSTS_N_INSNS (1);
7327 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7332 return arm_rtx_costs_1 (x, outer_code, total, 0);
7335 if (const_ok_for_arm (INTVAL (x)))
7336 /* A multiplication by a constant requires another instruction
7337 to load the constant to a register. */
7338 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7339 ? 1 : 0);
7340 else if (const_ok_for_arm (~INTVAL (x)))
7341 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7342 else if (const_ok_for_arm (-INTVAL (x)))
7344 if (outer_code == COMPARE || outer_code == PLUS
7345 || outer_code == MINUS)
7346 *total = 0;
7347 else
7348 *total = COSTS_N_INSNS (1);
7351 *total = COSTS_N_INSNS (2);
7357 *total = COSTS_N_INSNS (2);
7361 *total = COSTS_N_INSNS (4);
7366 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7367 cost of these slightly. */
7368 *total = COSTS_N_INSNS (1) + 1;
7372 if (mode != VOIDmode)
7373 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7375 *total = COSTS_N_INSNS (4); /* Who knows? */
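/* For reference, a stand-alone sketch of the predicate used throughout
   these cost functions (my paraphrase of the classic rule, not the
   port's own code): const_ok_for_arm accepts any ARM "modified
   immediate", i.e. an 8-bit value rotated right by an even amount.  */
static int
arm_modified_immediate_p (unsigned long val)
{
  int rot;

  val &= 0xffffffffUL;
  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate VAL left by ROT bits; if an 8-bit value remains, VAL
         is encodable as imm8 rotated right by ROT.  */
      unsigned long r = ((val << rot) | (rot ? val >> (32 - rot) : 0))
                        & 0xffffffffUL;
      if (r < 256)
        return 1;
    }
  return 0;
}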
7380 /* RTX costs. Dispatch to the size-optimized costs when optimizing for size, otherwise to the per-core speed costs. */
7382 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7386 return arm_size_rtx_costs (x, (enum rtx_code) code,
7387 (enum rtx_code) outer_code, total);
7389 return current_tune->rtx_costs (x, (enum rtx_code) code,
7390 (enum rtx_code) outer_code,
7394 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7395 supported on any "slowmul" cores, so it can be ignored. */
7398 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7399 int *total, bool speed)
7401 enum machine_mode mode = GET_MODE (x);
7405 *total = thumb1_rtx_costs (x, code, outer_code);
7412 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7415 *total = COSTS_N_INSNS (20);
7419 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7421 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7422 & (unsigned HOST_WIDE_INT) 0xffffffff);
7423 int cost, const_ok = const_ok_for_arm (i);
7424 int j, booth_unit_size;
7426 /* Tune as appropriate. */
7427 cost = const_ok ? 4 : 8;
7428 booth_unit_size = 2;
7429 for (j = 0; i && j < 32; j += booth_unit_size)
7430 {
7431 i >>= booth_unit_size;
7432 cost++;
7433 }
7435 *total = COSTS_N_INSNS (cost);
7436 *total += rtx_cost (XEXP (x, 0), code, speed);
7440 *total = COSTS_N_INSNS (20);
7444 return arm_rtx_costs_1 (x, outer_code, total, speed);
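/* Stand-alone sketch of the Booth-style estimate above (illustrative;
   the 4-or-8 starting overhead is left out).  A "slowmul" core retires
   BOOTH_UNIT_SIZE bits of the 32-bit multiplier per cycle and stops
   early once the remaining bits are all zero, so small positive
   constants multiply faster than full-width ones.  */
static int
booth_multiply_cycles (unsigned long multiplier, int booth_unit_size)
{
  unsigned long i = multiplier & 0xffffffffUL;
  int j, cycles = 0;

  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cycles++;
    }
  /* e.g. booth_multiply_cycles (100, 2) == 4, since 100 has seven
     significant bits; booth_multiply_cycles (0xffffffff, 2) == 16.  */
  return cycles;
}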
7449 /* RTX cost for cores with a fast multiply unit (M variants). */
7452 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7453 int *total, bool speed)
7455 enum machine_mode mode = GET_MODE (x);
7459 *total = thumb1_rtx_costs (x, code, outer_code);
7463 /* ??? should thumb2 use different costs? */
7467 /* There is no point basing this on the tuning, since it is always the
7468 fast variant if it exists at all. */
7469 if (mode == DImode
7470 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7471 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7472 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7474 *total = COSTS_N_INSNS (2);
7481 *total = COSTS_N_INSNS (5);
7485 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7487 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7488 & (unsigned HOST_WIDE_INT) 0xffffffff);
7489 int cost, const_ok = const_ok_for_arm (i);
7490 int j, booth_unit_size;
7492 /* Tune as appropriate. */
7493 cost = const_ok ? 4 : 8;
7494 booth_unit_size = 8;
7495 for (j = 0; i && j < 32; j += booth_unit_size)
7496 {
7497 i >>= booth_unit_size;
7498 cost++;
7499 }
7501 *total = COSTS_N_INSNS (cost);
7507 *total = COSTS_N_INSNS (4);
7511 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7513 if (TARGET_HARD_FLOAT
7514 && (mode == SFmode
7515 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7517 *total = COSTS_N_INSNS (1);
7522 /* Requires a libcall. */
7523 *total = COSTS_N_INSNS (20);
7527 return arm_rtx_costs_1 (x, outer_code, total, speed);
7532 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7533 so it can be ignored. */
7536 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7537 int *total, bool speed)
7539 enum machine_mode mode = GET_MODE (x);
7543 *total = thumb1_rtx_costs (x, code, outer_code);
7550 if (GET_CODE (XEXP (x, 0)) != MULT)
7551 return arm_rtx_costs_1 (x, outer_code, total, speed);
7553 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7554 will stall until the multiplication is complete. */
7555 *total = COSTS_N_INSNS (3);
7559 /* There is no point basing this on the tuning, since it is always the
7560 fast variant if it exists at all. */
7561 if (mode == DImode
7562 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7563 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7564 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7566 *total = COSTS_N_INSNS (2);
7573 *total = COSTS_N_INSNS (5);
7577 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7579 /* If operand 1 is a constant we can more accurately
7580 calculate the cost of the multiply. The multiplier can
7581 retire 15 bits on the first cycle and a further 12 on the
7582 second. We do, of course, have to load the constant into
7583 a register first. */
7584 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7585 /* There's a general overhead of one cycle. */
7586 int cost = 1;
7587 unsigned HOST_WIDE_INT masked_const;
7592 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7594 masked_const = i & 0xffff8000;
7595 if (masked_const != 0)
7598 masked_const = i & 0xf8000000;
7599 if (masked_const != 0)
7602 *total = COSTS_N_INSNS (cost);
7608 *total = COSTS_N_INSNS (3);
7612 /* Requires a libcall. */
7613 *total = COSTS_N_INSNS (20);
7617 return arm_rtx_costs_1 (x, outer_code, total, speed);
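/* Stand-alone restatement of the XScale early-termination rule above
   (illustrative).  The multiplier examines 15 bits of the (possibly
   inverted) constant in the first cycle and 12 more in the second, on
   top of one cycle of general overhead, giving one to three cycles in
   total.  */
static int
xscale_multiply_cycles (unsigned long multiplier)
{
  unsigned long i = multiplier & 0xffffffffUL;
  int cycles = 1;		/* General one-cycle overhead.  */

  if (i & 0x80000000UL)		/* Negative-looking constants are  */
    i = (~i) & 0xffffffffUL;	/* handled via their complement.  */

  if (i & 0xffff8000UL)		/* Significant bits above the low 15.  */
    {
      cycles++;
      if (i & 0xf8000000UL)	/* Significant bits above the low 27.  */
        cycles++;
    }
  return cycles;
}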
7622 /* RTX costs for 9e (and later) cores. */
7625 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7626 int *total, bool speed)
7628 enum machine_mode mode = GET_MODE (x);
7635 *total = COSTS_N_INSNS (3);
7639 *total = thumb1_rtx_costs (x, code, outer_code);
7647 /* There is no point basing this on the tuning, since it is always the
7648 fast variant if it exists at all. */
7649 if (mode == DImode
7650 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7651 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7652 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7654 *total = COSTS_N_INSNS (2);
7661 *total = COSTS_N_INSNS (5);
7667 *total = COSTS_N_INSNS (2);
7671 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7673 if (TARGET_HARD_FLOAT
7674 && (mode == SFmode
7675 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7677 *total = COSTS_N_INSNS (1);
7682 *total = COSTS_N_INSNS (20);
7686 return arm_rtx_costs_1 (x, outer_code, total, speed);
7689 /* All address computations that can be done are free, but rtx cost returns
7690 the same for practically all of them. So we weight the different types
7691 of address here in the order (most preferred first):
7692 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7694 arm_arm_address_cost (rtx x)
7696 enum rtx_code c = GET_CODE (x);
7698 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7700 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7705 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7708 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7718 arm_thumb_address_cost (rtx x)
7720 enum rtx_code c = GET_CODE (x);
7725 && GET_CODE (XEXP (x, 0)) == REG
7726 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7733 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7735 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7738 /* Adjust cost hook for XScale. */
7740 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7742 /* Some true dependencies can have a higher cost depending
7743 on precisely how certain input operands are used. */
7744 if (REG_NOTE_KIND (link) == 0
7745 && recog_memoized (insn) >= 0
7746 && recog_memoized (dep) >= 0)
7748 int shift_opnum = get_attr_shift (insn);
7749 enum attr_type attr_type = get_attr_type (dep);
7751 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7752 operand for INSN. If we have a shifted input operand and the
7753 instruction we depend on is another ALU instruction, then we may
7754 have to account for an additional stall. */
7755 if (shift_opnum != 0
7756 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7758 rtx shifted_operand;
7761 /* Get the shifted operand. */
7762 extract_insn (insn);
7763 shifted_operand = recog_data.operand[shift_opnum];
7765 /* Iterate over all the operands in DEP. If we write an operand
7766 that overlaps with SHIFTED_OPERAND, then we have to increase the
7767 cost of this dependency. */
7769 preprocess_constraints ();
7770 for (opno = 0; opno < recog_data.n_operands; opno++)
7772 /* We can ignore strict inputs. */
7773 if (recog_data.operand_type[opno] == OP_IN)
7776 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7788 /* Adjust cost hook for Cortex A9. */
7790 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7792 switch (REG_NOTE_KIND (link))
7799 case REG_DEP_OUTPUT:
7800 if (recog_memoized (insn) >= 0
7801 && recog_memoized (dep) >= 0)
7803 if (GET_CODE (PATTERN (insn)) == SET)
7806 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7808 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7810 enum attr_type attr_type_insn = get_attr_type (insn);
7811 enum attr_type attr_type_dep = get_attr_type (dep);
7813 /* By default all dependencies of the form
7814 s0 = s0 <op> s1
7815 s0 = s0 <op> s2
7816 have an extra latency of 1 cycle because
7817 of the input and output dependency in this
7818 case. However this gets modeled as a true
7819 dependency and hence all these checks. */
7820 if (REG_P (SET_DEST (PATTERN (insn)))
7821 && REG_P (SET_DEST (PATTERN (dep)))
7822 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7823 SET_DEST (PATTERN (dep))))
7825 /* FMACS is a special case where the dependent
7826 instruction can be issued 3 cycles before
7827 the normal latency in case of an output
7828 dependency. */
7829 if ((attr_type_insn == TYPE_FMACS
7830 || attr_type_insn == TYPE_FMACD)
7831 && (attr_type_dep == TYPE_FMACS
7832 || attr_type_dep == TYPE_FMACD))
7834 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7835 *cost = insn_default_latency (dep) - 3;
7837 *cost = insn_default_latency (dep);
7842 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7843 *cost = insn_default_latency (dep) + 1;
7845 *cost = insn_default_latency (dep);
7861 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7862 It corrects the value of COST based on the relationship between
7863 INSN and DEP through the dependence LINK. It returns the new
7864 value. There is a per-core adjust_cost hook to adjust scheduler costs
7865 and the per-core hook can choose to completely override the generic
7866 adjust_cost function. Only put bits of code into arm_adjust_cost that
7867 are common across all cores. */
7869 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7873 /* When generating Thumb-1 code, we want to place flag-setting operations
7874 close to a conditional branch which depends on them, so that we can
7875 omit the comparison. */
7877 && REG_NOTE_KIND (link) == 0
7878 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7879 && recog_memoized (dep) >= 0
7880 && get_attr_conds (dep) == CONDS_SET)
7883 if (current_tune->sched_adjust_cost != NULL)
7885 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7889 /* XXX This is not strictly true for the FPA. */
7890 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7891 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7894 /* Call insns don't incur a stall, even if they follow a load. */
7895 if (REG_NOTE_KIND (link) == 0
7896 && GET_CODE (insn) == CALL_INSN)
7899 if ((i_pat = single_set (insn)) != NULL
7900 && GET_CODE (SET_SRC (i_pat)) == MEM
7901 && (d_pat = single_set (dep)) != NULL
7902 && GET_CODE (SET_DEST (d_pat)) == MEM)
7904 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7905 /* This is a load after a store, there is no conflict if the load reads
7906 from a cached area. Assume that loads from the stack, and from the
7907 constant pool are cached, and that others will miss. This is a
7908 crude approximation. */
7910 if ((GET_CODE (src_mem) == SYMBOL_REF
7911 && CONSTANT_POOL_ADDRESS_P (src_mem))
7912 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7913 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7914 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7921 static int fp_consts_inited = 0;
7923 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7924 static const char * const strings_fp[8] =
7927 "4", "5", "0.5", "10"
7930 static REAL_VALUE_TYPE values_fp[8];
7933 init_fp_table (void)
7939 fp_consts_inited = 1;
7941 fp_consts_inited = 8;
7943 for (i = 0; i < fp_consts_inited; i++)
7945 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7950 /* Return TRUE if rtx X is a valid immediate FP constant. */
7952 arm_const_double_rtx (rtx x)
7957 if (!fp_consts_inited)
7960 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7961 if (REAL_VALUE_MINUS_ZERO (r))
7964 for (i = 0; i < fp_consts_inited; i++)
7965 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7971 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
7973 neg_const_double_rtx_ok_for_fpa (rtx x)
7978 if (!fp_consts_inited)
7981 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7982 r = real_value_negate (&r);
7983 if (REAL_VALUE_MINUS_ZERO (r))
7986 for (i = 0; i < 8; i++)
7987 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7994 /* VFPv3 has a fairly wide range of representable immediates, formed from
7995 "quarter-precision" floating-point values. These can be evaluated using this
7996 formula (with ^ for exponentiation):
7998 -1^s * n * 2^-r
8000 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8001 16 <= n <= 31 and 0 <= r <= 7.
8003 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8005 - A (most-significant) is the sign bit.
8006 - BCD are the exponent (encoded as r XOR 3).
8007 - EFGH are the mantissa (encoded as n - 16). */
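/* A worked stand-alone sketch of the mapping just described (mine, not
   from the source): pack s, n and r into the ABCDEFGH index.  For
   example, 1.0 = -1^0 * 16 * 2^-4 gives s=0, n=16, r=4, hence
   (4 ^ 3) << 4 == 0x70; 31.0 = 31 * 2^0 encodes as
   ((0 ^ 3) << 4) | 15 == 0x3f.  */
static int
quarter_precision_index (int s, int n, int r)
{
  if ((s & ~1) || n < 16 || n > 31 || r < 0 || r > 7)
    return -1;			/* Not a representable combination.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}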
8010 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8011 fconst[sd] instruction, or -1 if X isn't suitable. */
8013 vfp3_const_double_index (rtx x)
8015 REAL_VALUE_TYPE r, m;
8017 unsigned HOST_WIDE_INT mantissa, mant_hi;
8018 unsigned HOST_WIDE_INT mask;
8019 HOST_WIDE_INT m1, m2;
8020 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8022 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8025 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8027 /* We can't represent these things, so detect them first. */
8028 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8031 /* Extract sign, exponent and mantissa. */
8032 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8033 r = real_value_abs (&r);
8034 exponent = REAL_EXP (&r);
8035 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8036 highest (sign) bit, with a fixed binary point at bit point_pos.
8037 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8038 bits for the mantissa, this may fail (low bits would be lost). */
8039 real_ldexp (&m, &r, point_pos - exponent);
8040 REAL_VALUE_TO_INT (&m1, &m2, m);
8044 /* If there are bits set in the low part of the mantissa, we can't
8045 represent this value. */
8049 /* Now make it so that mantissa contains the most-significant bits, and move
8050 the point_pos to indicate that the least-significant bits have been
8051 discarded. */
8052 point_pos -= HOST_BITS_PER_WIDE_INT;
8055 /* We can permit four significant bits of mantissa only, plus a high bit
8056 which is always 1. */
8057 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8058 if ((mantissa & mask) != 0)
8061 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8062 mantissa >>= point_pos - 5;
8064 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8065 floating-point immediate zero with Neon using an integer-zero load, but
8066 that case is handled elsewhere.) */
8070 gcc_assert (mantissa >= 16 && mantissa <= 31);
8072 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8073 normalized significands are in the range [1, 2)). (Our mantissa is shifted
8074 left 4 places at this point relative to normalized IEEE754 values). GCC
8075 internally uses [0.5, 1) (see real.c), so the exponent returned from
8076 REAL_EXP must be altered. */
8077 exponent = 5 - exponent;
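/* Worked example (added note): for 1.0, GCC's REAL_EXP returns 1
   (1.0 == 0.5 * 2^1), so this yields exponent = 5 - 1 = 4; together
   with mantissa 16 that is 16 * 2^-4 == 1.0, i.e. n=16, r=4 in the
   formula above.  */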
8079 if (exponent < 0 || exponent > 7)
8082 /* Sign, mantissa and exponent are now in the correct form to plug into the
8083 formula described in the comment above. */
8084 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8087 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8089 vfp3_const_double_rtx (rtx x)
8094 return vfp3_const_double_index (x) != -1;
8097 /* Recognize immediates which can be used in various Neon instructions. Legal
8098 immediates are described by the following table (for VMVN variants, the
8099 bitwise inverse of the constant shown is recognized. In either case, VMOV
8100 is output and the correct instruction to use for a given constant is chosen
8101 by the assembler). The constant shown is replicated across all elements of
8102 the destination vector.
8104 insn elems variant constant (binary)
8105 ---- ----- ------- -----------------
8106 vmov i32 0 00000000 00000000 00000000 abcdefgh
8107 vmov i32 1 00000000 00000000 abcdefgh 00000000
8108 vmov i32 2 00000000 abcdefgh 00000000 00000000
8109 vmov i32 3 abcdefgh 00000000 00000000 00000000
8110 vmov i16 4 00000000 abcdefgh
8111 vmov i16 5 abcdefgh 00000000
8112 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8113 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8114 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8115 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8116 vmvn i16 10 00000000 abcdefgh
8117 vmvn i16 11 abcdefgh 00000000
8118 vmov i32 12 00000000 00000000 abcdefgh 11111111
8119 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8120 vmov i32 14 00000000 abcdefgh 11111111 11111111
8121 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8122 vmov i8 16 abcdefgh
8123 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8124 eeeeeeee ffffffff gggggggg hhhhhhhh
8125 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8127 For case 18, B = !b. Representable values are exactly those accepted by
8128 vfp3_const_double_index, but are output as floating-point numbers rather
8129 than integers.
8131 Variants 0-5 (inclusive) may also be used as immediates for the second
8132 operand of VORR/VBIC instructions.
8134 The INVERSE argument causes the bitwise inverse of the given operand to be
8135 recognized instead (used for recognizing legal immediates for the VAND/VORN
8136 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8137 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8138 output, rather than the real insns vbic/vorr).
8140 INVERSE makes no difference to the recognition of float vectors.
8142 The return value is the variant of immediate as shown in the above table, or
8143 -1 if the given value doesn't match any of the listed patterns.
8146 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8147 rtx *modconst, int *elementwidth)
8149 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8151 for (i = 0; i < idx; i += (STRIDE)) \
8156 immtype = (CLASS); \
8157 elsize = (ELSIZE); \
8161 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8162 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8163 unsigned char bytes[16];
8164 int immtype = -1, matches;
8165 unsigned int invmask = inverse ? 0xff : 0;
8167 /* Vectors of float constants. */
8168 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8170 rtx el0 = CONST_VECTOR_ELT (op, 0);
8173 if (!vfp3_const_double_rtx (el0))
8176 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8178 for (i = 1; i < n_elts; i++)
8180 rtx elt = CONST_VECTOR_ELT (op, i);
8183 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8185 if (!REAL_VALUES_EQUAL (r0, re))
8190 *modconst = CONST_VECTOR_ELT (op, 0);
8198 /* Splat vector constant out into a byte vector. */
8199 for (i = 0; i < n_elts; i++)
8201 rtx el = CONST_VECTOR_ELT (op, i);
8202 unsigned HOST_WIDE_INT elpart;
8203 unsigned int part, parts;
8205 if (GET_CODE (el) == CONST_INT)
8207 elpart = INTVAL (el);
8210 else if (GET_CODE (el) == CONST_DOUBLE)
8212 elpart = CONST_DOUBLE_LOW (el);
8218 for (part = 0; part < parts; part++)
8221 for (byte = 0; byte < innersize; byte++)
8223 bytes[idx++] = (elpart & 0xff) ^ invmask;
8224 elpart >>= BITS_PER_UNIT;
8226 if (GET_CODE (el) == CONST_DOUBLE)
8227 elpart = CONST_DOUBLE_HIGH (el);
8232 gcc_assert (idx == GET_MODE_SIZE (mode));
8236 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8237 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8239 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8240 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8242 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8243 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8245 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8246 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8248 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8250 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8252 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8253 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8255 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8256 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8258 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8259 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8261 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8262 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8264 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8266 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8268 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8269 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8271 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8272 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8274 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8275 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8277 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8278 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8280 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8282 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8283 && bytes[i] == bytes[(i + 8) % idx]);
8291 *elementwidth = elsize;
8295 unsigned HOST_WIDE_INT imm = 0;
8297 /* Un-invert bytes of recognized vector, if necessary. */
8299 for (i = 0; i < idx; i++)
8300 bytes[i] ^= invmask;
8304 /* FIXME: Broken on 32-bit H_W_I hosts. */
8305 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8307 for (i = 0; i < 8; i++)
8308 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8309 << (i * BITS_PER_UNIT);
8311 *modconst = GEN_INT (imm);
8315 unsigned HOST_WIDE_INT imm = 0;
8317 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8318 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8320 *modconst = GEN_INT (imm);
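/* Worked example (illustrative, not from the source): a V4SI constant
   with every element 0xab splats to the byte vector
   ab 00 00 00 ab 00 00 00 ab 00 00 00 ab 00 00 00, which satisfies the
   variant-0 test above (bytes[i] == bytes[0] with the other three bytes
   of each 32-bit group zero), so it can be emitted as
   "vmov.i32 dN, #0xab".  That single test, stand-alone (IDX is the byte
   count, a multiple of 4): */
static int
matches_vmov_i32_variant0 (const unsigned char *bytes, unsigned int idx)
{
  unsigned int i;
  for (i = 0; i < idx; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
          && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      return 0;
  return 1;
}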
8328 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8329 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8330 float elements), and a modified constant (whatever should be output for a
8331 VMOV) in *MODCONST. */
8334 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8335 rtx *modconst, int *elementwidth)
8339 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8345 *modconst = tmpconst;
8348 *elementwidth = tmpwidth;
8353 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8354 the immediate is valid, write a constant suitable for using as an operand
8355 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8356 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8359 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8360 rtx *modconst, int *elementwidth)
8364 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8366 if (retval < 0 || retval > 5)
8370 *modconst = tmpconst;
8373 *elementwidth = tmpwidth;
8378 /* Return a string suitable for output of Neon immediate logic operation
8379 MNEM. */
8382 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8383 int inverse, int quad)
8385 int width, is_valid;
8386 static char templ[40];
8388 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8390 gcc_assert (is_valid != 0);
8393 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8395 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8400 /* Output a sequence of pairwise operations to implement a reduction.
8401 NOTE: We do "too much work" here, because pairwise operations work on two
8402 registers-worth of operands in one go. Unfortunately I don't think we can
8403 exploit those extra calculations to do the full operation in fewer steps.
8404 Although all vector elements of the result but the first are ignored, we
8405 actually calculate the same result in each of the elements. An alternative
8406 such as initially loading a vector with zero to use as each of the second
8407 operands would use up an additional register and take an extra instruction,
8408 for no particular gain. */
8411 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8412 rtx (*reduc) (rtx, rtx, rtx))
8414 enum machine_mode inner = GET_MODE_INNER (mode);
8415 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8418 for (i = parts / 2; i >= 1; i /= 2)
8420 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8421 emit_insn (reduc (dest, tmpsum, tmpsum));
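/* Scalar model of the loop above (illustrative).  Treating the vector
   as a plain array, each pairwise step replaces every lane with the sum
   of an adjacent pair drawn from both halves, so after log2(N) steps
   every lane holds the full reduction: {a,b,c,d} becomes
   {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}.  */
static void
pairwise_reduce_model (int *lanes, int parts)
{
  int step, k;
  int tmp[16];			/* Assumes PARTS <= 16.  */

  for (step = parts / 2; step >= 1; step /= 2)
    {
      for (k = 0; k < parts; k++)
        tmp[k] = lanes[(2 * k) % parts] + lanes[(2 * k + 1) % parts];
      for (k = 0; k < parts; k++)
        lanes[k] = tmp[k];
    }
}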
8426 /* If VALS is a vector constant that can be loaded into a register
8427 using VDUP, generate instructions to do so and return an RTX to
8428 assign to the register. Otherwise return NULL_RTX. */
8431 neon_vdup_constant (rtx vals)
8433 enum machine_mode mode = GET_MODE (vals);
8434 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8435 int n_elts = GET_MODE_NUNITS (mode);
8436 bool all_same = true;
8440 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8443 for (i = 0; i < n_elts; ++i)
8445 x = XVECEXP (vals, 0, i);
8446 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8451 /* The elements are not all the same. We could handle repeating
8452 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8453 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8454 vdup.i16). */
8457 /* We can load this constant by using VDUP and a constant in a
8458 single ARM register. This will be cheaper than a vector
8459 load. */
8461 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8462 return gen_rtx_VEC_DUPLICATE (mode, x);
8465 /* Generate code to load VALS, which is a PARALLEL containing only
8466 constants (for vec_init) or CONST_VECTOR, efficiently into a
8467 register. Returns an RTX to copy into the register, or NULL_RTX
8468 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8471 neon_make_constant (rtx vals)
8473 enum machine_mode mode = GET_MODE (vals);
8475 rtx const_vec = NULL_RTX;
8476 int n_elts = GET_MODE_NUNITS (mode);
8480 if (GET_CODE (vals) == CONST_VECTOR)
8482 else if (GET_CODE (vals) == PARALLEL)
8484 /* A CONST_VECTOR must contain only CONST_INTs and
8485 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8486 Only store valid constants in a CONST_VECTOR. */
8487 for (i = 0; i < n_elts; ++i)
8489 rtx x = XVECEXP (vals, 0, i);
8490 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8493 if (n_const == n_elts)
8494 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8499 if (const_vec != NULL
8500 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8501 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8503 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8504 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8505 pipeline cycle; creating the constant takes one or two ARM
8506 pipeline cycles. */
8508 else if (const_vec != NULL_RTX)
8509 /* Load from constant pool. On Cortex-A8 this takes two cycles
8510 (for either double or quad vectors). We can not take advantage
8511 of single-cycle VLD1 because we need a PC-relative addressing
8512 mode. */
8515 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8516 We can not construct an initializer. */
8520 /* Initialize vector TARGET to VALS. */
8523 neon_expand_vector_init (rtx target, rtx vals)
8525 enum machine_mode mode = GET_MODE (target);
8526 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8527 int n_elts = GET_MODE_NUNITS (mode);
8528 int n_var = 0, one_var = -1;
8529 bool all_same = true;
8533 for (i = 0; i < n_elts; ++i)
8535 x = XVECEXP (vals, 0, i);
8536 if (!CONSTANT_P (x))
8537 ++n_var, one_var = i;
8539 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8545 rtx constant = neon_make_constant (vals);
8546 if (constant != NULL_RTX)
8548 emit_move_insn (target, constant);
8553 /* Splat a single non-constant element if we can. */
8554 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8556 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8557 emit_insn (gen_rtx_SET (VOIDmode, target,
8558 gen_rtx_VEC_DUPLICATE (mode, x)));
8562 /* One field is non-constant. Load constant then overwrite varying
8563 field. This is more efficient than using the stack. */
8566 rtx copy = copy_rtx (vals);
8567 rtx index = GEN_INT (one_var);
8569 /* Load constant part of vector, substitute neighboring value for
8570 varying element. */
8571 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8572 neon_expand_vector_init (target, copy);
8574 /* Insert variable. */
8575 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8579 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8582 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8585 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8588 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8591 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8594 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8597 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8600 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8603 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8611 /* Construct the vector in memory one field at a time
8612 and load the whole vector. */
8613 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8614 for (i = 0; i < n_elts; i++)
8615 emit_move_insn (adjust_address_nv (mem, inner_mode,
8616 i * GET_MODE_SIZE (inner_mode)),
8617 XVECEXP (vals, 0, i));
8618 emit_move_insn (target, mem);
8621 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8622 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8623 reported source locations are bogus. */
8626 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8631 gcc_assert (GET_CODE (operand) == CONST_INT);
8633 lane = INTVAL (operand);
8635 if (lane < low || lane >= high)
8639 /* Bounds-check lanes. */
8642 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8644 bounds_check (operand, low, high, "lane out of range");
8647 /* Bounds-check constants. */
8650 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8652 bounds_check (operand, low, high, "constant out of range");
8656 neon_element_bits (enum machine_mode mode)
8659 return GET_MODE_BITSIZE (mode);
8661 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8665 /* Predicates for `match_operand' and `match_operator'. */
8667 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8669 cirrus_memory_offset (rtx op)
8671 /* Reject eliminable registers. */
8672 if (! (reload_in_progress || reload_completed)
8673 && ( reg_mentioned_p (frame_pointer_rtx, op)
8674 || reg_mentioned_p (arg_pointer_rtx, op)
8675 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8676 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8677 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8678 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8681 if (GET_CODE (op) == MEM)
8687 /* Match: (mem (reg)). */
8688 if (GET_CODE (ind) == REG)
8694 if (GET_CODE (ind) == PLUS
8695 && GET_CODE (XEXP (ind, 0)) == REG
8696 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8697 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8704 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8705 WB is true if full writeback address modes are allowed and is false
8706 if limited writeback address modes (POST_INC and PRE_DEC) are
8707 allowed. */
8710 arm_coproc_mem_operand (rtx op, bool wb)
8714 /* Reject eliminable registers. */
8715 if (! (reload_in_progress || reload_completed)
8716 && ( reg_mentioned_p (frame_pointer_rtx, op)
8717 || reg_mentioned_p (arg_pointer_rtx, op)
8718 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8719 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8720 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8721 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8724 /* Constants are converted into offsets from labels. */
8725 if (GET_CODE (op) != MEM)
8730 if (reload_completed
8731 && (GET_CODE (ind) == LABEL_REF
8732 || (GET_CODE (ind) == CONST
8733 && GET_CODE (XEXP (ind, 0)) == PLUS
8734 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8735 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8738 /* Match: (mem (reg)). */
8739 if (GET_CODE (ind) == REG)
8740 return arm_address_register_rtx_p (ind, 0);
8742 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8743 acceptable in any case (subject to verification by
8744 arm_address_register_rtx_p). We need WB to be true to accept
8745 PRE_INC and POST_DEC. */
8746 if (GET_CODE (ind) == POST_INC
8747 || GET_CODE (ind) == PRE_DEC
8749 && (GET_CODE (ind) == PRE_INC
8750 || GET_CODE (ind) == POST_DEC)))
8751 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8754 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8755 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8756 && GET_CODE (XEXP (ind, 1)) == PLUS
8757 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8758 ind = XEXP (ind, 1);
8763 if (GET_CODE (ind) == PLUS
8764 && GET_CODE (XEXP (ind, 0)) == REG
8765 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8766 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8767 && INTVAL (XEXP (ind, 1)) > -1024
8768 && INTVAL (XEXP (ind, 1)) < 1024
8769 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
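/* Stand-alone restatement of the offset test above (illustrative):
   VLDR/VSTR encode an 8-bit word offset plus an up/down bit, so the
   reachable offsets are -1020..1020 in steps of 4; the open interval
   below accepts exactly that range.  */
static int
coproc_offset_in_range_p (long offset)
{
  return offset > -1024 && offset < 1024 && (offset & 3) == 0;
}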
8775 /* Return TRUE if OP is a memory operand which we can load or store a vector
8776 to/from. TYPE is one of the following values:
8777 0 - Vector load/store (vldr)
8778 1 - Core registers (ldm)
8779 2 - Element/structure loads (vld1)
8782 neon_vector_mem_operand (rtx op, int type)
8786 /* Reject eliminable registers. */
8787 if (! (reload_in_progress || reload_completed)
8788 && ( reg_mentioned_p (frame_pointer_rtx, op)
8789 || reg_mentioned_p (arg_pointer_rtx, op)
8790 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8791 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8792 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8793 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8796 /* Constants are converted into offsets from labels. */
8797 if (GET_CODE (op) != MEM)
8802 if (reload_completed
8803 && (GET_CODE (ind) == LABEL_REF
8804 || (GET_CODE (ind) == CONST
8805 && GET_CODE (XEXP (ind, 0)) == PLUS
8806 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8807 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8810 /* Match: (mem (reg)). */
8811 if (GET_CODE (ind) == REG)
8812 return arm_address_register_rtx_p (ind, 0);
8814 /* Allow post-increment with Neon registers. */
8815 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8816 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8818 /* FIXME: vld1 allows register post-modify. */
8824 && GET_CODE (ind) == PLUS
8825 && GET_CODE (XEXP (ind, 0)) == REG
8826 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8827 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8828 && INTVAL (XEXP (ind, 1)) > -1024
8829 && INTVAL (XEXP (ind, 1)) < 1016
8830 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8836 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8837 type. */
8839 neon_struct_mem_operand (rtx op)
8843 /* Reject eliminable registers. */
8844 if (! (reload_in_progress || reload_completed)
8845 && ( reg_mentioned_p (frame_pointer_rtx, op)
8846 || reg_mentioned_p (arg_pointer_rtx, op)
8847 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8848 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8849 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8850 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8853 /* Constants are converted into offsets from labels. */
8854 if (GET_CODE (op) != MEM)
8859 if (reload_completed
8860 && (GET_CODE (ind) == LABEL_REF
8861 || (GET_CODE (ind) == CONST
8862 && GET_CODE (XEXP (ind, 0)) == PLUS
8863 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8864 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8867 /* Match: (mem (reg)). */
8868 if (GET_CODE (ind) == REG)
8869 return arm_address_register_rtx_p (ind, 0);
8874 /* Return true if X is a register that will be eliminated later on. */
8876 arm_eliminable_register (rtx x)
8878 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8879 || REGNO (x) == ARG_POINTER_REGNUM
8880 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8881 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8884 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8885 coprocessor registers. Otherwise return NO_REGS. */
8888 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8892 if (!TARGET_NEON_FP16)
8893 return GENERAL_REGS;
8894 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8896 return GENERAL_REGS;
8900 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8901 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8902 && neon_vector_mem_operand (x, 0))
8905 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8908 return GENERAL_REGS;
8911 /* Values which must be returned in the most-significant end of the return
8912 register. */
8915 arm_return_in_msb (const_tree valtype)
8917 return (TARGET_AAPCS_BASED
8919 && (AGGREGATE_TYPE_P (valtype)
8920 || TREE_CODE (valtype) == COMPLEX_TYPE));
8923 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8924 Used by the Cirrus Maverick code, which has to work around
8925 a hardware bug triggered by such instructions. */
8927 arm_memory_load_p (rtx insn)
8929 rtx body, lhs, rhs;
8931 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8934 body = PATTERN (insn);
8936 if (GET_CODE (body) != SET)
8939 lhs = XEXP (body, 0);
8940 rhs = XEXP (body, 1);
8942 lhs = REG_OR_SUBREG_RTX (lhs);
8944 /* If the destination is not a general purpose
8945 register we do not have to worry. */
8946 if (GET_CODE (lhs) != REG
8947 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8950 /* As well as loads from memory we also have to react
8951 to loads of invalid constants which will be turned
8952 into loads from the minipool. */
8953 return (GET_CODE (rhs) == MEM
8954 || GET_CODE (rhs) == SYMBOL_REF
8955 || note_invalid_constants (insn, -1, false));
8958 /* Return TRUE if INSN is a Cirrus instruction. */
8960 arm_cirrus_insn_p (rtx insn)
8962 enum attr_cirrus attr;
8964 /* get_attr cannot accept USE or CLOBBER. */
8966 || GET_CODE (insn) != INSN
8967 || GET_CODE (PATTERN (insn)) == USE
8968 || GET_CODE (PATTERN (insn)) == CLOBBER)
8971 attr = get_attr_cirrus (insn);
8973 return attr != CIRRUS_NOT;
8976 /* Cirrus reorg for invalid instruction combinations. */
8978 cirrus_reorg (rtx first)
8980 enum attr_cirrus attr;
8981 rtx body = PATTERN (first);
/* Any branch must be followed by 2 non-Cirrus instructions.  */
8986 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8989 t = next_nonnote_insn (first);
8991 if (arm_cirrus_insn_p (t))
8994 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8998 emit_insn_after (gen_nop (), first);
9003 /* (float (blah)) is in parallel with a clobber. */
9004 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9005 body = XVECEXP (body, 0, 0);
9007 if (GET_CODE (body) == SET)
9009 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
/* cfldrd, cfldr64, cfstrd, cfstr64 must
   be followed by a non-Cirrus insn.  */
9013 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9015 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9016 emit_insn_after (gen_nop (), first);
9020 else if (arm_memory_load_p (first))
9022 unsigned int arm_regno;
/* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
   ldr/cfmv64hr combination where the Rd field is the same
   in both instructions must be split with a non-Cirrus
   insn.  */
9033 /* Get Arm register number for ldr insn. */
9034 if (GET_CODE (lhs) == REG)
9035 arm_regno = REGNO (lhs);
9038 gcc_assert (GET_CODE (rhs) == REG);
9039 arm_regno = REGNO (rhs);
9043 first = next_nonnote_insn (first);
9045 if (! arm_cirrus_insn_p (first))
9048 body = PATTERN (first);
9050 /* (float (blah)) is in parallel with a clobber. */
9051 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9052 body = XVECEXP (body, 0, 0);
9054 if (GET_CODE (body) == FLOAT)
9055 body = XEXP (body, 0);
9057 if (get_attr_cirrus (first) == CIRRUS_MOVE
9058 && GET_CODE (XEXP (body, 1)) == REG
9059 && arm_regno == REGNO (XEXP (body, 1)))
9060 emit_insn_after (gen_nop (), first);
9066 /* get_attr cannot accept USE or CLOBBER. */
9068 || GET_CODE (first) != INSN
9069 || GET_CODE (PATTERN (first)) == USE
9070 || GET_CODE (PATTERN (first)) == CLOBBER)
9073 attr = get_attr_cirrus (first);
9075 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9076 must be followed by a non-coprocessor instruction. */
9077 if (attr == CIRRUS_COMPARE)
9081 t = next_nonnote_insn (first);
9083 if (arm_cirrus_insn_p (t))
9086 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9090 emit_insn_after (gen_nop (), first);
9096 /* Return TRUE if X references a SYMBOL_REF. */
9098 symbol_mentioned_p (rtx x)
9103 if (GET_CODE (x) == SYMBOL_REF)
9106 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9107 are constant offsets, not symbols. */
9108 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9111 fmt = GET_RTX_FORMAT (GET_CODE (x));
9113 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9119 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9120 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9123 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9130 /* Return TRUE if X references a LABEL_REF. */
9132 label_mentioned_p (rtx x)
9137 if (GET_CODE (x) == LABEL_REF)
9140 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9141 instruction, but they are constant offsets, not symbols. */
9142 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9145 fmt = GET_RTX_FORMAT (GET_CODE (x));
9146 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9152 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9153 if (label_mentioned_p (XVECEXP (x, i, j)))
9156 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9164 tls_mentioned_p (rtx x)
9166 switch (GET_CODE (x))
9169 return tls_mentioned_p (XEXP (x, 0));
9172 if (XINT (x, 1) == UNSPEC_TLS)
9180 /* Must not copy any rtx that uses a pc-relative address. */
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9185 if (GET_CODE (*x) == UNSPEC
9186 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9192 arm_cannot_copy_insn_p (rtx insn)
9194 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9200 enum rtx_code code = GET_CODE (x);
9217 /* Return 1 if memory locations are adjacent. */
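/* An illustrative (hypothetical) pair: (mem (plus (reg 3) (const_int 4)))
   and (mem (plus (reg 3) (const_int 8))) are adjacent, since they share
   the same base register and their offsets differ by exactly 4;
   references based on different registers never are.  */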
9219 adjacent_mem_locations (rtx a, rtx b)
9221 /* We don't guarantee to preserve the order of these memory refs. */
9222 if (volatile_refs_p (a) || volatile_refs_p (b))
9225 if ((GET_CODE (XEXP (a, 0)) == REG
9226 || (GET_CODE (XEXP (a, 0)) == PLUS
9227 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9228 && (GET_CODE (XEXP (b, 0)) == REG
9229 || (GET_CODE (XEXP (b, 0)) == PLUS
9230 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9232 HOST_WIDE_INT val0 = 0, val1 = 0;
9236 if (GET_CODE (XEXP (a, 0)) == PLUS)
9238 reg0 = XEXP (XEXP (a, 0), 0);
9239 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9244 if (GET_CODE (XEXP (b, 0)) == PLUS)
9246 reg1 = XEXP (XEXP (b, 0), 0);
9247 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9252 /* Don't accept any offset that will require multiple
9253 instructions to handle, since this would cause the
9254 arith_adjacentmem pattern to output an overlong sequence. */
9255 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9258 /* Don't allow an eliminable register: register elimination can make
9259 the offset too large. */
9260 if (arm_eliminable_register (reg0))
9263 val_diff = val1 - val0;
9267 /* If the target has load delay slots, then there's no benefit
9268 to using an ldm instruction unless the offset is zero and
9269 we are optimizing for size. */
9270 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9271 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9272 && (val_diff == 4 || val_diff == -4));
9275 return ((REGNO (reg0) == REGNO (reg1))
9276 && (val_diff == 4 || val_diff == -4));
9282 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9283 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9284 instruction. ADD_OFFSET is nonzero if the base address register needs
9285 to be modified with an add instruction before we can use it. */
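/* A hypothetical illustration: turning two loads at [r4, #100] and
   [r4, #104] into "add r0, r4, #100; ldmia r0, {r0, r1}" would be
   queried as NOPS == 2 with ADD_OFFSET == 100, since the base address
   must first be formed with an add instruction.  */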
9288 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9289 int nops, HOST_WIDE_INT add_offset)
9291 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9292 if the offset isn't small enough. The reason 2 ldrs are faster
9293 is because these ARMs are able to do more than one cache access
9294 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9295 whilst the ARM8 has a double bandwidth cache. This means that
9296 these cores can do both an instruction fetch and a data fetch in
9297 a single cycle, so the trick of calculating the address into a
9298 scratch register (one of the result regs) and then doing a load
9299 multiple actually becomes slower (and no smaller in code size).
9300 That is the transformation
9302 ldr rd1, [rbase + offset]
9303 ldr rd2, [rbase + offset + 4]
9307 add rd1, rbase, offset
9308 ldmia rd1, {rd1, rd2}
9310 produces worse code -- '3 cycles + any stalls on rd2' instead of
9311 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9312 access per cycle, the first sequence could never complete in less
9313 than 6 cycles, whereas the ldm sequence would only take 5 and
9314 would make better use of sequential accesses if not hitting the
9317 We cheat here and test 'arm_ld_sched' which we currently know to
9318 only be true for the ARM8, ARM9 and StrongARM. If this ever
9319 changes, then the test below needs to be reworked. */
9320 if (nops == 2 && arm_ld_sched && add_offset != 0)
9323 /* XScale has load-store double instructions, but they have stricter
9324 alignment requirements than load-store multiple, so we cannot
9327 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9328 the pipeline until completion.
9336 An ldr instruction takes 1-3 cycles, but does not block the
9345 Best case ldr will always win. However, the more ldr instructions
9346 we issue, the less likely we are to be able to schedule them well.
9347 Using ldr instructions also increases code size.
9349 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9350 for counts of 3 or 4 regs. */
9351 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9356 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9357 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9358 an array ORDER which describes the sequence to use when accessing the
9359 offsets that produces an ascending order. In this sequence, each
9360 offset must be larger by exactly 4 than the previous one. ORDER[0]
9361 must have been filled in with the lowest offset by the caller.
9362 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9363 we use to verify that ORDER produces an ascending order of registers.
Return true if it was possible to construct such an order, false if
   not.  */
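/* A worked (hypothetical) example: with NOPS == 3,
   UNSORTED_OFFSETS == {8, 0, 4} and ORDER[0] == 1 (the index of the
   lowest offset), the loop below fills in ORDER == {1, 2, 0}, visiting
   the offsets 0, 4, 8 in ascending order.  Were the offsets {0, 4, 12},
   no offset exceeds 4 by exactly 4, so the function would fail.  */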
9368 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9372 for (i = 1; i < nops; i++)
9376 order[i] = order[i - 1];
9377 for (j = 0; j < nops; j++)
9378 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9380 /* We must find exactly one offset that is higher than the
9381 previous one by 4. */
9382 if (order[i] != order[i - 1])
9386 if (order[i] == order[i - 1])
9388 /* The register numbers must be ascending. */
9389 if (unsorted_regs != NULL
9390 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9396 /* Used to determine in a peephole whether a sequence of load
9397 instructions can be changed into a load-multiple instruction.
9398 NOPS is the number of separate load instructions we are examining. The
9399 first NOPS entries in OPERANDS are the destination registers, the
9400 next NOPS entries are memory operands. If this function is
9401 successful, *BASE is set to the common base register of the memory
9402 accesses; *LOAD_OFFSET is set to the first memory location's offset
9403 from that base register.
9404 REGS is an array filled in with the destination register numbers.
SAVED_ORDER (if nonnull) is an array filled in with an order that maps
insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9407 the sequence of registers in REGS matches the loads from ascending memory
9408 locations, and the function verifies that the register numbers are
9409 themselves ascending. If CHECK_REGS is false, the register numbers
9410 are stored in the order they are found in the operands. */
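/* An illustrative (hypothetical) sequence: the four loads
	ldr r4, [r1]; ldr r5, [r1, #4]; ldr r6, [r1, #8]; ldr r7, [r1, #12]
   share base register r1 with ascending registers and offsets, and the
   lowest offset is 0, so they select ldm_case 1 (ldmia).  */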
9412 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9413 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9415 int unsorted_regs[MAX_LDM_STM_OPS];
9416 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9417 int order[MAX_LDM_STM_OPS];
9418 rtx base_reg_rtx = NULL;
9422 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9423 easily extended if required. */
9424 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9426 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9428 /* Loop over the operands and check that the memory references are
9429 suitable (i.e. immediate offsets from the same base register). At
the same time, extract the target register, and the memory
offsets.  */
9432 for (i = 0; i < nops; i++)
9437 /* Convert a subreg of a mem into the mem itself. */
9438 if (GET_CODE (operands[nops + i]) == SUBREG)
9439 operands[nops + i] = alter_subreg (operands + (nops + i));
9441 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9443 /* Don't reorder volatile memory references; it doesn't seem worth
9444 looking for the case where the order is ok anyway. */
9445 if (MEM_VOLATILE_P (operands[nops + i]))
9448 offset = const0_rtx;
9450 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9451 || (GET_CODE (reg) == SUBREG
9452 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9453 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9454 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9456 || (GET_CODE (reg) == SUBREG
9457 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9458 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9463 base_reg = REGNO (reg);
9465 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9468 else if (base_reg != (int) REGNO (reg))
9469 /* Not addressed from the same base register. */
9472 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9473 ? REGNO (operands[i])
9474 : REGNO (SUBREG_REG (operands[i])));
9476 /* If it isn't an integer register, or if it overwrites the
9477 base register but isn't the last insn in the list, then
9478 we can't do this. */
9479 if (unsorted_regs[i] < 0
9480 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9481 || unsorted_regs[i] > 14
9482 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9485 unsorted_offsets[i] = INTVAL (offset);
9486 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9490 /* Not a suitable memory address. */
9494 /* All the useful information has now been extracted from the
9495 operands into unsorted_regs and unsorted_offsets; additionally,
9496 order[0] has been set to the lowest offset in the list. Sort
9497 the offsets into order, verifying that they are adjacent, and
9498 check that the register numbers are ascending. */
9499 if (!compute_offset_order (nops, unsorted_offsets, order,
9500 check_regs ? unsorted_regs : NULL))
9504 memcpy (saved_order, order, sizeof order);
9510 for (i = 0; i < nops; i++)
9511 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9513 *load_offset = unsorted_offsets[order[0]];
9517 && !peep2_reg_dead_p (nops, base_reg_rtx))
9520 if (unsorted_offsets[order[0]] == 0)
9521 ldm_case = 1; /* ldmia */
9522 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9523 ldm_case = 2; /* ldmib */
9524 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9525 ldm_case = 3; /* ldmda */
9526 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9527 ldm_case = 4; /* ldmdb */
9528 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9529 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9534 if (!multiple_operation_profitable_p (false, nops,
9536 ? unsorted_offsets[order[0]] : 0))
9542 /* Used to determine in a peephole whether a sequence of store instructions can
9543 be changed into a store-multiple instruction.
9544 NOPS is the number of separate store instructions we are examining.
NOPS_TOTAL is the total number of instructions recognized by the peephole
pattern.
9547 The first NOPS entries in OPERANDS are the source registers, the next
9548 NOPS entries are memory operands. If this function is successful, *BASE is
9549 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9550 to the first memory location's offset from that base register. REGS is an
9551 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9552 likewise filled with the corresponding rtx's.
SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
numbers to an ascending order of stores.
9555 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9556 from ascending memory locations, and the function verifies that the register
9557 numbers are themselves ascending. If CHECK_REGS is false, the register
9558 numbers are stored in the order they are found in the operands. */
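/* A hypothetical example: the four stores
	str r4, [r1, #-16]; str r5, [r1, #-12]; str r6, [r1, #-8]; str r7, [r1, #-4]
   share base register r1 and the last offset is -4, selecting stm_case 4
   (stmdb) on 32-bit targets.  */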
9560 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9561 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9562 HOST_WIDE_INT *load_offset, bool check_regs)
9564 int unsorted_regs[MAX_LDM_STM_OPS];
9565 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9566 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9567 int order[MAX_LDM_STM_OPS];
9569 rtx base_reg_rtx = NULL;
9572 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9573 easily extended if required. */
9574 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9576 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9578 /* Loop over the operands and check that the memory references are
9579 suitable (i.e. immediate offsets from the same base register). At
the same time, extract the target register, and the memory
offsets.  */
9582 for (i = 0; i < nops; i++)
9587 /* Convert a subreg of a mem into the mem itself. */
9588 if (GET_CODE (operands[nops + i]) == SUBREG)
9589 operands[nops + i] = alter_subreg (operands + (nops + i));
9591 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9593 /* Don't reorder volatile memory references; it doesn't seem worth
9594 looking for the case where the order is ok anyway. */
9595 if (MEM_VOLATILE_P (operands[nops + i]))
9598 offset = const0_rtx;
9600 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9601 || (GET_CODE (reg) == SUBREG
9602 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9603 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9604 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9606 || (GET_CODE (reg) == SUBREG
9607 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9608 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9611 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9612 ? operands[i] : SUBREG_REG (operands[i]));
9613 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9617 base_reg = REGNO (reg);
9619 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9622 else if (base_reg != (int) REGNO (reg))
9623 /* Not addressed from the same base register. */
9626 /* If it isn't an integer register, then we can't do this. */
9627 if (unsorted_regs[i] < 0
9628 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9629 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9630 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9631 || unsorted_regs[i] > 14)
9634 unsorted_offsets[i] = INTVAL (offset);
9635 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9639 /* Not a suitable memory address. */
9643 /* All the useful information has now been extracted from the
9644 operands into unsorted_regs and unsorted_offsets; additionally,
9645 order[0] has been set to the lowest offset in the list. Sort
9646 the offsets into order, verifying that they are adjacent, and
9647 check that the register numbers are ascending. */
9648 if (!compute_offset_order (nops, unsorted_offsets, order,
9649 check_regs ? unsorted_regs : NULL))
9653 memcpy (saved_order, order, sizeof order);
9659 for (i = 0; i < nops; i++)
9661 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9663 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9666 *load_offset = unsorted_offsets[order[0]];
9670 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9673 if (unsorted_offsets[order[0]] == 0)
9674 stm_case = 1; /* stmia */
9675 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9676 stm_case = 2; /* stmib */
9677 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9678 stm_case = 3; /* stmda */
9679 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9680 stm_case = 4; /* stmdb */
9684 if (!multiple_operation_profitable_p (false, nops, 0))
9690 /* Routines for use in generating RTL. */
9692 /* Generate a load-multiple instruction. COUNT is the number of loads in
9693 the instruction; REGS and MEMS are arrays containing the operands.
9694 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register.  */
9699 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9700 HOST_WIDE_INT wback_offset)
9705 if (!multiple_operation_profitable_p (false, count, 0))
9711 for (i = 0; i < count; i++)
9712 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9714 if (wback_offset != 0)
9715 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9723 result = gen_rtx_PARALLEL (VOIDmode,
9724 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9725 if (wback_offset != 0)
9727 XVECEXP (result, 0, 0)
9728 = gen_rtx_SET (VOIDmode, basereg,
9729 plus_constant (basereg, wback_offset));
9734 for (j = 0; i < count; i++, j++)
9735 XVECEXP (result, 0, i)
9736 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9741 /* Generate a store-multiple instruction. COUNT is the number of stores in
9742 the instruction; REGS and MEMS are arrays containing the operands.
9743 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register.  */
9748 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9749 HOST_WIDE_INT wback_offset)
9754 if (GET_CODE (basereg) == PLUS)
9755 basereg = XEXP (basereg, 0);
9757 if (!multiple_operation_profitable_p (false, count, 0))
9763 for (i = 0; i < count; i++)
9764 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9766 if (wback_offset != 0)
9767 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9775 result = gen_rtx_PARALLEL (VOIDmode,
9776 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9777 if (wback_offset != 0)
9779 XVECEXP (result, 0, 0)
9780 = gen_rtx_SET (VOIDmode, basereg,
9781 plus_constant (basereg, wback_offset));
9786 for (j = 0; i < count; i++, j++)
9787 XVECEXP (result, 0, i)
9788 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9793 /* Generate either a load-multiple or a store-multiple instruction. This
9794 function can be used in situations where we can start with a single MEM
9795 rtx and adjust its address upwards.
9796 COUNT is the number of operations in the instruction, not counting a
possible update of the base register.  REGS is an array containing the
register numbers to be loaded or stored.
9799 BASEREG is the base register to be used in addressing the memory operands,
9800 which are constructed from BASEMEM.
9801 WRITE_BACK specifies whether the generated instruction should include an
9802 update of the base register.
9803 OFFSETP is used to pass an offset to and from this function; this offset
9804 is not used when constructing the address (instead BASEMEM should have an
9805 appropriate offset in its address), it is used only for setting
MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
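/* A usage sketch (hypothetical operands, mirroring the callers further
   down in this file):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg, FALSE,
                                       basemem, &offset));

   loads r4-r7 from four consecutive words based at BASEMEM without
   updating the base register.  */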
9809 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9810 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9812 rtx mems[MAX_LDM_STM_OPS];
9813 HOST_WIDE_INT offset = *offsetp;
9816 gcc_assert (count <= MAX_LDM_STM_OPS);
9818 if (GET_CODE (basereg) == PLUS)
9819 basereg = XEXP (basereg, 0);
9821 for (i = 0; i < count; i++)
9823 rtx addr = plus_constant (basereg, i * 4);
9824 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9832 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9833 write_back ? 4 * count : 0);
9835 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9836 write_back ? 4 * count : 0);
9840 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9841 rtx basemem, HOST_WIDE_INT *offsetp)
9843 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9848 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9849 rtx basemem, HOST_WIDE_INT *offsetp)
9851 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9855 /* Called from a peephole2 expander to turn a sequence of loads into an
9856 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9857 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
is true if we can reorder the registers because they are subsequently used commutatively.
9860 Returns true iff we could generate a new instruction. */
9863 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9865 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9866 rtx mems[MAX_LDM_STM_OPS];
9869 HOST_WIDE_INT offset;
9870 int write_back = FALSE;
9874 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9875 &base_reg, &offset, !sort_regs);
9881 for (i = 0; i < nops - 1; i++)
9882 for (j = i + 1; j < nops; j++)
9883 if (regs[i] > regs[j])
9889 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9893 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9894 gcc_assert (ldm_case == 1 || ldm_case == 5);
9900 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
9901 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
9906 base_reg_rtx = newbase;
9910 for (i = 0; i < nops; i++)
9912 addr = plus_constant (base_reg_rtx, offset + i * 4);
9913 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9916 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
9917 write_back ? offset + i * 4 : 0));
9921 /* Called from a peephole2 expander to turn a sequence of stores into an
9922 STM instruction. OPERANDS are the operands found by the peephole matcher;
9923 NOPS indicates how many separate stores we are trying to combine.
9924 Returns true iff we could generate a new instruction. */
9927 gen_stm_seq (rtx *operands, int nops)
9930 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9931 rtx mems[MAX_LDM_STM_OPS];
9934 HOST_WIDE_INT offset;
9935 int write_back = FALSE;
9940 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
9941 mem_order, &base_reg, &offset, true);
9946 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9948 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
9951 gcc_assert (base_reg_dies);
9957 gcc_assert (base_reg_dies);
9958 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
9962 addr = plus_constant (base_reg_rtx, offset);
9964 for (i = 0; i < nops; i++)
9966 addr = plus_constant (base_reg_rtx, offset + i * 4);
9967 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9970 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
9971 write_back ? offset + i * 4 : 0));
9975 /* Called from a peephole2 expander to turn a sequence of stores that are
9976 preceded by constant loads into an STM instruction. OPERANDS are the
9977 operands found by the peephole matcher; NOPS indicates how many
9978 separate stores we are trying to combine; there are 2 * NOPS
9979 instructions in the peephole.
9980 Returns true iff we could generate a new instruction. */
9983 gen_const_stm_seq (rtx *operands, int nops)
9985 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
9986 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9987 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
9988 rtx mems[MAX_LDM_STM_OPS];
9991 HOST_WIDE_INT offset;
9992 int write_back = FALSE;
9997 HARD_REG_SET allocated;
9999 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10000 mem_order, &base_reg, &offset, false);
10005 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
/* If the same register is used more than once, try to find a free
   register.  */
10009 CLEAR_HARD_REG_SET (allocated);
10010 for (i = 0; i < nops; i++)
10012 for (j = i + 1; j < nops; j++)
10013 if (regs[i] == regs[j])
10015 rtx t = peep2_find_free_register (0, nops * 2,
10016 TARGET_THUMB1 ? "l" : "r",
10017 SImode, &allocated);
10021 regs[i] = REGNO (t);
/* Compute an ordering that maps the register numbers to an ascending
   sequence.  */
10028 for (i = 0; i < nops; i++)
10029 if (regs[i] < regs[reg_order[0]])
10032 for (i = 1; i < nops; i++)
10034 int this_order = reg_order[i - 1];
10035 for (j = 0; j < nops; j++)
10036 if (regs[j] > regs[reg_order[i - 1]]
10037 && (this_order == reg_order[i - 1]
10038 || regs[j] < regs[this_order]))
10040 reg_order[i] = this_order;
10043 /* Ensure that registers that must be live after the instruction end
10044 up with the correct value. */
10045 for (i = 0; i < nops; i++)
10047 int this_order = reg_order[i];
10048 if ((this_order != mem_order[i]
10049 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10050 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10054 /* Load the constants. */
10055 for (i = 0; i < nops; i++)
10057 rtx op = operands[2 * nops + mem_order[i]];
10058 sorted_regs[i] = regs[reg_order[i]];
10059 emit_move_insn (reg_rtxs[reg_order[i]], op);
10062 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10064 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10067 gcc_assert (base_reg_dies);
10073 gcc_assert (base_reg_dies);
10074 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10078 addr = plus_constant (base_reg_rtx, offset);
10080 for (i = 0; i < nops; i++)
10082 addr = plus_constant (base_reg_rtx, offset + i * 4);
10083 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10086 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10087 write_back ? offset + i * 4 : 0));
10092 arm_gen_movmemqi (rtx *operands)
10094 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10095 HOST_WIDE_INT srcoffset, dstoffset;
10097 rtx src, dst, srcbase, dstbase;
10098 rtx part_bytes_reg = NULL;
10101 if (GET_CODE (operands[2]) != CONST_INT
10102 || GET_CODE (operands[3]) != CONST_INT
10103 || INTVAL (operands[2]) > 64
10104 || INTVAL (operands[3]) & 3)
10107 dstbase = operands[0];
10108 srcbase = operands[1];
10110 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10111 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10113 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10114 out_words_to_go = INTVAL (operands[2]) / 4;
10115 last_bytes = INTVAL (operands[2]) & 3;
10116 dstoffset = srcoffset = 0;
10118 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10119 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
for (i = 0; in_words_to_go >= 2; i += 4)
10123 if (in_words_to_go > 4)
10124 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10125 TRUE, srcbase, &srcoffset));
10127 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10128 src, FALSE, srcbase,
10131 if (out_words_to_go)
10133 if (out_words_to_go > 4)
10134 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10135 TRUE, dstbase, &dstoffset));
10136 else if (out_words_to_go != 1)
10137 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10138 out_words_to_go, dst,
10141 dstbase, &dstoffset));
10144 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10145 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10146 if (last_bytes != 0)
10148 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10154 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10155 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10158 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10159 if (out_words_to_go)
10163 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10164 sreg = copy_to_reg (mem);
10166 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10167 emit_move_insn (mem, sreg);
10170 gcc_assert (!in_words_to_go); /* Sanity check */
10173 if (in_words_to_go)
10175 gcc_assert (in_words_to_go > 0);
10177 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10178 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10181 gcc_assert (!last_bytes || part_bytes_reg);
10183 if (BYTES_BIG_ENDIAN && last_bytes)
10185 rtx tmp = gen_reg_rtx (SImode);
10187 /* The bytes we want are in the top end of the word. */
10188 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10189 GEN_INT (8 * (4 - last_bytes))));
10190 part_bytes_reg = tmp;
10194 mem = adjust_automodify_address (dstbase, QImode,
10195 plus_constant (dst, last_bytes - 1),
10196 dstoffset + last_bytes - 1);
10197 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10201 tmp = gen_reg_rtx (SImode);
10202 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10203 part_bytes_reg = tmp;
10210 if (last_bytes > 1)
10212 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10213 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10217 rtx tmp = gen_reg_rtx (SImode);
10218 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10219 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10220 part_bytes_reg = tmp;
10227 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10228 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10235 /* Select a dominance comparison mode if possible for a test of the general
10236 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10237 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10238 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10239 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10240 In all cases OP will be either EQ or NE, but we don't need to know which
10241 here. If we are unable to support a dominance comparison we return
10242 CC mode. This will then fail to match for the RTL expressions that
10243 generate this call. */
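/* An illustrative case: for the test (ne (and (eq a b) (eq c d))
   (const_int 0)), i.e. X == (eq a b), Y == (eq c d) and
   COND_OR == DOM_CC_X_AND_Y, both component comparisons are EQ and
   the function returns CC_DEQmode.  */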
10245 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10247 enum rtx_code cond1, cond2;
10250 /* Currently we will probably get the wrong result if the individual
10251 comparisons are not simple. This also ensures that it is safe to
10252 reverse a comparison if necessary. */
10253 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10255 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10259 /* The if_then_else variant of this tests the second condition if the
10260 first passes, but is true if the first fails. Reverse the first
10261 condition to get a true "inclusive-or" expression. */
10262 if (cond_or == DOM_CC_NX_OR_Y)
10263 cond1 = reverse_condition (cond1);
10265 /* If the comparisons are not equal, and one doesn't dominate the other,
10266 then we can't do this. */
10268 && !comparison_dominates_p (cond1, cond2)
10269 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10274 enum rtx_code temp = cond1;
10282 if (cond_or == DOM_CC_X_AND_Y)
10287 case EQ: return CC_DEQmode;
10288 case LE: return CC_DLEmode;
10289 case LEU: return CC_DLEUmode;
10290 case GE: return CC_DGEmode;
10291 case GEU: return CC_DGEUmode;
10292 default: gcc_unreachable ();
10296 if (cond_or == DOM_CC_X_AND_Y)
10308 gcc_unreachable ();
10312 if (cond_or == DOM_CC_X_AND_Y)
10324 gcc_unreachable ();
10328 if (cond_or == DOM_CC_X_AND_Y)
10329 return CC_DLTUmode;
10334 return CC_DLTUmode;
10336 return CC_DLEUmode;
10340 gcc_unreachable ();
10344 if (cond_or == DOM_CC_X_AND_Y)
10345 return CC_DGTUmode;
10350 return CC_DGTUmode;
10352 return CC_DGEUmode;
10356 gcc_unreachable ();
/* The remaining cases only occur when both comparisons are the
   same.  */
10362 gcc_assert (cond1 == cond2);
10366 gcc_assert (cond1 == cond2);
10370 gcc_assert (cond1 == cond2);
10374 gcc_assert (cond1 == cond2);
10375 return CC_DLEUmode;
10378 gcc_assert (cond1 == cond2);
10379 return CC_DGEUmode;
10382 gcc_unreachable ();
10387 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10389 /* All floating point compares return CCFP if it is an equality
10390 comparison, and CCFPE otherwise. */
10391 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10411 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10416 gcc_unreachable ();
10420 /* A compare with a shifted operand. Because of canonicalization, the
10421 comparison will have to be swapped when we emit the assembler. */
10422 if (GET_MODE (y) == SImode
10423 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10424 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10425 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10426 || GET_CODE (x) == ROTATERT))
10429 /* This operation is performed swapped, but since we only rely on the Z
10430 flag we don't need an additional mode. */
10431 if (GET_MODE (y) == SImode
10432 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10433 && GET_CODE (x) == NEG
10434 && (op == EQ || op == NE))
10437 /* This is a special case that is used by combine to allow a
10438 comparison of a shifted byte load to be split into a zero-extend
10439 followed by a comparison of the shifted integer (only valid for
10440 equalities and unsigned inequalities). */
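/* That is, a shape like
     (compare (ashift:SI (subreg:SI (mem:QI ...) 0) (const_int 24))
	      (const_int N))
   -- an illustrative sketch of the pattern tested below.  */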
10441 if (GET_MODE (x) == SImode
10442 && GET_CODE (x) == ASHIFT
10443 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10444 && GET_CODE (XEXP (x, 0)) == SUBREG
10445 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10446 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10447 && (op == EQ || op == NE
10448 || op == GEU || op == GTU || op == LTU || op == LEU)
10449 && GET_CODE (y) == CONST_INT)
/* A construct for a conditional compare: if the false arm contains
   0, then both conditions must be true; otherwise either condition
   must be true.  Not all conditions are possible, so CCmode is
   returned if it can't be done.  */
10456 if (GET_CODE (x) == IF_THEN_ELSE
10457 && (XEXP (x, 2) == const0_rtx
10458 || XEXP (x, 2) == const1_rtx)
10459 && COMPARISON_P (XEXP (x, 0))
10460 && COMPARISON_P (XEXP (x, 1)))
10461 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10462 INTVAL (XEXP (x, 2)));
10464 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10465 if (GET_CODE (x) == AND
10466 && COMPARISON_P (XEXP (x, 0))
10467 && COMPARISON_P (XEXP (x, 1)))
10468 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10471 if (GET_CODE (x) == IOR
10472 && COMPARISON_P (XEXP (x, 0))
10473 && COMPARISON_P (XEXP (x, 1)))
10474 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10477 /* An operation (on Thumb) where we want to test for a single bit.
10478 This is done by shifting that bit up into the top bit of a
10479 scratch register; we can then branch on the sign bit. */
10481 && GET_MODE (x) == SImode
10482 && (op == EQ || op == NE)
10483 && GET_CODE (x) == ZERO_EXTRACT
10484 && XEXP (x, 1) == const1_rtx)
/* For an operation that sets the condition codes as a side-effect, the
   V flag is not set correctly, so we can only use comparisons where
   this doesn't matter.  (For LT and GE we can use "mi" and "pl"
   instead.)  */
10491 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10492 if (GET_MODE (x) == SImode
10494 && (op == EQ || op == NE || op == LT || op == GE)
10495 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10496 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10497 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10498 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10499 || GET_CODE (x) == LSHIFTRT
10500 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10501 || GET_CODE (x) == ROTATERT
10502 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10503 return CC_NOOVmode;
10505 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10508 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10509 && GET_CODE (x) == PLUS
10510 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10513 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
/* To keep things simple, always use the Cirrus cfcmp64 if it is
   available.  */
10517 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10524 /* A DImode comparison against zero can be implemented by
10525 or'ing the two halves together. */
10526 if (y == const0_rtx)
10529 /* We can do an equality test in three Thumb instructions. */
/* DImode unsigned comparisons can be implemented by cmp +
   cmpeq without a scratch register.  Not worth doing in
   Thumb-2.  */
10551 /* DImode signed and unsigned comparisons can be implemented
10552 by cmp + sbcs with a scratch register, but that does not
10553 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10554 gcc_assert (op != EQ && op != NE);
10558 gcc_unreachable ();
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the CC register in the proper mode.  */
10569 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10571 enum machine_mode mode;
10573 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10575 /* We might have X as a constant, Y as a register because of the predicates
10576 used for cmpdi. If so, force X to a register here. */
10577 if (dimode_comparison && !REG_P (x))
10578 x = force_reg (DImode, x);
10580 mode = SELECT_CC_MODE (code, x, y);
10581 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10583 if (dimode_comparison
10584 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10585 && mode != CC_CZmode)
10589 /* To compare two non-zero values for equality, XOR them and
10590 then compare against zero. Not used for ARM mode; there
10591 CC_CZmode is cheaper. */
10592 if (mode == CC_Zmode && y != const0_rtx)
10594 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10597 /* A scratch register is required. */
10598 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10599 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10600 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10603 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
10612 arm_gen_return_addr_mask (void)
10614 rtx reg = gen_reg_rtx (Pmode);
10616 emit_insn (gen_return_addr_mask (reg));
10621 arm_reload_in_hi (rtx *operands)
10623 rtx ref = operands[1];
10625 HOST_WIDE_INT offset = 0;
10627 if (GET_CODE (ref) == SUBREG)
10629 offset = SUBREG_BYTE (ref);
10630 ref = SUBREG_REG (ref);
10633 if (GET_CODE (ref) == REG)
10635 /* We have a pseudo which has been spilt onto the stack; there
10636 are two cases here: the first where there is a simple
10637 stack-slot replacement and a second where the stack-slot is
10638 out of range, or is used as a subreg. */
10639 if (reg_equiv_mem[REGNO (ref)])
10641 ref = reg_equiv_mem[REGNO (ref)];
10642 base = find_replacement (&XEXP (ref, 0));
10645 /* The slot is out of range, or was dressed up in a SUBREG. */
10646 base = reg_equiv_address[REGNO (ref)];
10649 base = find_replacement (&XEXP (ref, 0));
10651 /* Handle the case where the address is too complex to be offset by 1. */
10652 if (GET_CODE (base) == MINUS
10653 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10655 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10657 emit_set_insn (base_plus, base);
10660 else if (GET_CODE (base) == PLUS)
10662 /* The addend must be CONST_INT, or we would have dealt with it above. */
10663 HOST_WIDE_INT hi, lo;
10665 offset += INTVAL (XEXP (base, 1));
10666 base = XEXP (base, 0);
10668 /* Rework the address into a legal sequence of insns. */
10669 /* Valid range for lo is -4095 -> 4095 */
10672 : -((-offset) & 0xfff));
10674 /* Corner case, if lo is the max offset then we would be out of range
10675 once we have added the additional 1 below, so bump the msb into the
10676 pre-loading insn(s). */
10680 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10681 ^ (HOST_WIDE_INT) 0x80000000)
10682 - (HOST_WIDE_INT) 0x80000000);
10684 gcc_assert (hi + lo == offset);
10688 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10690 /* Get the base address; addsi3 knows how to handle constants
10691 that require more than one insn. */
10692 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10698 /* Operands[2] may overlap operands[0] (though it won't overlap
10699 operands[1]), that's why we asked for a DImode reg -- so we can
10700 use the bit that does not overlap. */
10701 if (REGNO (operands[2]) == REGNO (operands[0]))
10702 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10704 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10706 emit_insn (gen_zero_extendqisi2 (scratch,
10707 gen_rtx_MEM (QImode,
10708 plus_constant (base,
10710 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10711 gen_rtx_MEM (QImode,
10712 plus_constant (base,
10714 if (!BYTES_BIG_ENDIAN)
10715 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10716 gen_rtx_IOR (SImode,
10719 gen_rtx_SUBREG (SImode, operands[0], 0),
10723 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10724 gen_rtx_IOR (SImode,
10725 gen_rtx_ASHIFT (SImode, scratch,
10727 gen_rtx_SUBREG (SImode, operands[0], 0)));
10730 /* Handle storing a half-word to memory during reload by synthesizing as two
10731 byte stores. Take care not to clobber the input values until after we
10732 have moved them somewhere safe. This code assumes that if the DImode
10733 scratch in operands[2] overlaps either the input value or output address
10734 in some way, then that value must die in this insn (we absolutely need
10735 two scratch registers for some corner cases). */
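/* A rough sketch of the little-endian output (hypothetical register
   choices):

	strb	rval, [rbase]		@ store the low byte
	mov	rscratch, rval, lsr #8
	strb	rscratch, [rbase, #1]	@ store the high byte

   On big-endian targets the two byte addresses are swapped.  */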
10737 arm_reload_out_hi (rtx *operands)
10739 rtx ref = operands[0];
10740 rtx outval = operands[1];
10742 HOST_WIDE_INT offset = 0;
10744 if (GET_CODE (ref) == SUBREG)
10746 offset = SUBREG_BYTE (ref);
10747 ref = SUBREG_REG (ref);
10750 if (GET_CODE (ref) == REG)
10752 /* We have a pseudo which has been spilt onto the stack; there
10753 are two cases here: the first where there is a simple
10754 stack-slot replacement and a second where the stack-slot is
10755 out of range, or is used as a subreg. */
10756 if (reg_equiv_mem[REGNO (ref)])
10758 ref = reg_equiv_mem[REGNO (ref)];
10759 base = find_replacement (&XEXP (ref, 0));
10762 /* The slot is out of range, or was dressed up in a SUBREG. */
10763 base = reg_equiv_address[REGNO (ref)];
10766 base = find_replacement (&XEXP (ref, 0));
10768 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10770 /* Handle the case where the address is too complex to be offset by 1. */
10771 if (GET_CODE (base) == MINUS
10772 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10774 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10776 /* Be careful not to destroy OUTVAL. */
10777 if (reg_overlap_mentioned_p (base_plus, outval))
10779 /* Updating base_plus might destroy outval, see if we can
10780 swap the scratch and base_plus. */
10781 if (!reg_overlap_mentioned_p (scratch, outval))
10784 scratch = base_plus;
10789 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10791 /* Be conservative and copy OUTVAL into the scratch now,
10792 this should only be necessary if outval is a subreg
10793 of something larger than a word. */
10794 /* XXX Might this clobber base? I can't see how it can,
10795 since scratch is known to overlap with OUTVAL, and
10796 must be wider than a word. */
10797 emit_insn (gen_movhi (scratch_hi, outval));
10798 outval = scratch_hi;
10802 emit_set_insn (base_plus, base);
10805 else if (GET_CODE (base) == PLUS)
10807 /* The addend must be CONST_INT, or we would have dealt with it above. */
10808 HOST_WIDE_INT hi, lo;
10810 offset += INTVAL (XEXP (base, 1));
10811 base = XEXP (base, 0);
10813 /* Rework the address into a legal sequence of insns. */
10814 /* Valid range for lo is -4095 -> 4095 */
10817 : -((-offset) & 0xfff));
10819 /* Corner case, if lo is the max offset then we would be out of range
10820 once we have added the additional 1 below, so bump the msb into the
10821 pre-loading insn(s). */
10825 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10826 ^ (HOST_WIDE_INT) 0x80000000)
10827 - (HOST_WIDE_INT) 0x80000000);
10829 gcc_assert (hi + lo == offset);
10833 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10835 /* Be careful not to destroy OUTVAL. */
10836 if (reg_overlap_mentioned_p (base_plus, outval))
10838 /* Updating base_plus might destroy outval, see if we
10839 can swap the scratch and base_plus. */
10840 if (!reg_overlap_mentioned_p (scratch, outval))
10843 scratch = base_plus;
10848 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10850 /* Be conservative and copy outval into scratch now,
10851 this should only be necessary if outval is a
10852 subreg of something larger than a word. */
10853 /* XXX Might this clobber base? I can't see how it
10854 can, since scratch is known to overlap with
10856 emit_insn (gen_movhi (scratch_hi, outval));
10857 outval = scratch_hi;
10861 /* Get the base address; addsi3 knows how to handle constants
10862 that require more than one insn. */
10863 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10869 if (BYTES_BIG_ENDIAN)
10871 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10872 plus_constant (base, offset + 1)),
10873 gen_lowpart (QImode, outval)));
10874 emit_insn (gen_lshrsi3 (scratch,
10875 gen_rtx_SUBREG (SImode, outval, 0),
10877 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10878 gen_lowpart (QImode, scratch)));
10882 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10883 gen_lowpart (QImode, outval)));
10884 emit_insn (gen_lshrsi3 (scratch,
10885 gen_rtx_SUBREG (SImode, outval, 0),
10887 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10888 plus_constant (base, offset + 1)),
10889 gen_lowpart (QImode, scratch)));
10893 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10894 (padded to the size of a word) should be passed in a register. */
10897 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10899 if (TARGET_AAPCS_BASED)
10900 return must_pass_in_stack_var_size (mode, type);
10902 return must_pass_in_stack_var_size_or_pad (mode, type);
10906 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10907 Return true if an argument passed on the stack should be padded upwards,
10908 i.e. if the least-significant byte has useful data.
10909 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10910 aggregate types are placed in the lowest memory address. */
10913 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10915 if (!TARGET_AAPCS_BASED)
10916 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10918 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10925 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10926 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10927 byte of the register has useful data, and return the opposite if the
10928 most significant byte does.
For AAPCS, small aggregates and small complex types are always padded
upwards.  */
10933 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10934 tree type, int first ATTRIBUTE_UNUSED)
10936 if (TARGET_AAPCS_BASED
10937 && BYTES_BIG_ENDIAN
10938 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10939 && int_size_in_bytes (type) <= 4)
10942 /* Otherwise, use default padding. */
10943 return !BYTES_BIG_ENDIAN;
10947 /* Print a symbolic form of X to the debug file, F. */
10949 arm_print_value (FILE *f, rtx x)
10951 switch (GET_CODE (x))
10954 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10958 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10966 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10968 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10969 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10977 fprintf (f, "\"%s\"", XSTR (x, 0));
10981 fprintf (f, "`%s'", XSTR (x, 0));
10985 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10989 arm_print_value (f, XEXP (x, 0));
10993 arm_print_value (f, XEXP (x, 0));
10995 arm_print_value (f, XEXP (x, 1));
11003 fprintf (f, "????");
11008 /* Routines for manipulation of the constant pool. */
11010 /* Arm instructions cannot load a large constant directly into a
11011 register; they have to come from a pc relative load. The constant
11012 must therefore be placed in the addressable range of the pc
11013 relative load. Depending on the precise pc relative load
11014 instruction the range is somewhere between 256 bytes and 4k. This
11015 means that we often have to dump a constant inside a function, and
11016 generate code to branch around it.
11018 It is important to minimize this, since the branches will slow
11019 things down and make the code larger.
11021 Normally we can hide the table after an existing unconditional
11022 branch so that there is no interruption of the flow, but in the
11023 worst case the code looks like this:
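   (the following is an illustrative sketch; rn, L1 and L2 are
   placeholder names, and the pattern repeats for each out-of-range
   constant)

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...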
11041 We fix this by performing a scan after scheduling, which notices
11042 which instructions need to have their operands fetched from the
11043 constant table and builds the table.
11045 The algorithm starts by building a table of all the constants that
11046 need fixing up and all the natural barriers in the function (places
11047 where a constant table can be dropped without breaking the flow).
11048 For each fixup we note how far the pc-relative replacement will be
11049 able to reach and the offset of the instruction into the function.
11051 Having built the table we then group the fixes together to form
11052 tables that are as large as possible (subject to addressing
11053 constraints) and emit each table of constants after the last
11054 barrier that is within range of all the instructions in the group.
11055 If a group does not contain a barrier, then we forcibly create one
11056 by inserting a jump instruction into the flow. Once the table has
11057 been inserted, the insns are then modified to reference the
11058 relevant entry in the pool.
11060 Possible enhancements to the algorithm (not implemented) are:
11062 1) For some processors and object formats, there may be benefit in
11063 aligning the pools to the start of cache lines; this alignment
would need to be taken into account when calculating addressability
of a pool.  */
11067 /* These typedefs are located at the start of this file, so that
11068 they can be used in the prototypes there. This comment is to
11069 remind readers of that fact so that the following structures
11070 can be understood more easily.
11072 typedef struct minipool_node Mnode;
11073 typedef struct minipool_fixup Mfix; */
11075 struct minipool_node
11077 /* Doubly linked chain of entries. */
/* The maximum offset into the code at which this entry can be placed.  While
   pushing fixes for forward references, all entries are sorted in order
   of increasing max_address.  */
11083 HOST_WIDE_INT max_address;
11084 /* Similarly for an entry inserted for a backwards ref. */
11085 HOST_WIDE_INT min_address;
11086 /* The number of fixes referencing this entry. This can become zero
11087 if we "unpush" an entry. In this case we ignore the entry when we
11088 come to emit the code. */
11090 /* The offset from the start of the minipool. */
11091 HOST_WIDE_INT offset;
/* The value in the table.  */
rtx value;
/* The mode of the value.  */
11095 enum machine_mode mode;
/* The size of the value.  With iWMMXt enabled,
   sizes > 4 also imply an alignment of 8 bytes.  */
11101 struct minipool_fixup
11105 HOST_WIDE_INT address;
11107 enum machine_mode mode;
11111 HOST_WIDE_INT forwards;
11112 HOST_WIDE_INT backwards;
11115 /* Fixes less than a word need padding out to a word boundary. */
11116 #define MINIPOOL_FIX_SIZE(mode) \
11117 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
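/* For example, a QImode fix (natural size 1) still occupies 4 bytes in
   the pool, while a DImode fix keeps its natural size of 8.  */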
11119 static Mnode * minipool_vector_head;
11120 static Mnode * minipool_vector_tail;
11121 static rtx minipool_vector_label;
11122 static int minipool_pad;
11124 /* The linked list of all minipool fixes required for this function. */
11125 Mfix * minipool_fix_head;
11126 Mfix * minipool_fix_tail;
11127 /* The fix entry for the current minipool, once it has been placed. */
11128 Mfix * minipool_barrier;
11130 /* Determines if INSN is the start of a jump table. Returns the end
11131 of the TABLE or NULL_RTX. */
11133 is_jump_table (rtx insn)
11137 if (GET_CODE (insn) == JUMP_INSN
11138 && JUMP_LABEL (insn) != NULL
11139 && ((table = next_real_insn (JUMP_LABEL (insn)))
11140 == next_real_insn (insn))
11142 && GET_CODE (table) == JUMP_INSN
11143 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11144 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11150 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11151 #define JUMP_TABLES_IN_TEXT_SECTION 0
11154 static HOST_WIDE_INT
11155 get_jump_table_size (rtx insn)
/* ADDR_VECs only take room if read-only data goes into the text
   section.  */
11159 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11161 rtx body = PATTERN (insn);
11162 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11163 HOST_WIDE_INT size;
11164 HOST_WIDE_INT modesize;
11166 modesize = GET_MODE_SIZE (GET_MODE (body));
11167 size = modesize * XVECLEN (body, elt);
11171 /* Round up size of TBB table to a halfword boundary. */
11172 size = (size + 1) & ~(HOST_WIDE_INT)1;
11175 /* No padding necessary for TBH. */
11178 /* Add two bytes for alignment on Thumb. */
11183 gcc_unreachable ();
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
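/* To illustrate the invariant maintained above (numbers hypothetical):
   if the pool holds entries A, B, C with max_address 100, 200, 300 and
   C is moved before B, the loop walks back over A tightening its
   max_address so that every entry still fits below its successor's
   limit minus its own fix_size; entries thus stay sorted by increasing
   max_address.  */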
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Move a minipool fix MP from its current location to after MIN_MP;
   the counterpart of move_minipool_fix_forward_ref for backward
   references.  */
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
/* Fill in the offsets for minipool entries.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";; Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
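/* For illustration: base_cost starts at 50 and drops to 30 when the
   next insn is a CODE_LABEL, so a JUMP_INSN just before a label costs
   20, a plain INSN or CALL_INSN costs base_cost, and anything
   unrecognized costs base_cost + 10; a lower cost marks a better place
   to force a barrier.  */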
/* Find the best place in the insn stream in the range
   (FIX->address, MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
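/* A worked example (the constant is illustrative): for the DImode
   value 0x0000000100000001 each 32-bit half is 1, a valid ARM
   immediate, so arm_gen_constant counts one insn per half and the
   total cost is 2; a constant whose halves each need two
   data-processing insns would cost 4 and may be better loaded from
   the pool.  */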
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
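/* For example, DImode 0x000000FF000000FF qualifies: both the high and
   the low word are 0xFF, which const_ok_for_arm accepts, so the
   constant can be built with two immediate moves and never needs a
   minipool entry.  */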
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	      result = true;
	    }
	  else if (GET_CODE (op) == MEM
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}

	      result = true;
	    }
	}
    }

  return result;
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB (bb)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM))
	    {
	      rtx pat = PATTERN (insn);
	      if (GET_CODE (pat) == SET
		  && low_register_operand (XEXP (pat, 0), SImode)
		  && thumb_16bit_operator (XEXP (pat, 1), SImode)
		  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
		  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
		{
		  rtx dst = XEXP (pat, 0);
		  rtx src = XEXP (pat, 1);
		  rtx op0 = XEXP (src, 0);
		  if (rtx_equal_p (dst, op0)
		      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
		    {
		      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		      rtvec vec = gen_rtvec (2, pat, clobber);
		      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		      INSN_CODE (insn) = -1;
		    }
		}
	    }
	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
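/* The point of the transformation above, sketched on an example: a
   Thumb-2 "add r0, r0, r1" that does not set the flags needs a 32-bit
   encoding, while the flag-setting "adds r0, r0, r1" fits in 16 bits.
   Wrapping the SET in a PARALLEL with a CC clobber lets the insn match
   the flag-setting patterns whenever CC is dead at that point.  */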
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB2)
    thumb2_reorg ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
	  && (arm_cirrus_insn_p (insn)
	      || GET_CODE (insn) == JUMP_INSN
	      || arm_memory_load_p (insn)))
	cirrus_reorg (insn);

      if (GET_CODE (insn) == BARRIER)
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if ((table = is_jump_table (insn)) != NULL)
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (GET_CODE (ftmp->insn) == BARRIER)
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (GET_CODE (ftmp->insn) != BARRIER
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (GET_CODE (this_fix->insn) != BARRIER)
	  {
	    rtx addr
	      = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer,
   INSTR is the possibly suffixed load or store instruction.
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */

static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
		 unsigned long mask, int rfe)
{
  unsigned i;
  bool not_first = FALSE;

  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputc ('{', stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      {
	if (not_first)
	  fprintf (stream, ", ");

	asm_fprintf (stream, "%r", i);
	not_first = TRUE;
      }

  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
}
/* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.

   We output FLDMD even for ARMv5 VFP implementations.  Although
   FLDMD is technically not supported until ARMv6, it is believed
   that all VFP implementations support its use in this context.  */

static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
{
  int i;

  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
	reg--;
      count++;
    }

  /* FLDMD may not load more than 16 doubleword registers at a time.  Split
     the load into multiple parts if we have to handle more than 16
     registers.  */
  if (count > 16)
    {
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;
    }

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
    {
      if (i > reg)
	fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);
    }

  fputs ("}\n", stream);
}
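/* By way of example (register numbers hypothetical): restoring d8-d10
   via the stack pointer produces

	fldmfdd	sp!, {d8, d9, d10}

   and a request for more than 16 registers is simply emitted as two
   consecutive instructions by the recursion above.  */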
/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts for 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
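/* Usage note: the return value feeds the prologue's running total of
   bytes pushed, so storing three register pairs reports 24 bytes.  The
   ARM10 workaround may silently store one extra pair, and that pair is
   included in the returned count.  */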
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register group.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register group.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);

  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
/* Emit a MOVW/MOVT pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
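/* For instance, loading the constant 0x12345678 (value hypothetical)
   emits a move of 0x5678 into the destination followed by a
   ZERO_EXTRACT store of 0x1234 into its top half, i.e. a movw/movt
   pair; when the top half is zero the second insn is omitted.  */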
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);

  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);

  return "";
}
/* Output a move between double words.
   It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
   or MEM<-REG and all MEMs must be offsettable addresses.  */
const char *
output_move_double (rtx *operands)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (TARGET_LDRD
	      && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
	    output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	  else
	    output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	  break;

	case POST_INC:
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  output_asm_insn ("add%?\t%1, %1, %2", otherops);
		  output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || GET_CODE (otherops[2]) != CONST_INT
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		  else
		    {
		      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || GET_CODE (otherops[2]) != CONST_INT
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
	      else
		{
		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditional execution.  */
	  otherops[1] = operands[1];
	  output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (GET_CODE (otherops[2]) == REG
			  || TARGET_THUMB2
			  || (GET_CODE (otherops[2]) == CONST_INT
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  rtx tmp;
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  tmp = otherops[1];
			  otherops[1] = otherops[2];
			  otherops[2] = tmp;
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  output_asm_insn ("add%?\t%0, %1, %2", otherops);
			  output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			}
		      else
			{
			  otherops[0] = operands[0];
			  output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (GET_CODE (otherops[2]) == CONST_INT)
		    {
		      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
		      else
			output_asm_insn ("add%?\t%0, %1, %2", otherops);
		    }
		  else
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  output_asm_insn ("ldr%?\t%0, %1", otherops);
		  output_asm_insn ("ldr%?\t%0, %1", operands);
		}
	      else
		{
		  output_asm_insn ("ldr%?\t%0, %1", operands);
		  output_asm_insn ("ldr%?\t%0, %1", otherops);
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert (REGNO (operands[1]) != IP_REGNUM);

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	  else
	    output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	  else
	    output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	  break;

	case POST_INC:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	  else
	    output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && GET_CODE (otherops[2]) == CONST_INT
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		}
	      else
		{
		  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		  output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	  else
	    output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (GET_CODE (otherops[2]) == REG
		  || TARGET_THUMB2
		  || (GET_CODE (otherops[2]) == CONST_INT
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  output_asm_insn ("str%?\t%1, %0", operands);
	  output_asm_insn ("str%?\t%H1, %0", otherops);
	}
    }

  return "";
}
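/* A sample of the assembly produced (register choices hypothetical):
   a REG<-MEM move with a POST_INC address becomes

	ldrd	r0, [r2], #8

   on a TARGET_LDRD core, and

	ldmia	r2!, {r0, r1}

   otherwise; the store cases mirror this with strd/stmia.  */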
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? 'd' : 's',
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon quad-word load or store, or a load or store for
   larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because we use VSTM, as required by the EABI.  GCC RTL defines
   element ordering based on in-memory order.  This can differ
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      templ = "v%smia%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
	int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
	int overlap = -1;
	int i;

	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      templ = "v%smia%%?\t%%m0, %%h1";
      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
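/* For example (operands hypothetical), adding the constant 0x101 to r1
   cannot be done with a single rotated 8-bit immediate, so the loop
   above splits it into byte-sized chunks and emits

	add	r0, r1, #1
	add	r0, r0, #256

   using INSTR1 for the first chunk and INSTR2 thereafter.  */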
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
	 multiplication by a power of 2 with the recognizer for a
	 shift.  >=32 is not a valid shift for "lsl", so we must try and
	 output a shift that produces the correct arithmetical result.
	 Using lsr #32 is identical except for the fact that the carry bit
	 is not set correctly if we set the flags; but we never use the
	 carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
	/* Rotate is just modulo 32.  */
	*amountp &= 31;
      else if (*amountp != (*amountp & 31))
	{
	  if (code == ASHIFT)
	    mnem = "lsr";
	  *amountp = 32;
	}

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
	return NULL;
    }

  return mnem;
}
13542 static HOST_WIDE_INT
13543 int_log2 (HOST_WIDE_INT power)
13545 HOST_WIDE_INT shift = 0;
13547 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13549 gcc_assert (shift <= 31);
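/* So int_log2 (8) is 3 and int_log2 (1) is 0; the loop finds the
   lowest set bit, and the assertion catches a zero (or otherwise
   invalid) POWER before the shift count can exceed 31.  This matches
   the power-of-two precondition stated in shift_op above.  */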
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
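/* For example, the three bytes 'a', '"', '\n' come out as

	.ascii	"a\"\012"

   and a long string is broken into multiple .ascii directives once
   MAX_ASCII_LEN output characters have accumulated on one line.  */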
13604 /* Compute the register save mask for registers 0 through 12
13605 inclusive. This code is used by arm_compute_save_reg_mask. */
13607 static unsigned long
13608 arm_compute_save_reg0_reg12_mask (void)
13610 unsigned long func_type = arm_current_func_type ();
13611 unsigned long save_reg_mask = 0;
13614 if (IS_INTERRUPT (func_type))
13616 unsigned int max_reg;
13617 /* Interrupt functions must not corrupt any registers,
13618 even call clobbered ones. If this is a leaf function
13619 we can just examine the registers used by the RTL, but
13620 otherwise we have to assume that whatever function is
13621 called might clobber anything, and so we have to save
13622 all the call-clobbered registers as well. */
13623 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13624 /* FIQ handlers have registers r8 - r12 banked, so
13625 we only need to check r0 - r7, Normal ISRs only
13626 bank r14 and r15, so we must check up to r12.
13627 r13 is the stack pointer which is always preserved,
13628 so we do not need to consider it here. */
13633 for (reg = 0; reg <= max_reg; reg++)
13634 if (df_regs_ever_live_p (reg)
13635 || (! current_function_is_leaf && call_used_regs[reg]))
13636 save_reg_mask |= (1 << reg);
13638 /* Also save the pic base register if necessary. */
13640 && !TARGET_SINGLE_PIC_BASE
13641 && arm_pic_register != INVALID_REGNUM
13642 && crtl->uses_pic_offset_table)
13643 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13645 else if (IS_VOLATILE(func_type))
13646 {
13647 /* For noreturn functions we historically omitted register saves
13648 altogether.  However this really messes up debugging.  As a
13649 compromise save just the frame pointers.  Combined with the link
13650 register saved elsewhere this should be sufficient to get
13651 a backtrace.  */
13652 if (frame_pointer_needed)
13653 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13654 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13655 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13656 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13657 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13658 }
13659 else
13660 {
13661 /* In the normal case we only need to save those registers
13662 which are call saved and which are used by this function. */
13663 for (reg = 0; reg <= 11; reg++)
13664 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13665 save_reg_mask |= (1 << reg);
13667 /* Handle the frame pointer as a special case. */
13668 if (frame_pointer_needed)
13669 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13671 /* If we aren't loading the PIC register,
13672 don't stack it even though it may be live.  */
13673 if (flag_pic
13674 && !TARGET_SINGLE_PIC_BASE
13675 && arm_pic_register != INVALID_REGNUM
13676 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13677 || crtl->uses_pic_offset_table))
13678 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13680 /* The prologue will copy SP into R0, so save it. */
13681 if (IS_STACKALIGN (func_type))
13682 save_reg_mask |= 1;
13683 }
13685 /* Save registers so the exception handler can modify them.  */
13686 if (crtl->calls_eh_return)
13687 {
13688 unsigned int i;
13690 for (i = 0; ; i++)
13691 {
13692 reg = EH_RETURN_DATA_REGNO (i);
13693 if (reg == INVALID_REGNUM)
13694 break;
13695 save_reg_mask |= 1 << reg;
13696 }
13697 }
13699 return save_reg_mask;
13700 }
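/* Worked example (illustrative): a leaf FIQ handler that touches only
   r0 and r4 scans r0-r7 above and returns a mask of 0x11; the banked
   r8-r12 are never considered, and a non-leaf handler would widen the
   mask with every call-clobbered register in range.  */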
13703 /* Compute the number of bytes used to store the static chain register on the
13704 stack, above the stack frame. We need to know this accurately to get the
13705 alignment of the rest of the stack frame correct. */
13706 static int
13707 arm_compute_static_chain_stack_bytes (void)
13708 {
13709 unsigned long func_type = arm_current_func_type ();
13710 int static_chain_stack_bytes = 0;
13712 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13713 && IS_NESTED (func_type)
13714 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13715 static_chain_stack_bytes = 4;
13717 return static_chain_stack_bytes;
13718 }
13721 /* Compute a bit mask of which registers need to be
13722 saved on the stack for the current function.
13723 This is used by arm_get_frame_offsets, which may add extra registers. */
13725 static unsigned long
13726 arm_compute_save_reg_mask (void)
13728 unsigned int save_reg_mask = 0;
13729 unsigned long func_type = arm_current_func_type ();
13730 unsigned int reg;
13732 if (IS_NAKED (func_type))
13733 /* This should never really happen.  */
13734 return 0;
13736 /* If we are creating a stack frame, then we must save the frame pointer,
13737 IP (which will hold the old stack pointer), LR and the PC. */
13738 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13739 save_reg_mask |=
13740 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13741 | (1 << IP_REGNUM)
13742 | (1 << LR_REGNUM)
13743 | (1 << PC_REGNUM);
13745 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13747 /* Decide if we need to save the link register.
13748 Interrupt routines have their own banked link register,
13749 so they never need to save it.
13750 Otherwise if we do not use the link register we do not need to save
13751 it. If we are pushing other registers onto the stack however, we
13752 can save an instruction in the epilogue by pushing the link register
13753 now and then popping it back into the PC. This incurs extra memory
13754 accesses though, so we only do it when optimizing for size, and only
13755 if we know that we will not need a fancy return sequence. */
13756 if (df_regs_ever_live_p (LR_REGNUM)
13757 || (save_reg_mask
13758 && optimize_size
13759 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13760 && !crtl->calls_eh_return))
13761 save_reg_mask |= 1 << LR_REGNUM;
13763 if (cfun->machine->lr_save_eliminated)
13764 save_reg_mask &= ~ (1 << LR_REGNUM);
13766 if (TARGET_REALLY_IWMMXT
13767 && ((bit_count (save_reg_mask)
13768 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13769 arm_compute_static_chain_stack_bytes())
13770 ) % 2) != 0)
13771 {
13772 /* The total number of registers that are going to be pushed
13773 onto the stack is odd. We need to ensure that the stack
13774 is 64-bit aligned before we start to save iWMMXt registers,
13775 and also before we start to create locals. (A local variable
13776 might be a double or long long which we will load/store using
13777 an iWMMXt instruction). Therefore we need to push another
13778 ARM register, so that the stack will be 64-bit aligned. We
13779 try to avoid using the arg registers (r0 -r3) as they might be
13780 used to pass values in a tail call. */
13781 for (reg = 4; reg <= 12; reg++)
13782 if ((save_reg_mask & (1 << reg)) == 0)
13783 break;
13785 if (reg <= 12)
13786 save_reg_mask |= (1 << reg);
13787 else
13788 {
13789 cfun->machine->sibcall_blocked = 1;
13790 save_reg_mask |= (1 << 3);
13791 }
13792 }
13794 /* We may need to push an additional register for use initializing the
13795 PIC base register. */
13796 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13797 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13798 {
13799 reg = thumb_find_work_register (1 << 4);
13800 if (!call_used_regs[reg])
13801 save_reg_mask |= (1 << reg);
13802 }
13804 return save_reg_mask;
13805 }
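/* Worked example (illustrative): with r4-r6 and LR live (four
   registers) and 4 bytes of pretend args, the count above is
   4 + 1 = 5, which is odd, so the first free register in the r4-r12
   scan (r7 here) is pushed purely to keep the stack 64-bit aligned
   for the iWMMXt saves that follow.  */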
13808 /* Compute a bit mask of which registers need to be
13809 saved on the stack for the current function. */
13810 static unsigned long
13811 thumb1_compute_save_reg_mask (void)
13813 unsigned long mask;
13814 unsigned reg;
13816 mask = 0;
13817 for (reg = 0; reg < 12; reg ++)
13818 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13819 mask |= 1 << reg;
13821 if (flag_pic
13822 && !TARGET_SINGLE_PIC_BASE
13823 && arm_pic_register != INVALID_REGNUM
13824 && crtl->uses_pic_offset_table)
13825 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13827 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13828 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13829 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13831 /* LR will also be pushed if any lo regs are pushed. */
13832 if (mask & 0xff || thumb_force_lr_save ())
13833 mask |= (1 << LR_REGNUM);
13835 /* Make sure we have a low work register if we need one.
13836 We will need one if we are going to push a high register,
13837 but we are not currently intending to push a low register. */
13838 if ((mask & 0xff) == 0
13839 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13840 {
13841 /* Use thumb_find_work_register to choose which register
13842 we will use. If the register is live then we will
13843 have to push it. Use LAST_LO_REGNUM as our fallback
13844 choice for the register to select. */
13845 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13846 /* Make sure the register returned by thumb_find_work_register is
13847 not part of the return value. */
13848 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13849 reg = LAST_LO_REGNUM;
13851 if (! call_used_regs[reg])
13852 mask |= 1 << reg;
13853 }
13855 /* The 504 below is 8 bytes less than 512 because there are two possible
13856 alignment words. We can't tell here if they will be present or not so we
13857 have to play it safe and assume that they are. */
13858 if ((CALLER_INTERWORKING_SLOT_SIZE +
13859 ROUND_UP_WORD (get_frame_size ()) +
13860 crtl->outgoing_args_size) >= 504)
13861 {
13862 /* This is the same as the code in thumb1_expand_prologue() which
13863 determines which register to use for stack decrement. */
13864 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13865 if (mask & (1 << reg))
13866 break;
13868 if (reg > LAST_LO_REGNUM)
13869 {
13870 /* Make sure we have a register available for stack decrement. */
13871 mask |= 1 << LAST_LO_REGNUM;
13872 }
13873 }
13875 return mask;
13876 }
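/* Worked example (illustrative): a 400-byte frame plus 100 bytes of
   outgoing arguments and a 4-byte interworking slot totals 504, so
   the test above fires and a low register is reserved for the stack
   decrement; a Thumb-1 "sub sp" immediate only reaches 508 and two
   alignment words may still be added, hence the 504 cutoff.  */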
13879 /* Return the number of bytes required to save VFP registers. */
13880 static int
13881 arm_get_vfp_saved_size (void)
13882 {
13883 unsigned int regno;
13884 int count;
13885 int saved;
13887 saved = 0;
13888 /* Space for saved VFP registers.  */
13889 if (TARGET_HARD_FLOAT && TARGET_VFP)
13890 {
13891 count = 0;
13892 for (regno = FIRST_VFP_REGNUM;
13893 regno < LAST_VFP_REGNUM;
13894 regno += 2)
13895 {
13896 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13897 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13898 {
13899 if (count > 0)
13900 {
13901 /* Workaround ARM10 VFPr1 bug.  */
13902 if (count == 2 && !arm_arch6)
13903 count++;
13904 saved += count * 8;
13905 }
13906 count = 0;
13907 }
13908 else
13909 count++;
13910 }
13911 if (count > 0)
13912 {
13913 if (count == 2 && !arm_arch6)
13914 count++;
13915 saved += count * 8;
13916 }
13917 }
13918 return saved;
13919 }
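/* Worked example (illustrative): if d8-d10 are the only call-saved
   VFP registers in use, the run length is 3 and 24 bytes are
   reserved; a run of exactly two D registers on a pre-v6 core is
   padded to three by the erratum workaround above, so d8-d9 alone
   would also cost 24 bytes there.  */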
13922 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13923 everything bar the final return instruction.  */
13924 const char *
13925 output_return_instruction (rtx operand, int really_return, int reverse)
13926 {
13927 char conditional[10];
13928 char instr[100];
13929 unsigned reg;
13930 unsigned long live_regs_mask;
13931 unsigned long func_type;
13932 arm_stack_offsets *offsets;
13934 func_type = arm_current_func_type ();
13936 if (IS_NAKED (func_type))
13937 return "";
13939 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13941 /* If this function was declared non-returning, and we have
13942 found a tail call, then we have to trust that the called
13943 function won't return.  */
13944 if (really_return)
13945 {
13946 rtx ops[2];
13948 /* Otherwise, trap an attempted return by aborting.  */
13949 ops[0] = operand;
13950 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13951 : "abort");
13952 assemble_external_libcall (ops[1]);
13953 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13954 }
13956 return "";
13957 }
13959 gcc_assert (!cfun->calls_alloca || really_return);
13961 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13963 cfun->machine->return_used_this_function = 1;
13965 offsets = arm_get_frame_offsets ();
13966 live_regs_mask = offsets->saved_regs_mask;
13968 if (live_regs_mask)
13970 const char * return_reg;
13972 /* If we do not have any special requirements for function exit
13973 (e.g. interworking) then we can load the return address
13974 directly into the PC.  Otherwise we must load it into LR.  */
13975 if (really_return
13976 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13977 return_reg = reg_names[PC_REGNUM];
13979 return_reg = reg_names[LR_REGNUM];
13981 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13983 /* There are three possible reasons for the IP register
13984 being saved. 1) a stack frame was created, in which case
13985 IP contains the old stack pointer, or 2) an ISR routine
13986 corrupted it, or 3) it was saved to align the stack on
13987 iWMMXt.  In case 1, restore IP into SP, otherwise just
13988 restore IP.  */
13989 if (frame_pointer_needed)
13990 {
13991 live_regs_mask &= ~ (1 << IP_REGNUM);
13992 live_regs_mask |= (1 << SP_REGNUM);
13993 }
13994 else
13995 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13998 /* On some ARM architectures it is faster to use LDR rather than
13999 LDM to load a single register. On other architectures, the
14000 cost is the same. In 26 bit mode, or for exception handlers,
14001 we have to use LDM to load the PC so that the CPSR is also
14002 restored.  */
14003 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14004 if (live_regs_mask == (1U << reg))
14005 break;
14007 if (reg <= LAST_ARM_REGNUM
14008 && (reg != LR_REGNUM
14009 || ! really_return
14010 || ! IS_INTERRUPT (func_type)))
14011 {
14012 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14013 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14014 }
14015 else
14016 {
14017 char *p;
14018 int first = 1;
14020 /* Generate the load multiple instruction to restore the
14021 registers. Note we can get here, even if
14022 frame_pointer_needed is true, but only if sp already
14023 points to the base of the saved core registers. */
14024 if (live_regs_mask & (1 << SP_REGNUM))
14025 {
14026 unsigned HOST_WIDE_INT stack_adjust;
14028 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14029 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14031 if (stack_adjust && arm_arch5 && TARGET_ARM)
14032 if (TARGET_UNIFIED_ASM)
14033 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14034 else
14035 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14036 else
14037 {
14038 /* If we can't use ldmib (SA110 bug),
14039 then try to pop r3 instead.  */
14040 if (stack_adjust)
14041 live_regs_mask |= 1 << 3;
14043 if (TARGET_UNIFIED_ASM)
14044 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14045 else
14046 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14047 }
14048 }
14049 else
14050 if (TARGET_UNIFIED_ASM)
14051 sprintf (instr, "pop%s\t{", conditional);
14052 else
14053 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14055 p = instr + strlen (instr);
14057 for (reg = 0; reg <= SP_REGNUM; reg++)
14058 if (live_regs_mask & (1 << reg))
14059 {
14060 int l = strlen (reg_names[reg]);
14062 if (first)
14063 first = 0;
14064 else
14065 {
14066 memcpy (p, ", ", 2);
14067 p += 2;
14068 }
14070 memcpy (p, "%|", 2);
14071 memcpy (p + 2, reg_names[reg], l);
14072 p += l + 2;
14073 }
14075 if (live_regs_mask & (1 << LR_REGNUM))
14076 {
14077 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14078 /* If returning from an interrupt, restore the CPSR.  */
14079 if (IS_INTERRUPT (func_type))
14080 strcat (p, "^");
14081 }
14082 else
14083 strcpy (p, "}");
14084 }
14086 output_asm_insn (instr, & operand);
14088 /* See if we need to generate an extra instruction to
14089 perform the actual function return.  */
14090 if (really_return
14091 && func_type != ARM_FT_INTERWORKED
14092 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14094 /* The return has already been handled
14095 by loading the LR into the PC.  */
14096 really_return = 0;
14097 }
14098 }
14100 if (really_return)
14101 {
14102 switch ((int) ARM_FUNC_TYPE (func_type))
14103 {
14104 case ARM_FT_ISR:
14105 case ARM_FT_FIQ:
14106 /* ??? This is wrong for unified assembly syntax. */
14107 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14108 break;
14110 case ARM_FT_INTERWORKED:
14111 sprintf (instr, "bx%s\t%%|lr", conditional);
14112 break;
14114 case ARM_FT_EXCEPTION:
14115 /* ??? This is wrong for unified assembly syntax. */
14116 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14117 break;
14119 default:
14120 /* Use bx if it's available. */
14121 if (arm_arch5 || arm_arch4t)
14122 sprintf (instr, "bx%s\t%%|lr", conditional);
14123 else
14124 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14128 output_asm_insn (instr, & operand);
14129 }
14131 return "";
14132 }
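/* Illustrative result (assumed register usage): a normal ARMv5
   function that pushed {r4, fp, lr} leaves through the code above as
   the single instruction

	ldmfd	sp!, {r4, fp, pc}

   while an interworked return instead pops into lr and appends
   "bx lr".  */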
14134 /* Write the function name into the code section, directly preceding
14135 the function prologue.
14137 Code will be output similar to this:
14139 .ascii "arm_poke_function_name", 0
14142 .word 0xff000000 + (t1 - t0)
14143 arm_poke_function_name
14145 stmfd sp!, {fp, ip, lr, pc}
14148 When performing a stack backtrace, code can inspect the value
14149 of 'pc' stored at 'fp' + 0. If the trace function then looks
14150 at location pc - 12 and the top 8 bits are set, then we know
14151 that there is a function name embedded immediately preceding this
14152 location and has length ((pc[-3]) & 0xff000000).
14154 We assume that pc is declared as a pointer to an unsigned long.
14156 It is of no benefit to output the function name if we are assembling
14157 a leaf function. These function types will not contain a stack
14158 backtrace structure, therefore it is not possible to determine the
14159 function name.  */
14160 void
14161 arm_poke_function_name (FILE *stream, const char *name)
14162 {
14163 unsigned long alignlength;
14164 unsigned long length;
14165 rtx x;
14167 length = strlen (name) + 1;
14168 alignlength = ROUND_UP_WORD (length);
14170 ASM_OUTPUT_ASCII (stream, name, length);
14171 ASM_OUTPUT_ALIGN (stream, 2);
14172 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14173 assemble_aligned_integer (UNITS_PER_WORD, x);
14174 }
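/* Worked example (illustrative): for NAME "fn" the string plus its
   NUL is 3 bytes, rounded up to 4, so the marker word is 0xff000004;
   a backtracer that sees the 0xff top byte at pc[-3] recovers the
   4-byte name length from the low bits.  */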
14176 /* Place some comments into the assembler stream
14177 describing the current function. */
14178 static void
14179 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14180 {
14181 unsigned long func_type;
14183 if (TARGET_THUMB1)
14184 {
14185 thumb1_output_function_prologue (f, frame_size);
14186 return;
14187 }
14189 /* Sanity check. */
14190 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14192 func_type = arm_current_func_type ();
14194 switch ((int) ARM_FUNC_TYPE (func_type))
14195 {
14196 default:
14197 case ARM_FT_NORMAL:
14198 break;
14199 case ARM_FT_INTERWORKED:
14200 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14201 break;
14202 case ARM_FT_ISR:
14203 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14204 break;
14205 case ARM_FT_FIQ:
14206 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14207 break;
14208 case ARM_FT_EXCEPTION:
14209 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14210 break;
14211 }
14213 if (IS_NAKED (func_type))
14214 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14216 if (IS_VOLATILE (func_type))
14217 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14219 if (IS_NESTED (func_type))
14220 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14221 if (IS_STACKALIGN (func_type))
14222 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14224 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14226 crtl->args.pretend_args_size, frame_size);
14228 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14229 frame_pointer_needed,
14230 cfun->machine->uses_anonymous_args);
14232 if (cfun->machine->lr_save_eliminated)
14233 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14235 if (crtl->calls_eh_return)
14236 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14237 }
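/* Illustrative output (assumed values): for an ordinary function the
   code above produces assembler commentary such as

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0

   which documents the frame layout but is ignored by the assembler.  */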
14240 const char *
14241 arm_output_epilogue (rtx sibling)
14242 {
14243 int reg;
14244 unsigned long saved_regs_mask;
14245 unsigned long func_type;
14246 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14247 frame that is $fp + 4 for a non-variadic function. */
14248 int floats_offset = 0;
14250 FILE * f = asm_out_file;
14251 unsigned int lrm_count = 0;
14252 int really_return = (sibling == NULL);
14254 arm_stack_offsets *offsets;
14256 /* If we have already generated the return instruction
14257 then it is futile to generate anything else. */
14258 if (use_return_insn (FALSE, sibling)
14259 && (cfun->machine->return_used_this_function != 0))
14260 return "";
14262 func_type = arm_current_func_type ();
14264 if (IS_NAKED (func_type))
14265 /* Naked functions don't have epilogues.  */
14266 return "";
14268 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14269 {
14270 rtx op;
14272 /* A volatile function should never return. Call abort. */
14273 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14274 assemble_external_libcall (op);
14275 output_asm_insn ("bl\t%a0", &op);
14277 return "";
14278 }
14280 /* If we are throwing an exception, then we really must be doing a
14281 return, so we can't tail-call. */
14282 gcc_assert (!crtl->calls_eh_return || really_return);
14284 offsets = arm_get_frame_offsets ();
14285 saved_regs_mask = offsets->saved_regs_mask;
14288 lrm_count = bit_count (saved_regs_mask);
14290 floats_offset = offsets->saved_args;
14291 /* Compute how far away the floats will be. */
14292 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14293 if (saved_regs_mask & (1 << reg))
14294 floats_offset += 4;
14296 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14298 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14299 int vfp_offset = offsets->frame;
14301 if (TARGET_FPA_EMU2)
14302 {
14303 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14304 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14305 {
14306 floats_offset += 12;
14307 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14308 reg, FP_REGNUM, floats_offset - vfp_offset);
14309 }
14310 }
14311 else
14312 {
14313 start_reg = LAST_FPA_REGNUM;
14315 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14317 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14319 floats_offset += 12;
14321 /* We can't unstack more than four registers at once. */
14322 if (start_reg - reg == 3)
14324 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14325 reg, FP_REGNUM, floats_offset - vfp_offset);
14326 start_reg = reg - 1;
14331 if (reg != start_reg)
14332 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14333 reg + 1, start_reg - reg,
14334 FP_REGNUM, floats_offset - vfp_offset);
14335 start_reg = reg - 1;
14339 /* Just in case the last register checked also needs unstacking. */
14340 if (reg != start_reg)
14341 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14342 reg + 1, start_reg - reg,
14343 FP_REGNUM, floats_offset - vfp_offset);
14346 if (TARGET_HARD_FLOAT && TARGET_VFP)
14350 /* The fldmd insns do not have base+offset addressing
14351 modes, so we use IP to hold the address. */
14352 saved_size = arm_get_vfp_saved_size ();
14354 if (saved_size > 0)
14356 floats_offset += saved_size;
14357 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14358 FP_REGNUM, floats_offset - vfp_offset);
14360 start_reg = FIRST_VFP_REGNUM;
14361 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14363 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14364 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14366 if (start_reg != reg)
14367 vfp_output_fldmd (f, IP_REGNUM,
14368 (start_reg - FIRST_VFP_REGNUM) / 2,
14369 (reg - start_reg) / 2);
14370 start_reg = reg + 2;
14373 if (start_reg != reg)
14374 vfp_output_fldmd (f, IP_REGNUM,
14375 (start_reg - FIRST_VFP_REGNUM) / 2,
14376 (reg - start_reg) / 2);
14381 /* The frame pointer is guaranteed to be non-double-word aligned.
14382 This is because it is set to (old_stack_pointer - 4) and the
14383 old_stack_pointer was double word aligned. Thus the offset to
14384 the iWMMXt registers to be loaded must also be non-double-word
14385 sized, so that the resultant address *is* double-word aligned.
14386 We can ignore floats_offset since that was already included in
14387 the live_regs_mask. */
14388 lrm_count += (lrm_count % 2 ? 2 : 1);
14390 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14391 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14393 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14394 reg, FP_REGNUM, lrm_count * 4);
14399 /* saved_regs_mask should contain the IP, which at the time of stack
14400 frame generation actually contains the old stack pointer. So a
14401 quick way to unwind the stack is just pop the IP register directly
14402 into the stack pointer. */
14403 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14404 saved_regs_mask &= ~ (1 << IP_REGNUM);
14405 saved_regs_mask |= (1 << SP_REGNUM);
14407 /* There are two registers left in saved_regs_mask - LR and PC. We
14408 only need to restore the LR register (the return address), but to
14409 save time we can load it directly into the PC, unless we need a
14410 special function exit sequence, or we are not really returning.  */
14411 if (really_return
14412 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14413 && !crtl->calls_eh_return)
14414 /* Delete the LR from the register mask, so that the LR on
14415 the stack is loaded into the PC in the register mask. */
14416 saved_regs_mask &= ~ (1 << LR_REGNUM);
14417 else
14418 saved_regs_mask &= ~ (1 << PC_REGNUM);
14420 /* We must use SP as the base register, because SP is one of the
14421 registers being restored. If an interrupt or page fault
14422 happens in the ldm instruction, the SP might or might not
14423 have been restored. That would be bad, as then SP will no
14424 longer indicate the safe area of stack, and we can get stack
14425 corruption. Using SP as the base register means that it will
14426 be reset correctly to the original value, should an interrupt
14427 occur. If the stack pointer already points at the right
14428 place, then omit the subtraction. */
14429 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14430 || cfun->calls_alloca)
14431 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14432 4 * bit_count (saved_regs_mask));
14433 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14435 if (IS_INTERRUPT (func_type))
14436 /* Interrupt handlers will have pushed the
14437 IP onto the stack, so restore it now. */
14438 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14439 }
14440 else
14441 {
14442 /* This branch is executed for ARM mode (non-apcs frames) and
14443 Thumb-2 mode. Frame layout is essentially the same for those
14444 cases, except that in ARM mode frame pointer points to the
14445 first saved register, while in Thumb-2 mode the frame pointer points
14446 to the last saved register.
14448 It is possible to make frame pointer point to last saved
14449 register in both cases, and remove some conditionals below.
14450 That means that fp setup in prologue would be just "mov fp, sp"
14451 and sp restore in epilogue would be just "mov sp, fp", whereas
14452 now we have to use add/sub in those cases. However, the value
14453 of that would be marginal, as both mov and add/sub are 32-bit
14454 in ARM mode, and it would require extra conditionals
14455 in arm_expand_prologue to distinguish ARM-apcs-frame case
14456 (where frame pointer is required to point at first register)
14457 and ARM-non-apcs-frame. Therefore, such change is postponed
14458 until real need arise. */
14459 unsigned HOST_WIDE_INT amount;
14461 /* Restore stack pointer if necessary. */
14462 if (TARGET_ARM && frame_pointer_needed)
14464 operands[0] = stack_pointer_rtx;
14465 operands[1] = hard_frame_pointer_rtx;
14467 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14468 output_add_immediate (operands);
14472 if (frame_pointer_needed)
14474 /* For Thumb-2 restore sp from the frame pointer.
14475 Operand restrictions mean we have to increment FP, then copy
14476 it to SP.  */
14477 amount = offsets->locals_base - offsets->saved_regs;
14478 operands[0] = hard_frame_pointer_rtx;
14482 unsigned long count;
14483 operands[0] = stack_pointer_rtx;
14484 amount = offsets->outgoing_args - offsets->saved_regs;
14485 /* pop call clobbered registers if it avoids a
14486 separate stack adjustment. */
14487 count = offsets->saved_regs - offsets->saved_args;
14488 if (optimize_size
14489 && count != 0
14490 && !crtl->calls_eh_return
14491 && bit_count(saved_regs_mask) * 4 == count
14492 && !IS_INTERRUPT (func_type)
14493 && !crtl->tail_call_emit)
14495 unsigned long mask;
14496 /* Preserve return values, of any size. */
14497 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14498 mask ^= 0xf;
14499 mask &= ~saved_regs_mask;
14500 reg = 0;
14501 while (bit_count (mask) * 4 > amount)
14502 {
14503 while ((mask & (1 << reg)) == 0)
14504 reg++;
14505 mask &= ~(1 << reg);
14506 }
14507 if (bit_count (mask) * 4 == amount) {
14508 amount = 0;
14509 saved_regs_mask |= mask;
14510 }
14511 }
14512 }
14514 if (amount)
14515 {
14516 operands[1] = operands[0];
14517 operands[2] = GEN_INT (amount);
14518 output_add_immediate (operands);
14520 if (frame_pointer_needed)
14521 asm_fprintf (f, "\tmov\t%r, %r\n",
14522 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14525 if (TARGET_FPA_EMU2)
14527 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14528 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14529 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14534 start_reg = FIRST_FPA_REGNUM;
14536 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14538 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14540 if (reg - start_reg == 3)
14542 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14543 start_reg, SP_REGNUM);
14544 start_reg = reg + 1;
14549 if (reg != start_reg)
14550 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14551 start_reg, reg - start_reg,
14554 start_reg = reg + 1;
14558 /* Just in case the last register checked also needs unstacking. */
14559 if (reg != start_reg)
14560 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14561 start_reg, reg - start_reg, SP_REGNUM);
14564 if (TARGET_HARD_FLOAT && TARGET_VFP)
14566 int end_reg = LAST_VFP_REGNUM + 1;
14568 /* Scan the registers in reverse order. We need to match
14569 any groupings made in the prologue and generate matching
14571 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14573 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14574 && (!df_regs_ever_live_p (reg + 1)
14575 || call_used_regs[reg + 1]))
14577 if (end_reg > reg + 2)
14578 vfp_output_fldmd (f, SP_REGNUM,
14579 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14580 (end_reg - (reg + 2)) / 2);
14584 if (end_reg > reg + 2)
14585 vfp_output_fldmd (f, SP_REGNUM, 0,
14586 (end_reg - (reg + 2)) / 2);
14590 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14591 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14592 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14594 /* If we can, restore the LR into the PC. */
14595 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14596 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14597 && !IS_STACKALIGN (func_type)
14598 && really_return
14599 && crtl->args.pretend_args_size == 0
14600 && saved_regs_mask & (1 << LR_REGNUM)
14601 && !crtl->calls_eh_return)
14602 {
14603 saved_regs_mask &= ~ (1 << LR_REGNUM);
14604 saved_regs_mask |= (1 << PC_REGNUM);
14605 rfe = IS_INTERRUPT (func_type);
14606 }
14607 else
14608 rfe = 0;
14610 /* Load the registers off the stack. If we only have one register
14611 to load use the LDR instruction - it is faster. For Thumb-2
14612 always use pop and the assembler will pick the best instruction.*/
14613 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14614 && !IS_INTERRUPT(func_type))
14615 {
14616 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14617 }
14618 else if (saved_regs_mask)
14619 {
14620 if (saved_regs_mask & (1 << SP_REGNUM))
14621 /* Note - write back to the stack register is not enabled
14622 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14623 in the list of registers and if we add writeback the
14624 instruction becomes UNPREDICTABLE. */
14625 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14626 rfe);
14627 else if (TARGET_ARM)
14628 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14629 rfe);
14630 else
14631 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14632 }
14634 if (crtl->args.pretend_args_size)
14635 {
14636 /* Unwind the pre-pushed regs. */
14637 operands[0] = operands[1] = stack_pointer_rtx;
14638 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14639 output_add_immediate (operands);
14640 }
14641 }
14643 /* We may have already restored PC directly from the stack. */
14644 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14645 return "";
14647 /* Stack adjustment for exception handler. */
14648 if (crtl->calls_eh_return)
14649 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14650 ARM_EH_STACKADJ_REGNUM);
14652 /* Generate the return instruction. */
14653 switch ((int) ARM_FUNC_TYPE (func_type))
14654 {
14655 case ARM_FT_ISR:
14656 case ARM_FT_FIQ:
14657 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14658 break;
14660 case ARM_FT_EXCEPTION:
14661 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14662 break;
14664 case ARM_FT_INTERWORKED:
14665 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14666 break;
14668 default:
14669 if (IS_STACKALIGN (func_type))
14671 /* See comment in arm_expand_prologue. */
14672 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14674 if (arm_arch5 || arm_arch4t)
14675 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14677 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14678 break;
14679 }
14681 return "";
14682 }
14684 static void
14685 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14686 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14687 {
14688 arm_stack_offsets *offsets;
14690 if (TARGET_THUMB1)
14691 {
14692 int regno;
14694 /* Emit any call-via-reg trampolines that are needed for v4t support
14695 of call_reg and call_value_reg type insns. */
14696 for (regno = 0; regno < LR_REGNUM; regno++)
14697 {
14698 rtx label = cfun->machine->call_via[regno];
14700 if (label != NULL)
14701 {
14702 switch_to_section (function_section (current_function_decl));
14703 targetm.asm_out.internal_label (asm_out_file, "L",
14704 CODE_LABEL_NUMBER (label));
14705 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14706 }
14707 }
14709 /* ??? Probably not safe to set this here, since it assumes that a
14710 function will be emitted as assembly immediately after we generate
14711 RTL for it. This does not happen for inline functions. */
14712 cfun->machine->return_used_this_function = 0;
14713 }
14714 else /* TARGET_32BIT */
14715 {
14716 /* We need to take into account any stack-frame rounding. */
14717 offsets = arm_get_frame_offsets ();
14719 gcc_assert (!use_return_insn (FALSE, NULL)
14720 || (cfun->machine->return_used_this_function != 0)
14721 || offsets->saved_regs == offsets->outgoing_args
14722 || frame_pointer_needed);
14724 /* Reset the ARM-specific per-function variables. */
14725 after_arm_reorg = 0;
14726 }
14727 }
14729 /* Generate and emit an insn that we will recognize as a push_multi.
14730 Unfortunately, since this insn does not reflect very well the actual
14731 semantics of the operation, we need to annotate the insn for the benefit
14732 of DWARF2 frame unwind information. */
14733 static rtx
14734 emit_multi_reg_push (unsigned long mask)
14735 {
14736 int num_regs = 0;
14737 int num_dwarf_regs;
14738 int i, j;
14739 rtx par;
14740 rtx dwarf;
14741 int dwarf_par_index;
14742 rtx tmp, reg;
14744 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14745 if (mask & (1 << i))
14746 num_regs++;
14748 gcc_assert (num_regs && num_regs <= 16);
14750 /* We don't record the PC in the dwarf frame information. */
14751 num_dwarf_regs = num_regs;
14752 if (mask & (1 << PC_REGNUM))
14753 num_dwarf_regs--;
14755 /* For the body of the insn we are going to generate an UNSPEC in
14756 parallel with several USEs. This allows the insn to be recognized
14757 by the push_multi pattern in the arm.md file.
14759 The body of the insn looks something like this:
14762 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14763 (const_int:SI <num>)))
14764 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14770 For the frame note however, we try to be more explicit and actually
14771 show each register being stored into the stack frame, plus a (single)
14772 decrement of the stack pointer. We do it this way in order to be
14773 friendly to the stack unwinding code, which only wants to see a single
14774 stack decrement per instruction. The RTL we generate for the note looks
14775 something like this:
14778 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14779 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14780 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14781 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14785 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14786 instead we'd have a parallel expression detailing all
14787 the stores to the various memory addresses so that debug
14788 information is more up-to-date. Remember however while writing
14789 this to take care of the constraints with the push instruction.
14791 Note also that this has to be taken care of for the VFP registers.
14793 For more see PR43399. */
14795 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14796 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14797 dwarf_par_index = 1;
14799 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14801 if (mask & (1 << i))
14803 reg = gen_rtx_REG (SImode, i);
14805 XVECEXP (par, 0, 0)
14806 = gen_rtx_SET (VOIDmode,
14807 gen_frame_mem
14808 (BLKmode,
14809 gen_rtx_PRE_MODIFY (Pmode,
14810 stack_pointer_rtx,
14811 plus_constant
14812 (stack_pointer_rtx,
14813 -4 * num_regs))
14814 ),
14815 gen_rtx_UNSPEC (BLKmode,
14816 gen_rtvec (1, reg),
14817 UNSPEC_PUSH_MULT));
14819 if (i != PC_REGNUM)
14820 {
14821 tmp = gen_rtx_SET (VOIDmode,
14822 gen_frame_mem (SImode, stack_pointer_rtx),
14823 reg);
14824 RTX_FRAME_RELATED_P (tmp) = 1;
14825 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14826 dwarf_par_index++;
14827 }
14829 break;
14830 }
14831 }
14833 for (j = 1, i++; j < num_regs; i++)
14835 if (mask & (1 << i))
14837 reg = gen_rtx_REG (SImode, i);
14839 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14841 if (i != PC_REGNUM)
14842 {
14843 tmp
14844 = gen_rtx_SET (VOIDmode,
14847 plus_constant (stack_pointer_rtx,
14848 4 * j)),
14849 reg);
14850 RTX_FRAME_RELATED_P (tmp) = 1;
14851 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14852 }
14854 j++;
14855 }
14856 }
14858 par = emit_insn (par);
14860 tmp = gen_rtx_SET (VOIDmode,
14861 stack_pointer_rtx,
14862 plus_constant (stack_pointer_rtx, -4 * num_regs));
14863 RTX_FRAME_RELATED_P (tmp) = 1;
14864 XVECEXP (dwarf, 0, 0) = tmp;
14866 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14868 return par;
14869 }
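/* Worked example (illustrative): emit_multi_reg_push (0x4010), i.e.
   {r4, lr}, emits a single push_multi insn (an stmfd of both
   registers) whose REG_FRAME_RELATED_EXPR note spells out the -8
   stack adjustment and both individual stores for the unwinder.  */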
14871 /* Calculate the size of the return value that is passed in registers. */
14872 static unsigned
14873 arm_size_return_regs (void)
14874 {
14875 enum machine_mode mode;
14877 if (crtl->return_rtx != 0)
14878 mode = GET_MODE (crtl->return_rtx);
14879 else
14880 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14882 return GET_MODE_SIZE (mode);
14883 }
14885 static rtx
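/* Worked example (illustrative): a function returning a 16-byte
   structure in registers yields GET_MODE_SIZE == 16 here; callers
   such as the epilogue code divide by 4 (rounding up) to conclude
   that r0-r3 must all be left untouched.  */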
14886 emit_sfm (int base_reg, int count)
14887 {
14888 rtx par;
14889 rtx dwarf;
14890 rtx tmp, reg;
14891 int i;
14893 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14894 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14896 reg = gen_rtx_REG (XFmode, base_reg++);
14898 XVECEXP (par, 0, 0)
14899 = gen_rtx_SET (VOIDmode,
14900 gen_frame_mem
14901 (BLKmode,
14902 gen_rtx_PRE_MODIFY (Pmode,
14903 stack_pointer_rtx,
14904 plus_constant
14905 (stack_pointer_rtx,
14906 -12 * count))
14907 ),
14908 gen_rtx_UNSPEC (BLKmode,
14909 gen_rtvec (1, reg),
14910 UNSPEC_PUSH_MULT));
14911 tmp = gen_rtx_SET (VOIDmode,
14912 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14913 RTX_FRAME_RELATED_P (tmp) = 1;
14914 XVECEXP (dwarf, 0, 1) = tmp;
14916 for (i = 1; i < count; i++)
14917 {
14918 reg = gen_rtx_REG (XFmode, base_reg++);
14919 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14921 tmp = gen_rtx_SET (VOIDmode,
14922 gen_frame_mem (XFmode,
14923 plus_constant (stack_pointer_rtx,
14924 i * 12)),
14925 reg);
14926 RTX_FRAME_RELATED_P (tmp) = 1;
14927 XVECEXP (dwarf, 0, i + 1) = tmp;
14928 }
14930 tmp = gen_rtx_SET (VOIDmode,
14931 stack_pointer_rtx,
14932 plus_constant (stack_pointer_rtx, -12 * count));
14934 RTX_FRAME_RELATED_P (tmp) = 1;
14935 XVECEXP (dwarf, 0, 0) = tmp;
14937 par = emit_insn (par);
14938 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14940 return par;
14941 }
14944 /* Return true if the current function needs to save/restore LR. */
14946 static bool
14947 thumb_force_lr_save (void)
14948 {
14949 return !cfun->machine->lr_save_eliminated
14950 && (!leaf_function_p ()
14951 || thumb_far_jump_used_p ()
14952 || df_regs_ever_live_p (LR_REGNUM));
14953 }
14956 /* Compute the distance from register FROM to register TO.
14957 These can be the arg pointer (26), the soft frame pointer (25),
14958 the stack pointer (13) or the hard frame pointer (11).
14959 In thumb mode r7 is used as the soft frame pointer, if needed.
14960 Typical stack layout looks like this:
14962 old stack pointer -> |    |
14963 ----
14964 |    | \
14965 |    | saved arguments for
14966 |    | vararg functions
14967 |    | /
14968 --
14969 hard FP & arg pointer -> |    | \
14970 |    | stack
14971 |    | frame
14972 |    | /
14973 --
14974 |    | \
14975 |    | call saved
14976 |    | registers
14977 soft frame pointer -> |    | /
14978 --
14979 |    | \
14980 |    | local
14981 |    | variables
14982 locals base pointer -> |    | /
14983 --
14984 |    | \
14985 |    | outgoing
14986 |    | arguments
14987 current stack pointer -> |    | /
14988 --
14990 For a given function some or all of these stack components
14991 may not be needed, giving rise to the possibility of
14992 eliminating some of the registers.
14994 The values returned by this function must reflect the behavior
14995 of arm_expand_prologue() and arm_compute_save_reg_mask().
14997 The sign of the number returned reflects the direction of stack
14998 growth, so the values are positive for all eliminations except
14999 from the soft frame pointer to the hard frame pointer.
15001 SFP may point just inside the local variables block to ensure correct
15002 alignment.  */
15005 /* Calculate stack offsets. These are used to calculate register elimination
15006 offsets and in prologue/epilogue code. Also calculates which registers
15007 should be saved. */
15009 static arm_stack_offsets *
15010 arm_get_frame_offsets (void)
15011 {
15012 struct arm_stack_offsets *offsets;
15013 unsigned long func_type;
15014 int leaf;
15015 int saved;
15016 int core_saved;
15017 HOST_WIDE_INT frame_size;
15018 int i;
15020 offsets = &cfun->machine->stack_offsets;
15022 /* We need to know if we are a leaf function. Unfortunately, it
15023 is possible to be called after start_sequence has been called,
15024 which causes get_insns to return the insns for the sequence,
15025 not the function, which will cause leaf_function_p to return
15026 the incorrect result.
15027 However, we only need
15028 to know about leaf functions once reload has completed, and the
15029 frame size cannot be changed after that time, so we can safely
15030 use the cached value. */
15032 if (reload_completed)
15033 return offsets;
15035 /* Initially this is the size of the local variables.  It will be translated
15036 into an offset once we have determined the size of preceding data.  */
15037 frame_size = ROUND_UP_WORD (get_frame_size ());
15039 leaf = leaf_function_p ();
15041 /* Space for variadic functions. */
15042 offsets->saved_args = crtl->args.pretend_args_size;
15044 /* In Thumb mode this is incorrect, but never used. */
15045 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15046 arm_compute_static_chain_stack_bytes();
15048 if (TARGET_32BIT)
15049 {
15050 unsigned int regno;
15052 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15053 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15054 saved = core_saved;
15056 /* We know that SP will be doubleword aligned on entry, and we must
15057 preserve that condition at any subroutine call. We also require the
15058 soft frame pointer to be doubleword aligned. */
15060 if (TARGET_REALLY_IWMMXT)
15061 {
15062 /* Check for the call-saved iWMMXt registers.  */
15063 for (regno = FIRST_IWMMXT_REGNUM;
15064 regno <= LAST_IWMMXT_REGNUM;
15065 regno++)
15066 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15067 saved += 8;
15068 }
15070 func_type = arm_current_func_type ();
15071 if (! IS_VOLATILE (func_type))
15073 /* Space for saved FPA registers. */
15074 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15075 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15076 saved += 12;
15078 /* Space for saved VFP registers. */
15079 if (TARGET_HARD_FLOAT && TARGET_VFP)
15080 saved += arm_get_vfp_saved_size ();
15081 }
15082 }
15083 else /* TARGET_THUMB1 */
15085 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15086 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15087 saved = core_saved;
15088 if (TARGET_BACKTRACE)
15089 saved += 16;
15090 }
15092 /* Saved registers include the stack frame. */
15093 offsets->saved_regs = offsets->saved_args + saved +
15094 arm_compute_static_chain_stack_bytes();
15095 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15096 /* A leaf function does not need any stack alignment if it has nothing
15097 on the stack.  */
15098 if (leaf && frame_size == 0)
15099 {
15100 offsets->outgoing_args = offsets->soft_frame;
15101 offsets->locals_base = offsets->soft_frame;
15102 return offsets;
15103 }
15105 /* Ensure SFP has the correct alignment. */
15106 if (ARM_DOUBLEWORD_ALIGN
15107 && (offsets->soft_frame & 7))
15109 offsets->soft_frame += 4;
15110 /* Try to align stack by pushing an extra reg. Don't bother doing this
15111 when there is a stack frame as the alignment will be rolled into
15112 the normal stack adjustment. */
15113 if (frame_size + crtl->outgoing_args_size == 0)
15114 {
15115 int reg = -1;
15117 /* If it is safe to use r3, then do so. This sometimes
15118 generates better code on Thumb-2 by avoiding the need to
15119 use 32-bit push/pop instructions. */
15120 if (!crtl->tail_call_emit
15121 && arm_size_return_regs () <= 12
15122 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15123 {
15124 reg = 3;
15125 }
15126 else
15127 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15128 {
15129 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15130 {
15131 reg = i;
15132 break;
15133 }
15134 }
15136 if (reg != -1)
15137 {
15138 offsets->saved_regs += 4;
15139 offsets->saved_regs_mask |= (1 << reg);
15140 }
15141 }
15142 }
15144 offsets->locals_base = offsets->soft_frame + frame_size;
15145 offsets->outgoing_args = (offsets->locals_base
15146 + crtl->outgoing_args_size);
15148 if (ARM_DOUBLEWORD_ALIGN)
15150 /* Ensure SP remains doubleword aligned. */
15151 if (offsets->outgoing_args & 7)
15152 offsets->outgoing_args += 4;
15153 gcc_assert (!(offsets->outgoing_args & 7));
15154 }
15156 return offsets;
15157 }
15160 /* Calculate the relative offsets for the different stack pointers. Positive
15161 offsets are in the direction of stack growth. */
15163 HOST_WIDE_INT
15164 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15165 {
15166 arm_stack_offsets *offsets;
15168 offsets = arm_get_frame_offsets ();
15170 /* OK, now we have enough information to compute the distances.
15171 There must be an entry in these switch tables for each pair
15172 of registers in ELIMINABLE_REGS, even if some of the entries
15173 seem to be redundant or useless.  */
15174 switch (from)
15175 {
15176 case ARG_POINTER_REGNUM:
15177 switch (to)
15178 {
15179 case THUMB_HARD_FRAME_POINTER_REGNUM:
15180 return 0;
15182 case FRAME_POINTER_REGNUM:
15183 /* This is the reverse of the soft frame pointer
15184 to hard frame pointer elimination below. */
15185 return offsets->soft_frame - offsets->saved_args;
15187 case ARM_HARD_FRAME_POINTER_REGNUM:
15188 /* This is only non-zero in the case where the static chain register
15189 is stored above the frame. */
15190 return offsets->frame - offsets->saved_args - 4;
15192 case STACK_POINTER_REGNUM:
15193 /* If nothing has been pushed on the stack at all
15194 then this will return -4. This *is* correct! */
15195 return offsets->outgoing_args - (offsets->saved_args + 4);
15197 default:
15198 gcc_unreachable ();
15199 }
15200 gcc_unreachable ();
15202 case FRAME_POINTER_REGNUM:
15203 switch (to)
15204 {
15205 case THUMB_HARD_FRAME_POINTER_REGNUM:
15206 return 0;
15208 case ARM_HARD_FRAME_POINTER_REGNUM:
15209 /* The hard frame pointer points to the top entry in the
15210 stack frame. The soft frame pointer to the bottom entry
15211 in the stack frame. If there is no stack frame at all,
15212 then they are identical. */
15214 return offsets->frame - offsets->soft_frame;
15216 case STACK_POINTER_REGNUM:
15217 return offsets->outgoing_args - offsets->soft_frame;
15219 default:
15220 gcc_unreachable ();
15221 }
15222 gcc_unreachable ();
15224 default:
15225 /* You cannot eliminate from the stack pointer.
15226 In theory you could eliminate from the hard frame
15227 pointer to the stack pointer, but this will never
15228 happen, since if a stack frame is not needed the
15229 hard frame pointer will never be used. */
15230 gcc_unreachable ();
15231 }
15232 }
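/* Worked example (illustrative values): with 4 bytes of pretend args
   (saved_args == 4) and outgoing_args == 16, eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM above yields
   16 - (4 + 4) = 8, a positive offset in the direction of stack
   growth.  */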
15234 /* Given FROM and TO register numbers, say whether this elimination is
15235 allowed. Frame pointer elimination is automatically handled.
15237 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15238 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15239 pointer, we must eliminate FRAME_POINTER_REGNUM into
15240 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15241 ARG_POINTER_REGNUM. */
15243 static bool
15244 arm_can_eliminate (const int from, const int to)
15245 {
15246 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15247 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15248 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15249 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15253 /* Emit RTL to save coprocessor registers on function entry. Returns the
15254 number of bytes pushed. */
15256 static int
15257 arm_save_coproc_regs(void)
15258 {
15259 int saved_size = 0;
15260 unsigned reg;
15261 unsigned start_reg;
15262 rtx insn;
15264 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15265 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15267 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15268 insn = gen_rtx_MEM (V2SImode, insn);
15269 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15270 RTX_FRAME_RELATED_P (insn) = 1;
15271 saved_size += 8;
15272 }
15274 /* Save any floating point call-saved registers used by this
15275 function.  */
15276 if (TARGET_FPA_EMU2)
15278 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15279 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15281 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15282 insn = gen_rtx_MEM (XFmode, insn);
15283 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15284 RTX_FRAME_RELATED_P (insn) = 1;
15285 saved_size += 12;
15286 }
15287 }
15288 else
15289 {
15290 start_reg = LAST_FPA_REGNUM;
15292 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15294 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15296 if (start_reg - reg == 3)
15298 insn = emit_sfm (reg, 4);
15299 RTX_FRAME_RELATED_P (insn) = 1;
15300 saved_size += 48;
15301 start_reg = reg - 1;
15302 }
15303 }
15304 else
15305 {
15306 if (start_reg != reg)
15308 insn = emit_sfm (reg + 1, start_reg - reg);
15309 RTX_FRAME_RELATED_P (insn) = 1;
15310 saved_size += (start_reg - reg) * 12;
15312 start_reg = reg - 1;
15316 if (start_reg != reg)
15318 insn = emit_sfm (reg + 1, start_reg - reg);
15319 saved_size += (start_reg - reg) * 12;
15320 RTX_FRAME_RELATED_P (insn) = 1;
15321 }
15322 }
15323 if (TARGET_HARD_FLOAT && TARGET_VFP)
15324 {
15325 start_reg = FIRST_VFP_REGNUM;
15327 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15329 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15330 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15332 if (start_reg != reg)
15333 saved_size += vfp_emit_fstmd (start_reg,
15334 (reg - start_reg) / 2);
15335 start_reg = reg + 2;
15336 }
15337 }
15338 if (start_reg != reg)
15339 saved_size += vfp_emit_fstmd (start_reg,
15340 (reg - start_reg) / 2);
15341 }
15343 return saved_size;
15344 }
15346 /* Set the Thumb frame pointer from the stack pointer. */
15348 static void
15349 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15350 {
15351 HOST_WIDE_INT amount;
15352 rtx insn, dwarf;
15354 amount = offsets->outgoing_args - offsets->locals_base;
15355 if (amount < 1024)
15356 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15357 stack_pointer_rtx, GEN_INT (amount)));
15358 else
15359 {
15360 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15361 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15362 expects the first two operands to be the same.  */
15363 if (TARGET_THUMB2)
15364 {
15365 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15366 stack_pointer_rtx,
15367 hard_frame_pointer_rtx));
15368 }
15369 else
15370 {
15371 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15372 hard_frame_pointer_rtx,
15373 stack_pointer_rtx));
15374 }
15375 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15376 plus_constant (stack_pointer_rtx, amount));
15377 RTX_FRAME_RELATED_P (dwarf) = 1;
15378 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15379 }
15381 RTX_FRAME_RELATED_P (insn) = 1;
15384 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15385 function.  */
15386 void
15387 arm_expand_prologue (void)
15388 {
15389 rtx amount;
15390 rtx insn;
15391 rtx ip_rtx;
15392 unsigned long live_regs_mask;
15393 unsigned long func_type;
15395 int saved_pretend_args = 0;
15396 int saved_regs = 0;
15397 unsigned HOST_WIDE_INT args_to_push;
15398 arm_stack_offsets *offsets;
15400 func_type = arm_current_func_type ();
15402 /* Naked functions don't have prologues. */
15403 if (IS_NAKED (func_type))
15404 return;
15406 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15407 args_to_push = crtl->args.pretend_args_size;
15409 /* Compute which register we will have to save onto the stack. */
15410 offsets = arm_get_frame_offsets ();
15411 live_regs_mask = offsets->saved_regs_mask;
15413 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15415 if (IS_STACKALIGN (func_type))
15416 {
15417 rtx dwarf;
15418 rtx r0;
15419 rtx r1;
15420 /* Handle a word-aligned stack pointer.  We generate the following:
15422 mov r0, sp
15423 bic r1, r0, #7
15424 mov sp, r1
15425 <save and restore r0 in normal prologue/epilogue>
15426 mov sp, r0
15427 bx lr
15429 The unwinder doesn't need to know about the stack realignment.
15430 Just tell it we saved SP in r0. */
15431 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15433 r0 = gen_rtx_REG (SImode, 0);
15434 r1 = gen_rtx_REG (SImode, 1);
15435 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15436 compiler won't choke. */
15437 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15438 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15439 insn = gen_movsi (r0, stack_pointer_rtx);
15440 RTX_FRAME_RELATED_P (insn) = 1;
15441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15442 emit_insn (insn);
15443 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15444 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15445 }
15447 /* For APCS frames, if IP register is clobbered
15448 when creating frame, save that register in a special
15449 way.  */
15450 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15451 {
15452 if (IS_INTERRUPT (func_type))
15453 {
15454 /* Interrupt functions must not corrupt any registers.
15455 Creating a frame pointer however, corrupts the IP
15456 register, so we must push it first. */
15457 insn = emit_multi_reg_push (1 << IP_REGNUM);
15459 /* Do not set RTX_FRAME_RELATED_P on this insn.
15460 The dwarf stack unwinding code only wants to see one
15461 stack decrement per function, and this is not it. If
15462 this instruction is labeled as being part of the frame
15463 creation sequence then dwarf2out_frame_debug_expr will
15464 die when it encounters the assignment of IP to FP
15465 later on, since the use of SP here establishes SP as
15466 the CFA register and not IP.
15468 Anyway this instruction is not really part of the stack
15469 frame creation although it is part of the prologue.  */
15470 }
15471 else if (IS_NESTED (func_type))
15472 {
15473 /* The Static chain register is the same as the IP register
15474 used as a scratch register during stack frame creation.
15475 To get around this need to find somewhere to store IP
15476 whilst the frame is being created.  We try the following
15477 places in order:
15479 1. The last argument register.
15480 2. A slot on the stack above the frame. (This only
15481 works if the function is not a varargs function).
15482 3. Register r3, after pushing the argument registers
15483 onto the stack.
15485 Note - we only need to tell the dwarf2 backend about the SP
15486 adjustment in the second variant; the static chain register
15487 doesn't need to be unwound, as it doesn't contain a value
15488 inherited from the caller. */
15490 if (df_regs_ever_live_p (3) == false)
15491 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15492 else if (args_to_push == 0)
15496 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15499 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15500 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15503 /* Just tell the dwarf backend that we adjusted SP. */
15504 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15505 plus_constant (stack_pointer_rtx,
15507 RTX_FRAME_RELATED_P (insn) = 1;
15508 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15509 }
15510 else
15511 {
15512 /* Store the args on the stack. */
15513 if (cfun->machine->uses_anonymous_args)
15514 insn = emit_multi_reg_push
15515 ((0xf0 >> (args_to_push / 4)) & 0xf);
15516 else
15517 insn = emit_insn
15518 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15519 GEN_INT (- args_to_push)));
15521 RTX_FRAME_RELATED_P (insn) = 1;
15523 saved_pretend_args = 1;
15524 fp_offset = args_to_push;
15525 args_to_push = 0;
15527 /* Now reuse r3 to preserve IP.  */
15528 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15529 }
15530 }
15532 insn = emit_set_insn (ip_rtx,
15533 plus_constant (stack_pointer_rtx, fp_offset));
15534 RTX_FRAME_RELATED_P (insn) = 1;
15535 }
15537 if (args_to_push)
15538 {
15539 /* Push the argument registers, or reserve space for them. */
15540 if (cfun->machine->uses_anonymous_args)
15541 insn = emit_multi_reg_push
15542 ((0xf0 >> (args_to_push / 4)) & 0xf);
15543 else
15544 insn = emit_insn
15545 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15546 GEN_INT (- args_to_push)));
15547 RTX_FRAME_RELATED_P (insn) = 1;
15548 }
15550 /* If this is an interrupt service routine, and the link register
15551 is going to be pushed, and we're not generating extra
15552 push of IP (needed when a frame is created and the frame layout is APCS),
15553 subtracting four from LR now will mean that the function return
15554 can be done with a single instruction. */
15555 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15556 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15557 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15558 && TARGET_ARM)
15559 {
15560 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15562 emit_set_insn (lr, plus_constant (lr, -4));
15563 }
15565 if (live_regs_mask)
15566 {
15567 saved_regs += bit_count (live_regs_mask) * 4;
15568 if (optimize_size && !frame_pointer_needed
15569 && saved_regs == offsets->saved_regs - offsets->saved_args)
15570 {
15571 /* If no coprocessor registers are being pushed and we don't have
15572 to worry about a frame pointer then push extra registers to
15573 create the stack frame.  This is done in a way that does not
15574 alter the frame layout, so is independent of the epilogue.  */
15575 int n;
15576 int frame;
15577 n = 0;
15578 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15579 n++;
15580 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15581 if (frame && n * 4 >= frame)
15582 {
15583 n = frame / 4;
15584 live_regs_mask |= (1 << n) - 1;
15585 saved_regs += frame;
15586 }
15587 }
15588 insn = emit_multi_reg_push (live_regs_mask);
15589 RTX_FRAME_RELATED_P (insn) = 1;
15590 }
15592 if (! IS_VOLATILE (func_type))
15593 saved_regs += arm_save_coproc_regs ();
15595 if (frame_pointer_needed && TARGET_ARM)
15597 /* Create the new frame pointer. */
15598 if (TARGET_APCS_FRAME)
15600 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15601 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15602 RTX_FRAME_RELATED_P (insn) = 1;
15604 if (IS_NESTED (func_type))
15606 /* Recover the static chain register. */
15607 if (!df_regs_ever_live_p (3)
15608 || saved_pretend_args)
15609 insn = gen_rtx_REG (SImode, 3);
15610 else /* if (crtl->args.pretend_args_size == 0) */
15612 insn = plus_constant (hard_frame_pointer_rtx, 4);
15613 insn = gen_frame_mem (SImode, insn);
15615 emit_set_insn (ip_rtx, insn);
15616 /* Add a USE to stop propagate_one_insn() from barfing. */
15617 emit_insn (gen_prologue_use (ip_rtx));
15618 }
15619 }
15620 else
15621 {
15622 insn = GEN_INT (saved_regs - 4);
15623 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15624 stack_pointer_rtx, insn));
15625 RTX_FRAME_RELATED_P (insn) = 1;
15626 }
15627 }
15629 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15631 /* This add can produce multiple insns for a large constant, so we
15632 need to get tricky. */
15633 rtx last = get_last_insn ();
15635 amount = GEN_INT (offsets->saved_args + saved_regs
15636 - offsets->outgoing_args);
15638 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15639 amount));
15640 do
15641 {
15642 last = last ? NEXT_INSN (last) : get_insns ();
15643 RTX_FRAME_RELATED_P (last) = 1;
15644 }
15645 while (last != insn);
15647 /* If the frame pointer is needed, emit a special barrier that
15648 will prevent the scheduler from moving stores to the frame
15649 before the stack adjustment. */
15650 if (frame_pointer_needed)
15651 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15652 hard_frame_pointer_rtx));
15653 }
15656 if (frame_pointer_needed && TARGET_THUMB2)
15657 thumb_set_frame_pointer (offsets);
15659 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15661 unsigned long mask;
15663 mask = live_regs_mask;
15664 mask &= THUMB2_WORK_REGS;
15665 if (!IS_NESTED (func_type))
15666 mask |= (1 << IP_REGNUM);
15667 arm_load_pic_register (mask);
15668 }
15670 /* If we are profiling, make sure no instructions are scheduled before
15671 the call to mcount. Similarly if the user has requested no
15672 scheduling in the prolog. Similarly if we want non-call exceptions
15673 using the EABI unwinder, to prevent faulting instructions from being
15674 swapped with a stack adjustment. */
15675 if (crtl->profile || !TARGET_SCHED_PROLOG
15676 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15677 emit_insn (gen_blockage ());
15679 /* If the link register is being kept alive, with the return address in it,
15680 then make sure that it does not get reused by the ce2 pass. */
15681 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15682 cfun->machine->lr_save_eliminated = 1;
15683 }
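/* Illustrative sequence (assumed APCS frame): for a small function
   with locals the code above emits the classic prologue

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   matching the layout that arm_output_epilogue later unwinds.  */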
15685 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15686 static void
15687 arm_print_condition (FILE *stream)
15688 {
15689 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15690 {
15691 /* Branch conversion is not implemented for Thumb-2.  */
15692 if (TARGET_THUMB)
15693 {
15694 output_operand_lossage ("predicated Thumb instruction");
15695 return;
15696 }
15697 if (current_insn_predicate != NULL)
15699 output_operand_lossage
15700 ("predicated instruction in conditional sequence");
15704 fputs (arm_condition_codes[arm_current_cc], stream);
15706 else if (current_insn_predicate)
15708 enum arm_cond_code code;
15712 output_operand_lossage ("predicated Thumb instruction");
15716 code = get_arm_condition_code (current_insn_predicate);
15717 fputs (arm_condition_codes[code], stream);
/* If CODE is 'd', then X is a condition operand and the instruction
should only be executed if the condition is true.
If CODE is 'D', then X is a condition operand and the instruction
should only be executed if the condition is false: however, if the mode
of the comparison is CCFPEmode, then always execute the instruction -- we
do this because in these circumstances !GE does not necessarily imply LT;
in these cases the instruction pattern will take care to make sure that
an instruction containing %d will follow, thereby undoing the effects of
doing this instruction unconditionally.
If CODE is 'N' then X is a floating point operand that must be negated
before output.
If CODE is 'B' then output a bitwise inverted value of X (a const int).
If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
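/* As an illustration (the templates here are hypothetical; see arm.md
   for the real patterns), an output template along the lines of

       "mov%d3\t%0, %1"        @ executed only if operand 3 is true
       "mov%D3\t%0, %2"        @ executed only if operand 3 is false

   has 'd'/'D' expand to a condition suffix such as "eq" or "ne", and
   something like "ldmia\t%1, %M0" prints operand 0 as a register
   range in "{r0-r3}" style.  */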
static void
arm_print_operand (FILE *stream, rtx x, int code)
15741 fputs (ASM_COMMENT_START, stream);
15745 fputs (user_label_prefix, stream);
15749 fputs (REGISTER_PREFIX, stream);
15753 arm_print_condition (stream);
15757 /* Nothing in unified syntax, otherwise the current condition code. */
15758 if (!TARGET_UNIFIED_ASM)
15759 arm_print_condition (stream);
15763 /* The current condition code in unified syntax, otherwise nothing. */
15764 if (TARGET_UNIFIED_ASM)
15765 arm_print_condition (stream);
15769 /* The current condition code for a condition code setting instruction.
15770 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15771 if (TARGET_UNIFIED_ASM)
15773 fputc('s', stream);
15774 arm_print_condition (stream);
15778 arm_print_condition (stream);
15779 fputc('s', stream);
15784 /* If the instruction is conditionally executed then print
15785 the current condition code, otherwise print 's'. */
15786 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15787 if (current_insn_predicate)
15788 arm_print_condition (stream);
15790 fputc('s', stream);
15793 /* %# is a "break" sequence. It doesn't output anything, but is used to
15794 separate e.g. operand numbers from following text, if that text consists
of further digits which we don't want to be part of the operand
number.  */
15803 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15804 r = real_value_negate (&r);
15805 fprintf (stream, "%s", fp_const_from_val (&r));
15809 /* An integer or symbol address without a preceding # sign. */
15811 switch (GET_CODE (x))
15814 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15818 output_addr_const (stream, x);
15822 gcc_unreachable ();
15827 if (GET_CODE (x) == CONST_INT)
15830 val = ARM_SIGN_EXTEND (~INTVAL (x));
15831 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15835 putc ('~', stream);
15836 output_addr_const (stream, x);
15841 /* The low 16 bits of an immediate constant. */
15842 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15846 fprintf (stream, "%s", arithmetic_instr (x, 1));
15849 /* Truncate Cirrus shift counts. */
15851 if (GET_CODE (x) == CONST_INT)
15853 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15856 arm_print_operand (stream, x, 0);
15860 fprintf (stream, "%s", arithmetic_instr (x, 0));
15868 if (!shift_operator (x, SImode))
15870 output_operand_lossage ("invalid shift operand");
15874 shift = shift_op (x, &val);
15878 fprintf (stream, ", %s ", shift);
15880 arm_print_operand (stream, XEXP (x, 1), 0);
15882 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15887 /* An explanation of the 'Q', 'R' and 'H' register operands:
15889 In a pair of registers containing a DI or DF value the 'Q'
15890 operand returns the register number of the register containing
15891 the least significant part of the value. The 'R' operand returns
15892 the register number of the register containing the most
15893 significant part of the value.
15895 The 'H' operand returns the higher of the two register numbers.
15896 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15897 same as the 'Q' operand, since the most significant part of the
15898 value is held in the lower number register. The reverse is true
15899 on systems where WORDS_BIG_ENDIAN is false.
15901 The purpose of these operands is to distinguish between cases
15902 where the endian-ness of the values is important (for example
15903 when they are added together), and cases where the endian-ness
15904 is irrelevant, but the order of register operations is important.
15905 For example when loading a value from memory into a register
15906 pair, the endian-ness does not matter. Provided that the value
15907 from the lower memory address is put into the lower numbered
15908 register, and the value from the higher address is put into the
15909 higher numbered register, the load will work regardless of whether
15910 the value being loaded is big-wordian or little-wordian. The
15911 order of the two register loads can matter however, if the address
15912 of the memory location is actually held in one of the registers
15913 being overwritten by the load.
The 'Q' and 'R' constraints are also available for 64-bit
constants.  */
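/* Worked example (assuming a DImode value in the pair {r0, r1}):
   with WORDS_BIG_ENDIAN false the least significant word is in r0,
   so %Q prints r0, %R prints r1, and %H (the higher register number)
   also prints r1.  With WORDS_BIG_ENDIAN true, %Q prints r1 and %R
   prints r0, while %H still prints r1.  */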
15918 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15920 rtx part = gen_lowpart (SImode, x);
15921 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15925 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15927 output_operand_lossage ("invalid operand for code '%c'", code);
15931 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15935 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15937 enum machine_mode mode = GET_MODE (x);
if (mode == VOIDmode)
mode = DImode;
part = gen_highpart_mode (SImode, mode, x);
15943 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15947 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15949 output_operand_lossage ("invalid operand for code '%c'", code);
15953 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15957 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15959 output_operand_lossage ("invalid operand for code '%c'", code);
15963 asm_fprintf (stream, "%r", REGNO (x) + 1);
15967 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15969 output_operand_lossage ("invalid operand for code '%c'", code);
15973 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15977 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15979 output_operand_lossage ("invalid operand for code '%c'", code);
15983 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15987 asm_fprintf (stream, "%r",
15988 GET_CODE (XEXP (x, 0)) == REG
15989 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
asm_fprintf (stream, "{%r-%r}",
REGNO (x),
REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
/* Like 'M', but writing doubleword vector registers, for use by Neon
insns.  */
16002 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16003 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
if (numregs == 1)
asm_fprintf (stream, "{d%d}", regno);
else
asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16012 /* CONST_TRUE_RTX means always -- that's the default. */
16013 if (x == const_true_rtx)
16016 if (!COMPARISON_P (x))
16018 output_operand_lossage ("invalid operand for code '%c'", code);
fputs (arm_condition_codes[get_arm_condition_code (x)],
stream);
16027 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16028 want to do that. */
16029 if (x == const_true_rtx)
16031 output_operand_lossage ("instruction never executed");
16034 if (!COMPARISON_P (x))
16036 output_operand_lossage ("invalid operand for code '%c'", code);
fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
(get_arm_condition_code (x))],
stream);
/* Cirrus registers can be accessed in a variety of ways:
single floating point (f)
double floating point (d)
32bit integer (fx)
64bit integer (dx).  */
16050 case 'W': /* Cirrus register in F mode. */
16051 case 'X': /* Cirrus register in D mode. */
16052 case 'Y': /* Cirrus register in FX mode. */
16053 case 'Z': /* Cirrus register in DX mode. */
16054 gcc_assert (GET_CODE (x) == REG
16055 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
fprintf (stream, "mv%s%s",
code == 'W' ? "f"
: code == 'X' ? "d"
: code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16064 /* Print cirrus register in the mode specified by the register's mode. */
16067 int mode = GET_MODE (x);
16069 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16071 output_operand_lossage ("invalid operand for code '%c'", code);
16075 fprintf (stream, "mv%s%s",
16076 mode == DFmode ? "d"
16077 : mode == SImode ? "fx"
16078 : mode == DImode ? "dx"
16079 : "f", reg_names[REGNO (x)] + 2);
16085 if (GET_CODE (x) != REG
16086 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16087 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16088 /* Bad value for wCG register number. */
16090 output_operand_lossage ("invalid operand for code '%c'", code);
16095 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16098 /* Print an iWMMXt control register name. */
if (GET_CODE (x) != CONST_INT
|| INTVAL (x) < 0
|| INTVAL (x) >= 16)
16103 /* Bad value for wC register number. */
16105 output_operand_lossage ("invalid operand for code '%c'", code);
16111 static const char * wc_reg_names [16] =
16113 "wCID", "wCon", "wCSSF", "wCASF",
16114 "wC4", "wC5", "wC6", "wC7",
16115 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16116 "wC12", "wC13", "wC14", "wC15"
fputs (wc_reg_names [INTVAL (x)], stream);
/* Print the high single-precision register of a VFP double-precision
register.  */
16127 int mode = GET_MODE (x);
16130 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16132 output_operand_lossage ("invalid operand for code '%c'", code);
16137 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16139 output_operand_lossage ("invalid operand for code '%c'", code);
16143 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16147 /* Print a VFP/Neon double precision or quad precision register name. */
16151 int mode = GET_MODE (x);
16152 int is_quad = (code == 'q');
16155 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16157 output_operand_lossage ("invalid operand for code '%c'", code);
16161 if (GET_CODE (x) != REG
16162 || !IS_VFP_REGNUM (REGNO (x)))
16164 output_operand_lossage ("invalid operand for code '%c'", code);
16169 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16170 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16172 output_operand_lossage ("invalid operand for code '%c'", code);
16176 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16177 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16181 /* These two codes print the low/high doubleword register of a Neon quad
16182 register, respectively. For pair-structure types, can also print
16183 low/high quadword registers. */
16187 int mode = GET_MODE (x);
16190 if ((GET_MODE_SIZE (mode) != 16
16191 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16193 output_operand_lossage ("invalid operand for code '%c'", code);
16198 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16200 output_operand_lossage ("invalid operand for code '%c'", code);
16204 if (GET_MODE_SIZE (mode) == 16)
16205 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16206 + (code == 'f' ? 1 : 0));
16208 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16209 + (code == 'f' ? 1 : 0));
/* Print a VFPv3 floating-point constant, represented as an integer
index.  */
16217 int index = vfp3_const_double_index (x);
16218 gcc_assert (index != -1);
16219 fprintf (stream, "%d", index);
16223 /* Print bits representing opcode features for Neon.
16225 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16226 and polynomials as unsigned.
16228 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16230 Bit 2 is 1 for rounding functions, 0 otherwise. */
16232 /* Identify the type as 's', 'u', 'p' or 'f'. */
16235 HOST_WIDE_INT bits = INTVAL (x);
16236 fputc ("uspf"[bits & 3], stream);
16240 /* Likewise, but signed and unsigned integers are both 'i'. */
16243 HOST_WIDE_INT bits = INTVAL (x);
16244 fputc ("iipf"[bits & 3], stream);
16248 /* As for 'T', but emit 'u' instead of 'p'. */
16251 HOST_WIDE_INT bits = INTVAL (x);
16252 fputc ("usuf"[bits & 3], stream);
16256 /* Bit 2: rounding (vs none). */
16259 HOST_WIDE_INT bits = INTVAL (x);
16260 fputs ((bits & 4) != 0 ? "r" : "", stream);
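/* Summary of the mappings implemented above for the low two bits of
   the operand (bit 0: signed, bit 1: float/polynomial):

       bits   'T'   'F'   't'
        00     u     i     u      unsigned integer
        01     s     i     s      signed integer
        10     p     p     u      polynomial
        11     f     f     f      float

   'O' independently appends "r" when bit 2 (rounding) is set.  */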
16264 /* Memory operand for vld1/vst1 instruction. */
16268 bool postinc = FALSE;
16269 gcc_assert (GET_CODE (x) == MEM);
16270 addr = XEXP (x, 0);
if (GET_CODE (addr) == POST_INC)
{
postinc = 1;
addr = XEXP (addr, 0);
}
asm_fprintf (stream, "[%r]", REGNO (addr));
if (postinc)
fputs("!", stream);
16286 gcc_assert (GET_CODE (x) == MEM);
16287 addr = XEXP (x, 0);
16288 gcc_assert (GET_CODE (addr) == REG);
16289 asm_fprintf (stream, "[%r]", REGNO (addr));
16293 /* Translate an S register number into a D register number and element index. */
16296 int mode = GET_MODE (x);
16299 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16301 output_operand_lossage ("invalid operand for code '%c'", code);
16306 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16308 output_operand_lossage ("invalid operand for code '%c'", code);
16312 regno = regno - FIRST_VFP_REGNUM;
16313 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16317 /* Register specifier for vld1.16/vst1.16. Translate the S register
16318 number into a D register number and element index. */
16321 int mode = GET_MODE (x);
16324 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16326 output_operand_lossage ("invalid operand for code '%c'", code);
16331 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16333 output_operand_lossage ("invalid operand for code '%c'", code);
16337 regno = regno - FIRST_VFP_REGNUM;
16338 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16345 output_operand_lossage ("missing operand");
16349 switch (GET_CODE (x))
16352 asm_fprintf (stream, "%r", REGNO (x));
16356 output_memory_reference_mode = GET_MODE (x);
16357 output_address (XEXP (x, 0));
16364 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16365 sizeof (fpstr), 0, 1);
16366 fprintf (stream, "#%s", fpstr);
16369 fprintf (stream, "#%s", fp_immediate_constant (x));
16373 gcc_assert (GET_CODE (x) != NEG);
16374 fputc ('#', stream);
if (GET_CODE (x) == HIGH)
{
fputs (":lower16:", stream);
x = XEXP (x, 0);
}
output_addr_const (stream, x);
16387 /* Target hook for printing a memory address. */
static void
arm_print_operand_address (FILE *stream, rtx x)
16393 int is_minus = GET_CODE (x) == MINUS;
16395 if (GET_CODE (x) == REG)
16396 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16397 else if (GET_CODE (x) == PLUS || is_minus)
16399 rtx base = XEXP (x, 0);
16400 rtx index = XEXP (x, 1);
16401 HOST_WIDE_INT offset = 0;
16402 if (GET_CODE (base) != REG
16403 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
/* Ensure that BASE is a register (one of them must be).
Also ensure that SP is not used as an index register.  */
16412 switch (GET_CODE (index))
16415 offset = INTVAL (index);
16418 asm_fprintf (stream, "[%r, #%wd]",
16419 REGNO (base), offset);
16423 asm_fprintf (stream, "[%r, %s%r]",
16424 REGNO (base), is_minus ? "-" : "",
16434 asm_fprintf (stream, "[%r, %s%r",
16435 REGNO (base), is_minus ? "-" : "",
16436 REGNO (XEXP (index, 0)));
16437 arm_print_operand (stream, index, 'S');
16438 fputs ("]", stream);
16443 gcc_unreachable ();
16446 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16447 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16449 extern enum machine_mode output_memory_reference_mode;
16451 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16453 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16454 asm_fprintf (stream, "[%r, #%s%d]!",
16455 REGNO (XEXP (x, 0)),
16456 GET_CODE (x) == PRE_DEC ? "-" : "",
16457 GET_MODE_SIZE (output_memory_reference_mode));
16459 asm_fprintf (stream, "[%r], #%s%d",
16460 REGNO (XEXP (x, 0)),
16461 GET_CODE (x) == POST_DEC ? "-" : "",
16462 GET_MODE_SIZE (output_memory_reference_mode));
16464 else if (GET_CODE (x) == PRE_MODIFY)
16466 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16467 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16468 asm_fprintf (stream, "#%wd]!",
16469 INTVAL (XEXP (XEXP (x, 1), 1)));
16471 asm_fprintf (stream, "%r]!",
16472 REGNO (XEXP (XEXP (x, 1), 1)));
16474 else if (GET_CODE (x) == POST_MODIFY)
16476 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16477 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16478 asm_fprintf (stream, "#%wd",
16479 INTVAL (XEXP (XEXP (x, 1), 1)));
16481 asm_fprintf (stream, "%r",
16482 REGNO (XEXP (XEXP (x, 1), 1)));
else
output_addr_const (stream, x);
16488 if (GET_CODE (x) == REG)
16489 asm_fprintf (stream, "[%r]", REGNO (x));
16490 else if (GET_CODE (x) == POST_INC)
16491 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16492 else if (GET_CODE (x) == PLUS)
16494 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16495 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16496 asm_fprintf (stream, "[%r, #%wd]",
16497 REGNO (XEXP (x, 0)),
16498 INTVAL (XEXP (x, 1)));
16500 asm_fprintf (stream, "[%r, %r]",
16501 REGNO (XEXP (x, 0)),
16502 REGNO (XEXP (x, 1)));
16505 output_addr_const (stream, x);
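/* For reference, the ARM-state cases above render the standard
   addressing modes roughly as follows (offsets assume SImode):

       (reg r0)                         ->  [r0, #0]
       (plus (reg r0) (const_int 4))    ->  [r0, #4]
       (plus (reg r0) (reg r1))         ->  [r0, r1]
       (pre_inc (reg r0))               ->  [r0, #4]!
       (post_dec (reg r0))              ->  [r0], #-4
       (pre_modify ...)                 ->  [r0, #imm]! or [r0, r1]!  */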
16509 /* Target hook for indicating whether a punctuation character for
16510 TARGET_PRINT_OPERAND is valid. */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
16514 return (code == '@' || code == '|' || code == '.'
16515 || code == '(' || code == ')' || code == '#'
16516 || (TARGET_32BIT && (code == '?'))
16517 || (TARGET_THUMB2 && (code == '!'))
16518 || (TARGET_THUMB && (code == '_')));
16521 /* Target hook for assembling integer objects. The ARM version needs to
16522 handle word-sized values specially. */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16526 enum machine_mode mode;
16528 if (size == UNITS_PER_WORD && aligned_p)
16530 fputs ("\t.word\t", asm_out_file);
16531 output_addr_const (asm_out_file, x);
16533 /* Mark symbols as position independent. We only do this in the
16534 .text segment, not in the .data segment. */
16535 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16536 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16538 /* See legitimize_pic_address for an explanation of the
16539 TARGET_VXWORKS_RTP check. */
16540 if (TARGET_VXWORKS_RTP
16541 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16542 fputs ("(GOT)", asm_out_file);
16544 fputs ("(GOTOFF)", asm_out_file);
16546 fputc ('\n', asm_out_file);
16550 mode = GET_MODE (x);
16552 if (arm_vector_mode_supported_p (mode))
16556 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16558 units = CONST_VECTOR_NUNITS (x);
16559 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16561 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16562 for (i = 0; i < units; i++)
rtx elt = CONST_VECTOR_ELT (x, i);
assemble_integer
(elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16569 for (i = 0; i < units; i++)
16571 rtx elt = CONST_VECTOR_ELT (x, i);
16572 REAL_VALUE_TYPE rval;
16574 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
assemble_real
(rval, GET_MODE_INNER (mode),
i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16584 return default_assemble_integer (x, size, aligned_p);
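/* Example of the assembly produced above for a word-sized symbolic
   constant in a PIC constant table:

       .word	foo(GOT)	@ non-local symbol (always on VxWorks RTP)
       .word	bar(GOTOFF)	@ symbol known to bind locally

   A plain non-PIC word comes out as ".word foo".  */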
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16592 if (!TARGET_AAPCS_BASED)
(is_ctor ?
default_named_section_asm_out_constructor
: default_named_section_asm_out_destructor) (symbol, priority);
return;
16600 /* Put these in the .init_array section, using a special relocation. */
16601 if (priority != DEFAULT_INIT_PRIORITY)
16604 sprintf (buf, "%s.%.5u",
16605 is_ctor ? ".init_array" : ".fini_array",
16607 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16614 switch_to_section (s);
16615 assemble_align (POINTER_SIZE);
16616 fputs ("\t.word\t", asm_out_file);
16617 output_addr_const (asm_out_file, symbol);
16618 fputs ("(target1)\n", asm_out_file);
16621 /* Add a function to the list of static constructors. */
static void
arm_elf_asm_constructor (rtx symbol, int priority)
16626 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16629 /* Add a function to the list of static destructors. */
static void
arm_elf_asm_destructor (rtx symbol, int priority)
16634 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
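/* On an AAPCS target a constructor with, say, priority 123 is emitted
   along these lines (section flags are illustrative; get_section
   computes the real ones):

       .section	.init_array.00123
       .align	2
       .word	ctor_fn(target1)

   where the (target1) relocation lets the linker choose between an
   absolute and a relative word.  */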
16637 /* A finite state machine takes care of noticing whether or not instructions
16638 can be conditionally executed, and thus decrease execution time and code
16639 size by deleting branch instructions. The fsm is controlled by
16640 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
/* The states of the fsm controlling condition codes are:
16643 0: normal, do nothing special
16644 1: make ASM_OUTPUT_OPCODE not output this instruction
16645 2: make ASM_OUTPUT_OPCODE not output this instruction
16646 3: make instructions conditional
16647 4: make instructions conditional
16649 State transitions (state->state by whom under condition):
16650 0 -> 1 final_prescan_insn if the `target' is a label
16651 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16652 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16653 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16654 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16655 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16656 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16657 (the target insn is arm_target_insn).
16659 If the jump clobbers the conditions then we use states 2 and 4.
16661 A similar thing can be done with conditional return insns.
16663 XXX In case the `target' is an unconditional branch, this conditionalising
16664 of the instructions always reduces code size, but not always execution
16665 time. But then, I want to reduce the code size to somewhere near what
16666 /bin/cc produces. */
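/* As a concrete example, the fsm turns

       cmp	r0, #0
       beq	.L1
       add	r1, r1, #1
   .L1:

   into

       cmp	r0, #0
       addne	r1, r1, #1

   by suppressing the branch (states 1/2) and predicating the skipped
   instructions (states 3/4).  */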
16668 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16669 instructions. When a COND_EXEC instruction is seen the subsequent
16670 instructions are scanned so that multiple conditional instructions can be
16671 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16672 specify the length and true/false mask for the IT block. These will be
16673 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
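/* A minimal sketch (compiled out; illustrative only) of how
   arm_condexec_mask and arm_condexec_masklen describe an IT block:
   bit N of the mask is set when lane N executes on the base
   condition ('t') and clear when it executes on the inverse ('e').  */
#if 0
static void
sketch_it_lanes (unsigned long mask, int masklen, char *buf)
{
  int n;

  /* BUF must have room for masklen + 1 characters.  */
  for (n = 0; n < masklen; n++)
    buf[n] = (mask & (1UL << n)) ? 't' : 'e';
  buf[n] = 0;
  /* mask == 5 (binary 101) with masklen == 3 yields "tet", i.e. an
     "itet <cond>" block.  */
}
#endif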
16675 /* Returns the index of the ARM condition code string in
16676 `arm_condition_codes'. COMPARISON should be an rtx like
16677 `(eq (...) (...))'. */
16678 static enum arm_cond_code
16679 get_arm_condition_code (rtx comparison)
16681 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16682 enum arm_cond_code code;
16683 enum rtx_code comp_code = GET_CODE (comparison);
16685 if (GET_MODE_CLASS (mode) != MODE_CC)
16686 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16687 XEXP (comparison, 1));
16691 case CC_DNEmode: code = ARM_NE; goto dominance;
16692 case CC_DEQmode: code = ARM_EQ; goto dominance;
16693 case CC_DGEmode: code = ARM_GE; goto dominance;
16694 case CC_DGTmode: code = ARM_GT; goto dominance;
16695 case CC_DLEmode: code = ARM_LE; goto dominance;
16696 case CC_DLTmode: code = ARM_LT; goto dominance;
16697 case CC_DGEUmode: code = ARM_CS; goto dominance;
16698 case CC_DGTUmode: code = ARM_HI; goto dominance;
16699 case CC_DLEUmode: code = ARM_LS; goto dominance;
case CC_DLTUmode: code = ARM_CC;

dominance:
gcc_assert (comp_code == EQ || comp_code == NE);
16705 if (comp_code == EQ)
16706 return ARM_INVERSE_CONDITION_CODE (code);
16712 case NE: return ARM_NE;
16713 case EQ: return ARM_EQ;
16714 case GE: return ARM_PL;
16715 case LT: return ARM_MI;
16716 default: gcc_unreachable ();
16722 case NE: return ARM_NE;
16723 case EQ: return ARM_EQ;
16724 default: gcc_unreachable ();
16730 case NE: return ARM_MI;
16731 case EQ: return ARM_PL;
16732 default: gcc_unreachable ();
16737 /* These encodings assume that AC=1 in the FPA system control
byte.  This allows us to handle all cases except UNEQ and
LTGT.  */
16742 case GE: return ARM_GE;
16743 case GT: return ARM_GT;
16744 case LE: return ARM_LS;
16745 case LT: return ARM_MI;
16746 case NE: return ARM_NE;
16747 case EQ: return ARM_EQ;
16748 case ORDERED: return ARM_VC;
16749 case UNORDERED: return ARM_VS;
16750 case UNLT: return ARM_LT;
16751 case UNLE: return ARM_LE;
16752 case UNGT: return ARM_HI;
16753 case UNGE: return ARM_PL;
16754 /* UNEQ and LTGT do not have a representation. */
16755 case UNEQ: /* Fall through. */
16756 case LTGT: /* Fall through. */
16757 default: gcc_unreachable ();
16763 case NE: return ARM_NE;
16764 case EQ: return ARM_EQ;
16765 case GE: return ARM_LE;
16766 case GT: return ARM_LT;
16767 case LE: return ARM_GE;
16768 case LT: return ARM_GT;
16769 case GEU: return ARM_LS;
16770 case GTU: return ARM_CC;
16771 case LEU: return ARM_CS;
16772 case LTU: return ARM_HI;
16773 default: gcc_unreachable ();
16779 case LTU: return ARM_CS;
16780 case GEU: return ARM_CC;
16781 default: gcc_unreachable ();
16787 case NE: return ARM_NE;
16788 case EQ: return ARM_EQ;
16789 case GEU: return ARM_CS;
16790 case GTU: return ARM_HI;
16791 case LEU: return ARM_LS;
16792 case LTU: return ARM_CC;
16793 default: gcc_unreachable ();
16799 case GE: return ARM_GE;
16800 case LT: return ARM_LT;
16801 case GEU: return ARM_CS;
16802 case LTU: return ARM_CC;
16803 default: gcc_unreachable ();
16809 case NE: return ARM_NE;
16810 case EQ: return ARM_EQ;
16811 case GE: return ARM_GE;
16812 case GT: return ARM_GT;
16813 case LE: return ARM_LE;
16814 case LT: return ARM_LT;
16815 case GEU: return ARM_CS;
16816 case GTU: return ARM_HI;
16817 case LEU: return ARM_LS;
16818 case LTU: return ARM_CC;
16819 default: gcc_unreachable ();
16822 default: gcc_unreachable ();
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
16831 rtx first_insn = insn;
16832 rtx body = PATTERN (insn);
16834 enum arm_cond_code code;
16838 /* Remove the previous insn from the count of insns to be output. */
16839 if (arm_condexec_count)
16840 arm_condexec_count--;
16842 /* Nothing to do if we are already inside a conditional block. */
16843 if (arm_condexec_count)
16846 if (GET_CODE (body) != COND_EXEC)
16849 /* Conditional jumps are implemented directly. */
16850 if (GET_CODE (insn) == JUMP_INSN)
16853 predicate = COND_EXEC_TEST (body);
16854 arm_current_cc = get_arm_condition_code (predicate);
16856 n = get_attr_ce_count (insn);
16857 arm_condexec_count = 1;
16858 arm_condexec_mask = (1 << n) - 1;
16859 arm_condexec_masklen = n;
16860 /* See if subsequent instructions can be combined into the same block. */
16863 insn = next_nonnote_insn (insn);
16865 /* Jumping into the middle of an IT block is illegal, so a label or
16866 barrier terminates the block. */
16867 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16870 body = PATTERN (insn);
16871 /* USE and CLOBBER aren't really insns, so just skip them. */
16872 if (GET_CODE (body) == USE
16873 || GET_CODE (body) == CLOBBER)
16876 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16877 if (GET_CODE (body) != COND_EXEC)
16879 /* Allow up to 4 conditionally executed instructions in a block. */
16880 n = get_attr_ce_count (insn);
16881 if (arm_condexec_masklen + n > 4)
16884 predicate = COND_EXEC_TEST (body);
16885 code = get_arm_condition_code (predicate);
16886 mask = (1 << n) - 1;
16887 if (arm_current_cc == code)
16888 arm_condexec_mask |= (mask << arm_condexec_masklen);
16889 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16892 arm_condexec_count++;
16893 arm_condexec_masklen += n;
16895 /* A jump must be the last instruction in a conditional block. */
16896 if (GET_CODE(insn) == JUMP_INSN)
16899 /* Restore recog_data (getting the attributes of other insns can
16900 destroy this array, but final.c assumes that it remains intact
16901 across this call). */
16902 extract_constrain_insn_cached (first_insn);
void
arm_final_prescan_insn (rtx insn)
16908 /* BODY will hold the body of INSN. */
16909 rtx body = PATTERN (insn);
/* This will be 1 if trying to repeat the trick, and things need to be
reversed if it appears to fail.  */
int reverse = 0;
16915 /* If we start with a return insn, we only succeed if we find another one. */
16916 int seeking_return = 0;
16918 /* START_INSN will hold the insn from where we start looking. This is the
16919 first insn after the following code_label if REVERSE is true. */
16920 rtx start_insn = insn;
16922 /* If in state 4, check if the target branch is reached, in order to
16923 change back to state 0. */
16924 if (arm_ccfsm_state == 4)
16926 if (insn == arm_target_insn)
16928 arm_target_insn = NULL;
16929 arm_ccfsm_state = 0;
16934 /* If in state 3, it is possible to repeat the trick, if this insn is an
16935 unconditional branch to a label, and immediately following this branch
16936 is the previous target label which is only used once, and the label this
16937 branch jumps to is not too far off. */
16938 if (arm_ccfsm_state == 3)
16940 if (simplejump_p (insn))
16942 start_insn = next_nonnote_insn (start_insn);
16943 if (GET_CODE (start_insn) == BARRIER)
16945 /* XXX Isn't this always a barrier? */
16946 start_insn = next_nonnote_insn (start_insn);
16948 if (GET_CODE (start_insn) == CODE_LABEL
16949 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16950 && LABEL_NUSES (start_insn) == 1)
16955 else if (GET_CODE (body) == RETURN)
16957 start_insn = next_nonnote_insn (start_insn);
16958 if (GET_CODE (start_insn) == BARRIER)
16959 start_insn = next_nonnote_insn (start_insn);
16960 if (GET_CODE (start_insn) == CODE_LABEL
16961 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16962 && LABEL_NUSES (start_insn) == 1)
16965 seeking_return = 1;
16974 gcc_assert (!arm_ccfsm_state || reverse);
16975 if (GET_CODE (insn) != JUMP_INSN)
/* This jump might be paralleled with a clobber of the condition codes;
the jump should always come first.  */
16980 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16981 body = XVECEXP (body, 0, 0);
if (reverse
|| (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
&& GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16988 int fail = FALSE, succeed = FALSE;
16989 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16990 int then_not_else = TRUE;
16991 rtx this_insn = start_insn, label = 0;
16993 /* Register the insn jumped to. */
16996 if (!seeking_return)
16997 label = XEXP (SET_SRC (body), 0);
16999 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17000 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17001 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17003 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17004 then_not_else = FALSE;
17006 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17007 seeking_return = 1;
17008 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17010 seeking_return = 1;
17011 then_not_else = FALSE;
17014 gcc_unreachable ();
17016 /* See how many insns this branch skips, and what kind of insns. If all
17017 insns are okay, and the label or unconditional branch to the same
17018 label is not too far away, succeed. */
17019 for (insns_skipped = 0;
17020 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17024 this_insn = next_nonnote_insn (this_insn);
17028 switch (GET_CODE (this_insn))
17031 /* Succeed if it is the target label, otherwise fail since
17032 control falls in from somewhere else. */
17033 if (this_insn == label)
17035 arm_ccfsm_state = 1;
17043 /* Succeed if the following insn is the target label.
17045 If return insns are used then the last insn in a function
17046 will be a barrier. */
17047 this_insn = next_nonnote_insn (this_insn);
17048 if (this_insn && this_insn == label)
17050 arm_ccfsm_state = 1;
17058 /* The AAPCS says that conditional calls should not be
17059 used since they make interworking inefficient (the
17060 linker can't transform BL<cond> into BLX). That's
17061 only a problem if the machine has BLX. */
17068 /* Succeed if the following insn is the target label, or
if the following two insns are a barrier and the
target label.  */
17071 this_insn = next_nonnote_insn (this_insn);
17072 if (this_insn && GET_CODE (this_insn) == BARRIER)
17073 this_insn = next_nonnote_insn (this_insn);
17075 if (this_insn && this_insn == label
17076 && insns_skipped < max_insns_skipped)
17078 arm_ccfsm_state = 1;
17086 /* If this is an unconditional branch to the same label, succeed.
If it is to another label, do nothing.  If it is conditional,
fail.  */
/* XXX Probably, the tests for SET and the PC are
unnecessary.  */
17092 scanbody = PATTERN (this_insn);
17093 if (GET_CODE (scanbody) == SET
17094 && GET_CODE (SET_DEST (scanbody)) == PC)
17096 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17097 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17099 arm_ccfsm_state = 2;
17102 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17105 /* Fail if a conditional return is undesirable (e.g. on a
17106 StrongARM), but still allow this if optimizing for size. */
17107 else if (GET_CODE (scanbody) == RETURN
17108 && !use_return_insn (TRUE, NULL)
17111 else if (GET_CODE (scanbody) == RETURN
17114 arm_ccfsm_state = 2;
17117 else if (GET_CODE (scanbody) == PARALLEL)
17119 switch (get_attr_conds (this_insn))
17129 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
/* Instructions using or affecting the condition codes make it
fail.  */
17136 scanbody = PATTERN (this_insn);
17137 if (!(GET_CODE (scanbody) == SET
17138 || GET_CODE (scanbody) == PARALLEL)
17139 || get_attr_conds (this_insn) != CONDS_NOCOND)
17142 /* A conditional cirrus instruction must be followed by
17143 a non Cirrus instruction. However, since we
17144 conditionalize instructions in this function and by
17145 the time we get here we can't add instructions
17146 (nops), because shorten_branches() has already been
17147 called, we will disable conditionalizing Cirrus
17148 instructions to be safe. */
17149 if (GET_CODE (scanbody) != USE
17150 && GET_CODE (scanbody) != CLOBBER
17151 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17161 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17162 arm_target_label = CODE_LABEL_NUMBER (label);
17165 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17167 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17169 this_insn = next_nonnote_insn (this_insn);
17170 gcc_assert (!this_insn
17171 || (GET_CODE (this_insn) != BARRIER
17172 && GET_CODE (this_insn) != CODE_LABEL));
/* Oh, dear!  We ran off the end... give up.  */
17177 extract_constrain_insn_cached (insn);
17178 arm_ccfsm_state = 0;
17179 arm_target_insn = NULL;
17182 arm_target_insn = this_insn;
/* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
what it was.  */
if (!reverse)
arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17190 if (reverse || then_not_else)
17191 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
/* Restore recog_data (getting the attributes of other insns can
destroy this array, but final.c assumes that it remains intact
across this call).  */
17197 extract_constrain_insn_cached (insn);
17201 /* Output IT instructions. */
void
thumb2_asm_output_opcode (FILE * stream)
17208 if (arm_condexec_mask)
for (n = 0; n < arm_condexec_masklen; n++)
buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
buff[n] = 0;
asm_fprintf(stream, "i%s\t%s\n\t", buff,
17214 arm_condition_codes[arm_current_cc]);
17215 arm_condexec_mask = 0;
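/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 2
   and arm_condexec_mask == 1 (binary 01), buff is "te" and the code
   above emits "ite\teq": the first insn of the block executes on EQ,
   the second on NE.  */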
17219 /* Returns true if REGNO is a valid register
17220 for holding a quantity of type MODE. */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17224 if (GET_MODE_CLASS (mode) == MODE_CC)
17225 return (regno == CC_REGNUM
17226 || (TARGET_HARD_FLOAT && TARGET_VFP
17227 && regno == VFPCC_REGNUM));
17230 /* For the Thumb we only allow values bigger than SImode in
17231 registers 0 - 6, so that there is always a second low
17232 register available to hold the upper part of the value.
We probably ought to ensure that the register is the
17234 start of an even numbered register pair. */
17235 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17237 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17238 && IS_CIRRUS_REGNUM (regno))
17239 /* We have outlawed SI values in Cirrus registers because they
17240 reside in the lower 32 bits, but SF values reside in the
17241 upper 32 bits. This causes gcc all sorts of grief. We can't
17242 even split the registers into pairs because Cirrus SI values
17243 get sign extended to 64bits-- aldyh. */
17244 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17246 if (TARGET_HARD_FLOAT && TARGET_VFP
17247 && IS_VFP_REGNUM (regno))
17249 if (mode == SFmode || mode == SImode)
17250 return VFP_REGNO_OK_FOR_SINGLE (regno);
17252 if (mode == DFmode)
17253 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17255 /* VFP registers can hold HFmode values, but there is no point in
17256 putting them there unless we have hardware conversion insns. */
17257 if (mode == HFmode)
17258 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17261 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17262 || (VALID_NEON_QREG_MODE (mode)
17263 && NEON_REGNO_OK_FOR_QUAD (regno))
17264 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17265 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17266 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17267 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17268 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17273 if (TARGET_REALLY_IWMMXT)
17275 if (IS_IWMMXT_GR_REGNUM (regno))
17276 return mode == SImode;
17278 if (IS_IWMMXT_REGNUM (regno))
17279 return VALID_IWMMXT_REG_MODE (mode);
17282 /* We allow almost any value to be stored in the general registers.
17283 Restrict doubleword quantities to even register pairs so that we can
17284 use ldrd. Do not allow very large Neon structure opaque modes in
17285 general registers; they would use too many. */
17286 if (regno <= LAST_ARM_REGNUM)
17287 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17288 && ARM_NUM_REGS (mode) <= 4;
17290 if (regno == FRAME_POINTER_REGNUM
17291 || regno == ARG_POINTER_REGNUM)
17292 /* We only allow integers in the fake hard registers. */
17293 return GET_MODE_CLASS (mode) == MODE_INT;
17295 /* The only registers left are the FPA registers
17296 which we only allow to hold FP values. */
17297 return (TARGET_HARD_FLOAT && TARGET_FPA
17298 && GET_MODE_CLASS (mode) == MODE_FLOAT
17299 && regno >= FIRST_FPA_REGNUM
17300 && regno <= LAST_FPA_REGNUM);
17303 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17304 not used in arm mode. */
enum reg_class
arm_regno_class (int regno)
17311 if (regno == STACK_POINTER_REGNUM)
17313 if (regno == CC_REGNUM)
17320 if (TARGET_THUMB2 && regno < 8)
17323 if ( regno <= LAST_ARM_REGNUM
17324 || regno == FRAME_POINTER_REGNUM
17325 || regno == ARG_POINTER_REGNUM)
17326 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17328 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17329 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17331 if (IS_CIRRUS_REGNUM (regno))
17332 return CIRRUS_REGS;
17334 if (IS_VFP_REGNUM (regno))
17336 if (regno <= D7_VFP_REGNUM)
17337 return VFP_D0_D7_REGS;
17338 else if (regno <= LAST_LO_VFP_REGNUM)
17339 return VFP_LO_REGS;
17341 return VFP_HI_REGS;
17344 if (IS_IWMMXT_REGNUM (regno))
17345 return IWMMXT_REGS;
17347 if (IS_IWMMXT_GR_REGNUM (regno))
17348 return IWMMXT_GR_REGS;
17353 /* Handle a special case when computing the offset
17354 of an argument from the frame pointer. */
int
arm_debugger_arg_offset (int value, rtx addr)
17360 /* We are only interested if dbxout_parms() failed to compute the offset. */
17364 /* We can only cope with the case where the address is held in a register. */
17365 if (GET_CODE (addr) != REG)
17368 /* If we are using the frame pointer to point at the argument, then
17369 an offset of 0 is correct. */
17370 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17373 /* If we are using the stack pointer to point at the
17374 argument, then an offset of 0 is correct. */
17375 /* ??? Check this is consistent with thumb2 frame layout. */
17376 if ((TARGET_THUMB || !frame_pointer_needed)
17377 && REGNO (addr) == SP_REGNUM)
17380 /* Oh dear. The argument is pointed to by a register rather
17381 than being held in a register, or being stored at a known
17382 offset from the frame pointer. Since GDB only understands
17383 those two kinds of argument we must translate the address
17384 held in the register into an offset from the frame pointer.
17385 We do this by searching through the insns for the function
17386 looking to see where this register gets its value. If the
17387 register is initialized from the frame pointer plus an offset
17388 then we are in luck and we can continue, otherwise we give up.
17390 This code is exercised by producing debugging information
17391 for a function with arguments like this:
17393 double func (double a, double b, int c, double d) {return d;}
17395 Without this code the stab for parameter 'd' will be set to
17396 an offset of 0 from the frame pointer, rather than 8. */
/* The if() statement says:

If the insn is a normal instruction
and if the insn is setting the value in a register
and if the register being set is the register holding the address of the argument
and if the address is computed by an addition
that involves adding to a register
which is the frame pointer
a constant integer

then...  */
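/* In RTL terms the loop below looks for an insn of the shape

       (set (reg ADDR) (plus (reg FP) (const_int N)))

   and, when it finds one, uses N as the argument's offset from the
   frame pointer.  */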
17410 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17412 if ( GET_CODE (insn) == INSN
17413 && GET_CODE (PATTERN (insn)) == SET
17414 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17415 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17416 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17417 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17418 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17421 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17430 warning (0, "unable to compute real location of stacked parameter");
17431 value = 8; /* XXX magic hack */
#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
do \
{ \
if ((MASK) & insn_flags) \
add_builtin_function ((NAME), (TYPE), (CODE), \
BUILT_IN_MD, NULL, NULL_TREE); \
} \
while (0)
17446 struct builtin_description
17448 const unsigned int mask;
17449 const enum insn_code icode;
17450 const char * const name;
17451 const enum arm_builtins code;
17452 const enum rtx_code comparison;
17453 const unsigned int flag;
17456 static const struct builtin_description bdesc_2arg[] =
17458 #define IWMMXT_BUILTIN(code, string, builtin) \
17459 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17460 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17462 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17463 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17464 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17465 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17466 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17467 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17468 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17469 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17470 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17471 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17472 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17473 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17474 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17475 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17476 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17477 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17478 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17479 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17480 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17481 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17482 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17483 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17484 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17485 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17486 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17487 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17488 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17489 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17490 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17491 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17492 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17493 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17494 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17495 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17496 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17497 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17498 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17499 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17500 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17501 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17502 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17503 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17504 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17505 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17506 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17507 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17508 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17509 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17510 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17511 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17512 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17513 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17514 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17515 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17516 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17517 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17518 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17519 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17521 #define IWMMXT_BUILTIN2(code, builtin) \
17522 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17524 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17525 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17526 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17527 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17528 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17529 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17530 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17531 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17532 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17533 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17534 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17535 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17536 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17537 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17538 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17539 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17540 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17541 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17542 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17543 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17544 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17545 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17546 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17547 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17548 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17549 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17550 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17551 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17552 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17553 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17554 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17555 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17558 static const struct builtin_description bdesc_1arg[] =
17560 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17561 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17562 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17563 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17564 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17565 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17566 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17567 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17568 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17569 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17570 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17571 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17572 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17573 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17574 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17575 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17576 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17577 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
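/* A minimal usage sketch for the two-operand builtins tabulated above
   (compiled out; the vector typedef is an assumption for illustration,
   user code would normally get the real types from mmintrin.h):  */
#if 0
typedef char __sketch_v8qi __attribute__ ((vector_size (8)));

static __sketch_v8qi
sketch_add_bytes (__sketch_v8qi a, __sketch_v8qi b)
{
  /* Maps to CODE_FOR_addv8qi3, i.e. the iWMMXt "waddb" instruction.  */
  return __builtin_arm_waddb (a, b);
}
#endif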
17580 /* Set up all the iWMMXt builtins. This is
17581 not called if TARGET_IWMMXT is zero. */
static void
arm_init_iwmmxt_builtins (void)
17586 const struct builtin_description * d;
17588 tree endlink = void_list_node;
17590 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17591 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17592 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
tree int_ftype_int
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, integer_type_node, endlink));
17597 tree v8qi_ftype_v8qi_v8qi_int
17598 = build_function_type (V8QI_type_node,
17599 tree_cons (NULL_TREE, V8QI_type_node,
17600 tree_cons (NULL_TREE, V8QI_type_node,
17601 tree_cons (NULL_TREE,
17604 tree v4hi_ftype_v4hi_int
17605 = build_function_type (V4HI_type_node,
17606 tree_cons (NULL_TREE, V4HI_type_node,
17607 tree_cons (NULL_TREE, integer_type_node,
17609 tree v2si_ftype_v2si_int
17610 = build_function_type (V2SI_type_node,
17611 tree_cons (NULL_TREE, V2SI_type_node,
17612 tree_cons (NULL_TREE, integer_type_node,
17614 tree v2si_ftype_di_di
17615 = build_function_type (V2SI_type_node,
17616 tree_cons (NULL_TREE, long_long_integer_type_node,
17617 tree_cons (NULL_TREE, long_long_integer_type_node,
17619 tree di_ftype_di_int
17620 = build_function_type (long_long_integer_type_node,
17621 tree_cons (NULL_TREE, long_long_integer_type_node,
17622 tree_cons (NULL_TREE, integer_type_node,
17624 tree di_ftype_di_int_int
17625 = build_function_type (long_long_integer_type_node,
17626 tree_cons (NULL_TREE, long_long_integer_type_node,
17627 tree_cons (NULL_TREE, integer_type_node,
17628 tree_cons (NULL_TREE,
17631 tree int_ftype_v8qi
17632 = build_function_type (integer_type_node,
17633 tree_cons (NULL_TREE, V8QI_type_node,
17635 tree int_ftype_v4hi
17636 = build_function_type (integer_type_node,
17637 tree_cons (NULL_TREE, V4HI_type_node,
17639 tree int_ftype_v2si
17640 = build_function_type (integer_type_node,
17641 tree_cons (NULL_TREE, V2SI_type_node,
17643 tree int_ftype_v8qi_int
17644 = build_function_type (integer_type_node,
17645 tree_cons (NULL_TREE, V8QI_type_node,
17646 tree_cons (NULL_TREE, integer_type_node,
17648 tree int_ftype_v4hi_int
17649 = build_function_type (integer_type_node,
17650 tree_cons (NULL_TREE, V4HI_type_node,
17651 tree_cons (NULL_TREE, integer_type_node,
17653 tree int_ftype_v2si_int
17654 = build_function_type (integer_type_node,
17655 tree_cons (NULL_TREE, V2SI_type_node,
17656 tree_cons (NULL_TREE, integer_type_node,
17658 tree v8qi_ftype_v8qi_int_int
17659 = build_function_type (V8QI_type_node,
17660 tree_cons (NULL_TREE, V8QI_type_node,
17661 tree_cons (NULL_TREE, integer_type_node,
17662 tree_cons (NULL_TREE,
17665 tree v4hi_ftype_v4hi_int_int
17666 = build_function_type (V4HI_type_node,
17667 tree_cons (NULL_TREE, V4HI_type_node,
17668 tree_cons (NULL_TREE, integer_type_node,
17669 tree_cons (NULL_TREE,
17672 tree v2si_ftype_v2si_int_int
17673 = build_function_type (V2SI_type_node,
17674 tree_cons (NULL_TREE, V2SI_type_node,
17675 tree_cons (NULL_TREE, integer_type_node,
17676 tree_cons (NULL_TREE,
17679 /* Miscellaneous. */
17680 tree v8qi_ftype_v4hi_v4hi
17681 = build_function_type (V8QI_type_node,
17682 tree_cons (NULL_TREE, V4HI_type_node,
17683 tree_cons (NULL_TREE, V4HI_type_node,
17685 tree v4hi_ftype_v2si_v2si
17686 = build_function_type (V4HI_type_node,
17687 tree_cons (NULL_TREE, V2SI_type_node,
17688 tree_cons (NULL_TREE, V2SI_type_node,
17690 tree v2si_ftype_v4hi_v4hi
17691 = build_function_type (V2SI_type_node,
17692 tree_cons (NULL_TREE, V4HI_type_node,
17693 tree_cons (NULL_TREE, V4HI_type_node,
17695 tree v2si_ftype_v8qi_v8qi
17696 = build_function_type (V2SI_type_node,
17697 tree_cons (NULL_TREE, V8QI_type_node,
17698 tree_cons (NULL_TREE, V8QI_type_node,
17700 tree v4hi_ftype_v4hi_di
17701 = build_function_type (V4HI_type_node,
17702 tree_cons (NULL_TREE, V4HI_type_node,
17703 tree_cons (NULL_TREE,
17704 long_long_integer_type_node,
17706 tree v2si_ftype_v2si_di
17707 = build_function_type (V2SI_type_node,
17708 tree_cons (NULL_TREE, V2SI_type_node,
17709 tree_cons (NULL_TREE,
17710 long_long_integer_type_node,
17712 tree void_ftype_int_int
17713 = build_function_type (void_type_node,
17714 tree_cons (NULL_TREE, integer_type_node,
17715 tree_cons (NULL_TREE, integer_type_node,
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
tree di_ftype_v8qi
= build_function_type (long_long_integer_type_node,
tree_cons (NULL_TREE, V8QI_type_node, endlink));
tree di_ftype_v4hi
= build_function_type (long_long_integer_type_node,
tree_cons (NULL_TREE, V4HI_type_node, endlink));
tree di_ftype_v2si
= build_function_type (long_long_integer_type_node,
tree_cons (NULL_TREE, V2SI_type_node, endlink));
17731 tree v2si_ftype_v4hi
17732 = build_function_type (V2SI_type_node,
17733 tree_cons (NULL_TREE, V4HI_type_node,
17735 tree v4hi_ftype_v8qi
17736 = build_function_type (V4HI_type_node,
17737 tree_cons (NULL_TREE, V8QI_type_node,
17740 tree di_ftype_di_v4hi_v4hi
17741 = build_function_type (long_long_unsigned_type_node,
17742 tree_cons (NULL_TREE,
17743 long_long_unsigned_type_node,
17744 tree_cons (NULL_TREE, V4HI_type_node,
17745 tree_cons (NULL_TREE,
17749 tree di_ftype_v4hi_v4hi
17750 = build_function_type (long_long_unsigned_type_node,
17751 tree_cons (NULL_TREE, V4HI_type_node,
17752 tree_cons (NULL_TREE, V4HI_type_node,
17755 /* Normal vector binops. */
17756 tree v8qi_ftype_v8qi_v8qi
17757 = build_function_type (V8QI_type_node,
17758 tree_cons (NULL_TREE, V8QI_type_node,
17759 tree_cons (NULL_TREE, V8QI_type_node,
17761 tree v4hi_ftype_v4hi_v4hi
17762 = build_function_type (V4HI_type_node,
17763 tree_cons (NULL_TREE, V4HI_type_node,
17764 tree_cons (NULL_TREE, V4HI_type_node,
17766 tree v2si_ftype_v2si_v2si
17767 = build_function_type (V2SI_type_node,
17768 tree_cons (NULL_TREE, V2SI_type_node,
17769 tree_cons (NULL_TREE, V2SI_type_node,
17771 tree di_ftype_di_di
17772 = build_function_type (long_long_unsigned_type_node,
17773 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17774 tree_cons (NULL_TREE,
17775 long_long_unsigned_type_node,
/* Add all builtins that are more or less simple operations on two
operands.  */
17780 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17782 /* Use one of the operands; the target can have a different mode for
17783 mask-generating compares. */
17784 enum machine_mode mode;
17790 mode = insn_data[d->icode].operand[1].mode;
17795 type = v8qi_ftype_v8qi_v8qi;
17798 type = v4hi_ftype_v4hi_v4hi;
17801 type = v2si_ftype_v2si_v2si;
17804 type = di_ftype_di_di;
17808 gcc_unreachable ();
17811 def_mbuiltin (d->mask, d->name, type, d->code);
17814 /* Add the remaining MMX insns with somewhat more complicated types. */
17815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17817 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17819 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17820 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17821 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17824 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17826 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17828 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17834 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17835 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17836 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17837 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17838 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17840 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17842 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17843 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17844 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17849 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17851 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17852 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17854 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17855 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17856 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17857 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17858 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17859 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17860 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17861 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17862 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17864 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17865 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17866 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17868 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17869 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17870 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17872 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17873 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17874 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17875 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17876 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17877 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17879 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17880 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17881 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17882 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17883 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17884 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17885 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17886 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17887 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17888 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17889 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17890 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17892 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17893 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17894 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17895 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17897 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17898 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17899 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17900 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17901 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17902 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17903 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17906 static void
17907 arm_init_tls_builtins (void)
17908 {
17909   tree ftype, decl;
17911 ftype = build_function_type (ptr_type_node, void_list_node);
17912 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17913                              ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17914                              "__builtin_thread_pointer", NULL_TREE);
17915 TREE_NOTHROW (decl) = 1;
17916 TREE_READONLY (decl) = 1;
17917 }
17919 enum neon_builtin_type_bits {
17920   T_V8QI  = 0x0001,
17921   T_V4HI  = 0x0002,
17922   T_V2SI  = 0x0004,
17923   T_V2SF  = 0x0008,
17924   T_DI    = 0x0010,
17925   T_V16QI = 0x0020,
17926   T_V8HI  = 0x0040,
17927   T_V4SI  = 0x0080,
17928   T_V4SF  = 0x0100,
17929   T_V2DI  = 0x0200,
17930   T_TI    = 0x0400,
17931   T_EI    = 0x0800,
17932   T_OI    = 0x1000
17933 };
17935 #define v8qi_UP T_V8QI
17936 #define v4hi_UP T_V4HI
17937 #define v2si_UP T_V2SI
17938 #define v2sf_UP  T_V2SF
17939 #define di_UP    T_DI
17940 #define v16qi_UP T_V16QI
17941 #define v8hi_UP T_V8HI
17942 #define v4si_UP T_V4SI
17943 #define v4sf_UP T_V4SF
17944 #define v2di_UP  T_V2DI
17945 #define ti_UP    T_TI
17946 #define ei_UP    T_EI
17947 #define oi_UP    T_OI
17949 #define UP(X) X##_UP
17984 NEON_LOADSTRUCTLANE,
17986 NEON_STORESTRUCTLANE,
17994 const char *name;
17995 const neon_itype itype;
17997 const enum insn_code codes[T_MAX];
17998 const unsigned int num_vars;
17999 unsigned int base_fcode;
18000 } neon_builtin_datum;
18002 #define CF(N,X) CODE_FOR_neon_##N##X
18004 #define VAR1(T, N, A) \
18005 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18006 #define VAR2(T, N, A, B) \
18007 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18008 #define VAR3(T, N, A, B, C) \
18009 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18010 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18011 #define VAR4(T, N, A, B, C, D) \
18012 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18013 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18014 #define VAR5(T, N, A, B, C, D, E) \
18015 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18016 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18017 #define VAR6(T, N, A, B, C, D, E, F) \
18018 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18019 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18020 #define VAR7(T, N, A, B, C, D, E, F, G) \
18021 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18022 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18023   CF (N, G) }, 7, 0
18024 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18025 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18026   | UP (H), \
18027 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18028 CF (N, G), CF (N, H) }, 8, 0
18029 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18030 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18031 | UP (H) | UP (I), \
18032 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18033 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18034 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18035 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18036 | UP (H) | UP (I) | UP (J), \
18037 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18038 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
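/* Illustration (this expansion is implied by the definitions above, not
   spelled out in the source): a table entry written as
     { VAR2 (BINOP, vadd, v8qi, v4hi) }
   would expand to roughly
     { "vadd", NEON_BINOP, T_V8QI | T_V4HI,
       { CODE_FOR_neon_vaddv8qi, CODE_FOR_neon_vaddv4hi }, 2, 0 }
   i.e. the base name, the operation class, a bitmask of type variants,
   the per-variant insn codes, the variant count, and a base fcode of 0
   that arm_init_neon_builtins fills in later.  */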
18040 /* The mode entries in the following table correspond to the "key" type of the
18041 instruction variant, i.e. equivalent to that which would be specified after
18042 the assembler mnemonic, which usually refers to the last vector operand.
18043 (Signed/unsigned/polynomial types are not differentiated, though;
18044 they are all mapped onto the same mode for a given element size.)  The modes
18045 listed per instruction should be the same as those defined for that
18046 instruction's pattern in neon.md.
18047 WARNING: Variants should be listed in the same increasing order as
18048 neon_builtin_type_bits. */
18050 static neon_builtin_datum neon_builtin_data[] =
18051 {
18052   { VAR10 (BINOP, vadd,
18053 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18054 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18055 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18056 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18057 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18058 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18059 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18060 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18061 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18062 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18063 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18064 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18065 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18066 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18067 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18068 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18069 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18070 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18071 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18072 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18073 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18074 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18075 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18076 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18077 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18078 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18079 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18080 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18081 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18082 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18083 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18084 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18085 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18086 { VAR10 (BINOP, vsub,
18087 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18088 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18089 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18090 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18091 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18092 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18093 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18094 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18095 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18096 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18097 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18098 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18099 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18100 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18101 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18102 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18103 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18104 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18105 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18106 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18107 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18108 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18109 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18110 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18111 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18112 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18113 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18114 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18115 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18116 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18117 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18118 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18119 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18120 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18121 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18122 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18123 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18124 /* FIXME: vget_lane supports more variants than this! */
18125 { VAR10 (GETLANE, vget_lane,
18126 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18127 { VAR10 (SETLANE, vset_lane,
18128 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18129 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18130 { VAR10 (DUP, vdup_n,
18131 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18132 { VAR10 (DUPLANE, vdup_lane,
18133 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18134 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18135 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18136 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18137 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18138 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18139 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18140 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18141 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18142 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18143 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18144 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18145 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18146 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18147 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18148 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18149 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18150 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18151 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18152 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18153 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18154 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18155 { VAR10 (BINOP, vext,
18156 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18157 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18158 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18159 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18160 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18161 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18162 { VAR10 (SELECT, vbsl,
18163 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18164 { VAR1 (VTBL, vtbl1, v8qi) },
18165 { VAR1 (VTBL, vtbl2, v8qi) },
18166 { VAR1 (VTBL, vtbl3, v8qi) },
18167 { VAR1 (VTBL, vtbl4, v8qi) },
18168 { VAR1 (VTBX, vtbx1, v8qi) },
18169 { VAR1 (VTBX, vtbx2, v8qi) },
18170 { VAR1 (VTBX, vtbx3, v8qi) },
18171 { VAR1 (VTBX, vtbx4, v8qi) },
18172 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18173 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18174 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18175 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18176 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18177 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18178 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18179 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18180 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18181 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18182 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18183 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18184 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18185 { VAR10 (LOAD1, vld1,
18186 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18187 { VAR10 (LOAD1LANE, vld1_lane,
18188 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18189 { VAR10 (LOAD1, vld1_dup,
18190 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18191 { VAR10 (STORE1, vst1,
18192 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18193 { VAR10 (STORE1LANE, vst1_lane,
18194 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18195 { VAR9 (LOADSTRUCT,
18196 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18197 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18198 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18199 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18200 { VAR9 (STORESTRUCT, vst2,
18201 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18202 { VAR7 (STORESTRUCTLANE, vst2_lane,
18203 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18204 { VAR9 (LOADSTRUCT,
18205 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18206 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18207 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18208 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18209 { VAR9 (STORESTRUCT, vst3,
18210 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18211 { VAR7 (STORESTRUCTLANE, vst3_lane,
18212 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18213 { VAR9 (LOADSTRUCT, vld4,
18214 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18215 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18216 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18217 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18218 { VAR9 (STORESTRUCT, vst4,
18219 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18220 { VAR7 (STORESTRUCTLANE, vst4_lane,
18221 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18222 { VAR10 (LOGICBINOP, vand,
18223 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18224 { VAR10 (LOGICBINOP, vorr,
18225 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18226 { VAR10 (BINOP, veor,
18227 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18228 { VAR10 (LOGICBINOP, vbic,
18229 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18230 { VAR10 (LOGICBINOP, vorn,
18231 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18232 };
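/* Editorial note: arm_init_neon_builtins below walks this table in
   order and hands each entry a consecutive block of function codes
   starting at ARM_BUILTIN_NEON_BASE; locate_neon_builtin_icode relies
   on the resulting monotonically increasing base_fcode values when it
   bsearches the table.  */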
18246 static void
18247 arm_init_neon_builtins (void)
18248 {
18249 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18251 tree neon_intQI_type_node;
18252 tree neon_intHI_type_node;
18253 tree neon_polyQI_type_node;
18254 tree neon_polyHI_type_node;
18255 tree neon_intSI_type_node;
18256 tree neon_intDI_type_node;
18257 tree neon_float_type_node;
18259 tree intQI_pointer_node;
18260 tree intHI_pointer_node;
18261 tree intSI_pointer_node;
18262 tree intDI_pointer_node;
18263 tree float_pointer_node;
18265 tree const_intQI_node;
18266 tree const_intHI_node;
18267 tree const_intSI_node;
18268 tree const_intDI_node;
18269 tree const_float_node;
18271 tree const_intQI_pointer_node;
18272 tree const_intHI_pointer_node;
18273 tree const_intSI_pointer_node;
18274 tree const_intDI_pointer_node;
18275 tree const_float_pointer_node;
18277 tree V8QI_type_node;
18278 tree V4HI_type_node;
18279 tree V2SI_type_node;
18280 tree V2SF_type_node;
18281 tree V16QI_type_node;
18282 tree V8HI_type_node;
18283 tree V4SI_type_node;
18284 tree V4SF_type_node;
18285 tree V2DI_type_node;
18287 tree intUQI_type_node;
18288 tree intUHI_type_node;
18289 tree intUSI_type_node;
18290 tree intUDI_type_node;
18292 tree intEI_type_node;
18293 tree intOI_type_node;
18294 tree intCI_type_node;
18295 tree intXI_type_node;
18297 tree V8QI_pointer_node;
18298 tree V4HI_pointer_node;
18299 tree V2SI_pointer_node;
18300 tree V2SF_pointer_node;
18301 tree V16QI_pointer_node;
18302 tree V8HI_pointer_node;
18303 tree V4SI_pointer_node;
18304 tree V4SF_pointer_node;
18305 tree V2DI_pointer_node;
18307 tree void_ftype_pv8qi_v8qi_v8qi;
18308 tree void_ftype_pv4hi_v4hi_v4hi;
18309 tree void_ftype_pv2si_v2si_v2si;
18310 tree void_ftype_pv2sf_v2sf_v2sf;
18311 tree void_ftype_pdi_di_di;
18312 tree void_ftype_pv16qi_v16qi_v16qi;
18313 tree void_ftype_pv8hi_v8hi_v8hi;
18314 tree void_ftype_pv4si_v4si_v4si;
18315 tree void_ftype_pv4sf_v4sf_v4sf;
18316 tree void_ftype_pv2di_v2di_v2di;
18318 tree reinterp_ftype_dreg[5][5];
18319 tree reinterp_ftype_qreg[5][5];
18320 tree dreg_types[5], qreg_types[5];
18322 /* Create distinguished type nodes for NEON vector element types,
18323 and pointers to values of such types, so we can detect them later. */
18324 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18325 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18326 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18327 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18328 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18329 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18330 neon_float_type_node = make_node (REAL_TYPE);
18331 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18332 layout_type (neon_float_type_node);
18334 /* Define typedefs which exactly correspond to the modes we are basing vector
18335 types on. If you change these names you'll need to change
18336 the table used by arm_mangle_type too. */
18337 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18338 "__builtin_neon_qi");
18339 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18340 "__builtin_neon_hi");
18341 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18342 "__builtin_neon_si");
18343 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18344 "__builtin_neon_sf");
18345 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18346 "__builtin_neon_di");
18347 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18348 "__builtin_neon_poly8");
18349 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18350 "__builtin_neon_poly16");
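/* Sketch of the intended use (the typedef below belongs in arm_neon.h,
   not in this file): the public NEON vector types are built on top of
   the element typedefs registered above, e.g.
     typedef __builtin_neon_qi int8x8_t
       __attribute__ ((__vector_size__ (8)));
   which is why renaming these entries requires updating
   arm_mangle_type, as the comment above notes.  */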
18352 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18353 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18354 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18355 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18356 float_pointer_node = build_pointer_type (neon_float_type_node);
18358 /* Next create constant-qualified versions of the above types. */
18359 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18360                                          TYPE_QUAL_CONST);
18361 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18362                                          TYPE_QUAL_CONST);
18363 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18364                                          TYPE_QUAL_CONST);
18365 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18366                                          TYPE_QUAL_CONST);
18367 const_float_node = build_qualified_type (neon_float_type_node,
18368                                          TYPE_QUAL_CONST);
18370 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18371 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18372 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18373 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18374 const_float_pointer_node = build_pointer_type (const_float_node);
18376 /* Now create vector types based on our NEON element types. */
18377 /* 64-bit vectors. */
18378 V8QI_type_node =
18379   build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18380 V4HI_type_node =
18381   build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18382 V2SI_type_node =
18383   build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18384 V2SF_type_node =
18385   build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18386 /* 128-bit vectors.  */
18387 V16QI_type_node =
18388   build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18389 V8HI_type_node =
18390   build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18391 V4SI_type_node =
18392   build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18393 V4SF_type_node =
18394   build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18395 V2DI_type_node =
18396   build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18398 /* Unsigned integer types for various mode sizes. */
18399 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18400 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18401 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18402 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18404 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18405 "__builtin_neon_uqi");
18406 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18407 "__builtin_neon_uhi");
18408 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18409 "__builtin_neon_usi");
18410 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18411 "__builtin_neon_udi");
18413 /* Opaque integer types for structures of vectors. */
18414 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18415 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18416 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18417 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18419 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18420 "__builtin_neon_ti");
18421 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18422 "__builtin_neon_ei");
18423 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18424 "__builtin_neon_oi");
18425 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18426 "__builtin_neon_ci");
18427 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18428 "__builtin_neon_xi");
18430 /* Pointers to vector types. */
18431 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18432 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18433 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18434 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18435 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18436 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18437 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18438 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18439 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18441 /* Operations which return results as pairs. */
18442 void_ftype_pv8qi_v8qi_v8qi =
18443 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18444 V8QI_type_node, NULL);
18445 void_ftype_pv4hi_v4hi_v4hi =
18446 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18447 V4HI_type_node, NULL);
18448 void_ftype_pv2si_v2si_v2si =
18449 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18450 V2SI_type_node, NULL);
18451 void_ftype_pv2sf_v2sf_v2sf =
18452 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18453 V2SF_type_node, NULL);
18454 void_ftype_pdi_di_di =
18455 build_function_type_list (void_type_node, intDI_pointer_node,
18456 neon_intDI_type_node, neon_intDI_type_node, NULL);
18457 void_ftype_pv16qi_v16qi_v16qi =
18458 build_function_type_list (void_type_node, V16QI_pointer_node,
18459 V16QI_type_node, V16QI_type_node, NULL);
18460 void_ftype_pv8hi_v8hi_v8hi =
18461 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18462 V8HI_type_node, NULL);
18463 void_ftype_pv4si_v4si_v4si =
18464 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18465 V4SI_type_node, NULL);
18466 void_ftype_pv4sf_v4sf_v4sf =
18467 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18468 V4SF_type_node, NULL);
18469 void_ftype_pv2di_v2di_v2di =
18470 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18471 V2DI_type_node, NULL);
18473 dreg_types[0] = V8QI_type_node;
18474 dreg_types[1] = V4HI_type_node;
18475 dreg_types[2] = V2SI_type_node;
18476 dreg_types[3] = V2SF_type_node;
18477 dreg_types[4] = neon_intDI_type_node;
18479 qreg_types[0] = V16QI_type_node;
18480 qreg_types[1] = V8HI_type_node;
18481 qreg_types[2] = V4SI_type_node;
18482 qreg_types[3] = V4SF_type_node;
18483 qreg_types[4] = V2DI_type_node;
18485 for (i = 0; i < 5; i++)
18486   {
18487     int j;
18488     for (j = 0; j < 5; j++)
18489       {
18490 reinterp_ftype_dreg[i][j]
18491 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18492 reinterp_ftype_qreg[i][j]
18493 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18494 }
18495 }
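/* Editorial example: reinterp_ftype_dreg[0][1] built above is the type
   "V8QI (V4HI)", which below becomes the signature of
   __builtin_neon_vreinterpretv8qiv4hi.  */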
18497 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18498   {
18499 neon_builtin_datum *d = &neon_builtin_data[i];
18500 unsigned int j, codeidx = 0;
18502 d->base_fcode = fcode;
18504 for (j = 0; j < T_MAX; j++)
18505   {
18506 const char* const modenames[] = {
18507 "v8qi", "v4hi", "v2si", "v2sf", "di",
18508 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18509 };
18510 char namebuf[60];
18511 tree ftype = NULL;
18512 enum insn_code icode;
18513 int is_load = 0, is_store = 0;
18515 if ((d->bits & (1 << j)) == 0)
18516   continue;
18518 icode = d->codes[codeidx++];
18520 switch (d->itype)
18521   {
18522   case NEON_LOAD1:
18523   case NEON_LOAD1LANE:
18524   case NEON_LOADSTRUCT:
18525   case NEON_LOADSTRUCTLANE:
18526     is_load = 1;
18527     /* Fall through.  */
18528   case NEON_STORE1:
18529   case NEON_STORE1LANE:
18530   case NEON_STORESTRUCT:
18531   case NEON_STORESTRUCTLANE:
18532     if (!is_load)
18533       is_store = 1;
18534     /* Fall through.  */
18535   case NEON_UNOP:
18536   case NEON_BINOP:
18537   case NEON_LOGICBINOP:
18538   case NEON_SHIFTINSERT:
18539   case NEON_TERNOP:
18540   case NEON_GETLANE:
18541   case NEON_SETLANE:
18542   case NEON_CREATE:
18543   case NEON_DUP:
18544   case NEON_DUPLANE:
18545   case NEON_SHIFTIMM:
18546   case NEON_SHIFTACC:
18547   case NEON_COMBINE:
18548   case NEON_SPLIT:
18549   case NEON_CONVERT:
18550   case NEON_FIXCONV:
18551   case NEON_LANEMUL:
18552   case NEON_LANEMULL:
18553   case NEON_LANEMULH:
18554   case NEON_LANEMAC:
18555   case NEON_SCALARMUL:
18556   case NEON_SCALARMULL:
18557   case NEON_SCALARMULH:
18558   case NEON_SCALARMAC:
18559   case NEON_SELECT:
18560   case NEON_VTBL:
18561   case NEON_VTBX:
18562     {
18563       int k;
18564       tree return_type = void_type_node, args = void_list_node;
18566 /* Build a function type directly from the insn_data for this
18567 builtin. The build_function_type() function takes care of
18568 removing duplicates for us. */
18569 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18573 if (is_load && k == 1)
18574   {
18575 /* Neon load patterns always have the memory operand
18576 (a SImode pointer) in the operand 1 position. We
18577 want a const pointer to the element type in that
18578 position.  */
18579 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18581 switch (1 << j)
18582   {
18583   case T_V8QI:
18584   case T_V16QI:
18585     eltype = const_intQI_pointer_node;
18586     break;
18588   case T_V4HI:
18589   case T_V8HI:
18590     eltype = const_intHI_pointer_node;
18591     break;
18593   case T_V2SI:
18594   case T_V4SI:
18595     eltype = const_intSI_pointer_node;
18596     break;
18598   case T_V2SF:
18599   case T_V4SF:
18600     eltype = const_float_pointer_node;
18601     break;
18603   case T_DI:
18604   case T_V2DI:
18605     eltype = const_intDI_pointer_node;
18606     break;
18608   default: gcc_unreachable ();
18609   }
18610 }
18611 else if (is_store && k == 0)
18612   {
18613 /* Similarly, Neon store patterns use operand 0 as
18614 the memory location to store to (a SImode pointer).
18615 Use a pointer to the element type of the store in
18616 that position.  */
18617 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18619 switch (1 << j)
18620   {
18621   case T_V8QI:
18622   case T_V16QI:
18623     eltype = intQI_pointer_node;
18624     break;
18626   case T_V4HI:
18627   case T_V8HI:
18628     eltype = intHI_pointer_node;
18629     break;
18631   case T_V2SI:
18632   case T_V4SI:
18633     eltype = intSI_pointer_node;
18634     break;
18636   case T_V2SF:
18637   case T_V4SF:
18638     eltype = float_pointer_node;
18639     break;
18641   case T_DI:
18642   case T_V2DI:
18643     eltype = intDI_pointer_node;
18644     break;
18646   default: gcc_unreachable ();
18647   }
18648 }
18649 else
18650   {
18651     switch (insn_data[icode].operand[k].mode)
18652 {
18653 case VOIDmode: eltype = void_type_node; break;
18655 case QImode: eltype = neon_intQI_type_node; break;
18656 case HImode: eltype = neon_intHI_type_node; break;
18657 case SImode: eltype = neon_intSI_type_node; break;
18658 case SFmode: eltype = neon_float_type_node; break;
18659 case DImode: eltype = neon_intDI_type_node; break;
18660 case TImode: eltype = intTI_type_node; break;
18661 case EImode: eltype = intEI_type_node; break;
18662 case OImode: eltype = intOI_type_node; break;
18663 case CImode: eltype = intCI_type_node; break;
18664 case XImode: eltype = intXI_type_node; break;
18665 /* 64-bit vectors. */
18666 case V8QImode: eltype = V8QI_type_node; break;
18667 case V4HImode: eltype = V4HI_type_node; break;
18668 case V2SImode: eltype = V2SI_type_node; break;
18669 case V2SFmode: eltype = V2SF_type_node; break;
18670 /* 128-bit vectors. */
18671 case V16QImode: eltype = V16QI_type_node; break;
18672 case V8HImode: eltype = V8HI_type_node; break;
18673 case V4SImode: eltype = V4SI_type_node; break;
18674 case V4SFmode: eltype = V4SF_type_node; break;
18675 case V2DImode: eltype = V2DI_type_node; break;
18676 default: gcc_unreachable ();
18677 }
18678 }
18680 if (k == 0 && !is_store)
18681   return_type = eltype;
18682 else
18683   args = tree_cons (NULL_TREE, eltype, args);
18684 }
18686 ftype = build_function_type (return_type, args);
18687 }
18688 break;
18690 case NEON_RESULTPAIR:
18691 {
18692 switch (insn_data[icode].operand[1].mode)
18693   {
18694 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18695 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18696 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18697 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18698 case DImode: ftype = void_ftype_pdi_di_di; break;
18699 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18700 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18701 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18702 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18703 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18704 default: gcc_unreachable ();
18705 }
18706 }
18707 break;
18709 case NEON_REINTERP:
18710   {
18711     /* We iterate over 5 doubleword types, then 5 quadword
18712        types.  */
18713     int rhs = j % 5;
18714     switch (insn_data[icode].operand[0].mode)
18715       {
18716 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18717 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18718 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18719 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18720 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18721 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18722 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18723 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18724 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18725 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18726 default: gcc_unreachable ();
18727 }
18728 }
18729 break;
18731 default:
18732   gcc_unreachable ();
18733 }
18735 gcc_assert (ftype != NULL);
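/* Editorial note: the user-visible name is the table's base name plus
   the variant's mode suffix, so the BINOP entry for vadd in its v8qi
   variant is registered below as "__builtin_neon_vaddv8qi".  */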
18737 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
18739 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18740                       NULL_TREE);
18741 }
18742 }
18743 }
18745 static void
18746 arm_init_fp16_builtins (void)
18747 {
18748 tree fp16_type = make_node (REAL_TYPE);
18749 TYPE_PRECISION (fp16_type) = 16;
18750 layout_type (fp16_type);
18751 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18752 }
18754 static void
18755 arm_init_builtins (void)
18756 {
18757 arm_init_tls_builtins ();
18759 if (TARGET_REALLY_IWMMXT)
18760 arm_init_iwmmxt_builtins ();
18762 if (TARGET_NEON)
18763   arm_init_neon_builtins ();
18765 if (arm_fp16_format)
18766 arm_init_fp16_builtins ();
18767 }
18769 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18771 static const char *
18772 arm_invalid_parameter_type (const_tree t)
18773 {
18774 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18775 return N_("function parameters cannot have __fp16 type");
18776 return NULL;
18777 }
18779 /* Implement TARGET_INVALID_RETURN_TYPE.  */
18781 static const char *
18782 arm_invalid_return_type (const_tree t)
18783 {
18784 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18785 return N_("functions cannot return __fp16 type");
18786 return NULL;
18787 }
18789 /* Implement TARGET_PROMOTED_TYPE. */
18791 static tree
18792 arm_promoted_type (const_tree t)
18793 {
18794 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18795 return float_type_node;
18796 return NULL_TREE;
18797 }
18799 /* Implement TARGET_CONVERT_TO_TYPE.
18800 Specifically, this hook implements the peculiarity of the ARM
18801 half-precision floating-point C semantics that requires conversions between
18802 __fp16 to or from double to do an intermediate conversion to float. */
18804 static tree
18805 arm_convert_to_type (tree type, tree expr)
18806 {
18807 tree fromtype = TREE_TYPE (expr);
18808 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
18809   return NULL_TREE;
18810 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18811 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18812 return convert (type, convert (float_type_node, expr));
18813 return NULL_TREE;
18814 }
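/* Editorial example: a conversion from a double D to __fp16 is expanded
   here as (__fp16)(float)D, i.e. the intermediate conversion through
   float described in the comment above.  */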
18816 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18817 This simply adds HFmode as a supported mode; even though we don't
18818 implement arithmetic on this type directly, it's supported by
18819 optabs conversions, much the way the double-word arithmetic is
18820 special-cased in the default hook. */
18822 static bool
18823 arm_scalar_mode_supported_p (enum machine_mode mode)
18824 {
18825 if (mode == HFmode)
18826 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18828 return default_scalar_mode_supported_p (mode);
18829 }
18831 /* Errors in the source file can cause expand_expr to return const0_rtx
18832 where we expect a vector. To avoid crashing, use one of the vector
18833 clear instructions. */
18835 static rtx
18836 safe_vector_operand (rtx x, enum machine_mode mode)
18837 {
18838   if (x != const0_rtx)
18839     return x;
18840 x = gen_reg_rtx (mode);
18842 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18843                              : gen_rtx_SUBREG (DImode, x, 0)));
18844 return x;
18845 }
18847 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18849 static rtx
18850 arm_expand_binop_builtin (enum insn_code icode,
18851                           tree exp, rtx target)
18852 {
18853   rtx pat;
18854 tree arg0 = CALL_EXPR_ARG (exp, 0);
18855 tree arg1 = CALL_EXPR_ARG (exp, 1);
18856 rtx op0 = expand_normal (arg0);
18857 rtx op1 = expand_normal (arg1);
18858 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18859 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18860 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18862 if (VECTOR_MODE_P (mode0))
18863 op0 = safe_vector_operand (op0, mode0);
18864 if (VECTOR_MODE_P (mode1))
18865 op1 = safe_vector_operand (op1, mode1);
18867 if (! target
18868     || GET_MODE (target) != tmode
18869 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18870 target = gen_reg_rtx (tmode);
18872 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18874 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18875 op0 = copy_to_mode_reg (mode0, op0);
18876 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18877 op1 = copy_to_mode_reg (mode1, op1);
18879 pat = GEN_FCN (icode) (target, op0, op1);
18880 if (! pat)
18881   return 0;
18882 emit_insn (pat);
18883 return target;
18884 }
18886 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18888 static rtx
18889 arm_expand_unop_builtin (enum insn_code icode,
18890                          tree exp, rtx target, int do_load)
18891 {
18892   rtx pat;
18893 tree arg0 = CALL_EXPR_ARG (exp, 0);
18894 rtx op0 = expand_normal (arg0);
18895 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18896 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18898 if (! target
18899     || GET_MODE (target) != tmode
18900 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18901 target = gen_reg_rtx (tmode);
18902 if (do_load)
18903   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18904 else
18905   {
18906 if (VECTOR_MODE_P (mode0))
18907 op0 = safe_vector_operand (op0, mode0);
18909 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18910 op0 = copy_to_mode_reg (mode0, op0);
18911 }
18913 pat = GEN_FCN (icode) (target, op0);
18914 if (! pat)
18915   return 0;
18916 emit_insn (pat);
18917 return target;
18918 }
18920 static int
18921 neon_builtin_compare (const void *a, const void *b)
18922 {
18923 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18924 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18925 unsigned int soughtcode = key->base_fcode;
18927 if (soughtcode >= memb->base_fcode
18928     && soughtcode < memb->base_fcode + memb->num_vars)
18929   return 0;
18930 else if (soughtcode < memb->base_fcode)
18931   return -1;
18932 else
18933   return 1;
18934 }
18936 static enum insn_code
18937 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18938 {
18939   neon_builtin_datum key, *found;
18940   int idx;
18942 key.base_fcode = fcode;
18943 found = (neon_builtin_datum *)
18944 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18945 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18946 gcc_assert (found);
18947 idx = fcode - (int) found->base_fcode;
18948 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18951 *itype = found->itype;
18953 return found->codes[idx];
18954 }
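/* Editorial example: if an entry has base_fcode F and three variants,
   then fcode F+2 makes neon_builtin_compare return 0 for that entry
   (F <= F+2 < F+3), and the function yields codes[2], the insn code of
   the third variant.  */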
18956 typedef enum {
18957   NEON_ARG_COPY_TO_REG,
18958   NEON_ARG_CONSTANT,
18959   NEON_ARG_STOP
18960 } builtin_arg;
18962 #define NEON_MAX_BUILTIN_ARGS 5
18964 /* Expand a Neon builtin. */
18965 static rtx
18966 arm_expand_neon_args (rtx target, int icode, int have_retval,
18967                       tree exp, ...)
18968 {
18969   va_list ap;
18970   rtx pat;
18971 tree arg[NEON_MAX_BUILTIN_ARGS];
18972 rtx op[NEON_MAX_BUILTIN_ARGS];
18973 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18974 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18975 int argc = 0;
18977 if (have_retval
18978     && (!target
18979         || GET_MODE (target) != tmode
18980 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18981 target = gen_reg_rtx (tmode);
18983 va_start (ap, exp);
18985 for (;;)
18986   {
18987     builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18989     if (thisarg == NEON_ARG_STOP)
18990       break;
18991     else
18992       {
18993 arg[argc] = CALL_EXPR_ARG (exp, argc);
18994 op[argc] = expand_normal (arg[argc]);
18995 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18997 switch (thisarg)
18998   {
18999   case NEON_ARG_COPY_TO_REG:
19000     /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19001     if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19002         (op[argc], mode[argc]))
19003       op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19004     break;
19006   case NEON_ARG_CONSTANT:
19007     /* FIXME: This error message is somewhat unhelpful.  */
19008     if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19009         (op[argc], mode[argc]))
19010       error ("argument must be a constant");
19011     break;
19013   case NEON_ARG_STOP:
19014 gcc_unreachable ();
19015 }
19017 argc++;
19018 }
19019 }
19021 va_end (ap);
19023 if (have_retval)
19024   switch (argc)
19025     {
19026     case 1:
19027       pat = GEN_FCN (icode) (target, op[0]);
19028       break;
19030     case 2:
19031       pat = GEN_FCN (icode) (target, op[0], op[1]);
19032       break;
19034     case 3:
19035       pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19036       break;
19038     case 4:
19039       pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19040       break;
19042     case 5:
19043       pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19044       break;
19046     default:
19047       gcc_unreachable ();
19048     }
19049 else
19050   switch (argc)
19051     {
19052     case 1:
19053       pat = GEN_FCN (icode) (op[0]);
19054       break;
19056     case 2:
19057       pat = GEN_FCN (icode) (op[0], op[1]);
19058       break;
19060     case 3:
19061       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19062       break;
19064     case 4:
19065       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19066       break;
19068     case 5:
19069       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19070       break;
19072     default:
19073       gcc_unreachable ();
19074     }
19076 if (!pat)
19077   return 0;
19079 emit_insn (pat);
19081 return target;
19082 }
19084 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19085 constants defined per-instruction or per instruction-variant. Instead, the
19086 required info is looked up in the table neon_builtin_data. */
19087 static rtx
19088 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19089 {
19090   neon_itype itype;
19091 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19093 switch (itype)
19094   {
19095   case NEON_UNOP:
19096   case NEON_CONVERT:
19097   case NEON_DUPLANE:
19098     return arm_expand_neon_args (target, icode, 1, exp,
19099 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19101 case NEON_BINOP:
19102 case NEON_SETLANE:
19103 case NEON_SCALARMUL:
19104 case NEON_SCALARMULL:
19105 case NEON_SCALARMULH:
19106 case NEON_SHIFTINSERT:
19107 case NEON_LOGICBINOP:
19108 return arm_expand_neon_args (target, icode, 1, exp,
19109 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19110 NEON_ARG_STOP);
19112 case NEON_TERNOP:
19113   return arm_expand_neon_args (target, icode, 1, exp,
19114 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19115 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19117 case NEON_GETLANE:
19118 case NEON_FIXCONV:
19119 case NEON_SHIFTIMM:
19120 return arm_expand_neon_args (target, icode, 1, exp,
19121 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19122 NEON_ARG_STOP);
19124 case NEON_CREATE:
19125   return arm_expand_neon_args (target, icode, 1, exp,
19126 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19128 case NEON_DUP:
19129 case NEON_SPLIT:
19130 case NEON_REINTERP:
19131 return arm_expand_neon_args (target, icode, 1, exp,
19132 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19134 case NEON_COMBINE:
19135 case NEON_VTBL:
19136   return arm_expand_neon_args (target, icode, 1, exp,
19137 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19139 case NEON_RESULTPAIR:
19140 return arm_expand_neon_args (target, icode, 0, exp,
19141 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19142 NEON_ARG_STOP);
19144 case NEON_LANEMUL:
19145 case NEON_LANEMULL:
19146 case NEON_LANEMULH:
19147 return arm_expand_neon_args (target, icode, 1, exp,
19148 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19149 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19151 case NEON_LANEMAC:
19152   return arm_expand_neon_args (target, icode, 1, exp,
19153 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19154 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19156 case NEON_SHIFTACC:
19157 return arm_expand_neon_args (target, icode, 1, exp,
19158 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19159 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19161 case NEON_SCALARMAC:
19162 return arm_expand_neon_args (target, icode, 1, exp,
19163 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19164 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19166 case NEON_SELECT:
19167 case NEON_VTBX:
19168   return arm_expand_neon_args (target, icode, 1, exp,
19169 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19170 NEON_ARG_STOP);
19172 case NEON_LOAD1:
19173 case NEON_LOADSTRUCT:
19174 return arm_expand_neon_args (target, icode, 1, exp,
19175 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19177 case NEON_LOAD1LANE:
19178 case NEON_LOADSTRUCTLANE:
19179 return arm_expand_neon_args (target, icode, 1, exp,
19180 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19181 NEON_ARG_STOP);
19183 case NEON_STORE1:
19184 case NEON_STORESTRUCT:
19185 return arm_expand_neon_args (target, icode, 0, exp,
19186 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19188 case NEON_STORE1LANE:
19189 case NEON_STORESTRUCTLANE:
19190 return arm_expand_neon_args (target, icode, 0, exp,
19191 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19192 NEON_ARG_STOP);
19193 }
19195 gcc_unreachable ();
19196 }
19198 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19199 void
19200 neon_reinterpret (rtx dest, rtx src)
19201 {
19202   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19203 }
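/* Editorial example: reinterpreting a V2SF value as V4HI emits a single
   64-bit move of the same bits; no conversion instructions result.  */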
19205 /* Emit code to place a Neon pair result in memory locations (with equal
19206    registers).  */
19207 void
19208 neon_emit_pair_result_insn (enum machine_mode mode,
19209                             rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19210                             rtx op1, rtx op2)
19211 {
19212 rtx mem = gen_rtx_MEM (mode, destaddr);
19213 rtx tmp1 = gen_reg_rtx (mode);
19214 rtx tmp2 = gen_reg_rtx (mode);
19216 emit_insn (intfn (tmp1, op1, tmp2, op2));
19218 emit_move_insn (mem, tmp1);
19219 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19220 emit_move_insn (mem, tmp2);
19221 }
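/* Editorial note: with a vtrn/vzip/vuzp-style INTFN the two result
   vectors land at DESTADDR and DESTADDR + GET_MODE_SIZE (MODE), which
   is how the RESULTPAIR builtins return two vectors through a single
   pointer argument.  */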
19223 /* Set up operands for a register copy from src to dest, taking care not to
19224 clobber registers in the process.
19225 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19226 be called with a large N, so that should be OK. */
19228 void
19229 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19230 {
19231 unsigned int copied = 0, opctr = 0;
19232 unsigned int done = (1 << count) - 1;
19233 unsigned int i, j;
19235 while (copied != done)
19236   {
19237     for (i = 0; i < count; i++)
19238       {
19239         int good = 1;
19241         for (j = 0; good && j < count; j++)
19242           if (i != j && (copied & (1 << j)) == 0
19243               && reg_overlap_mentioned_p (src[j], dest[i]))
19244             good = 0;
19246         if (good && (copied & (1 << i)) == 0)
19247           {
19248             operands[opctr++] = dest[i];
19249             operands[opctr++] = src[i];
19250             copied |= 1 << i;
19251           }
19252       }
19253   }
19255 gcc_assert (opctr == count * 2);
19256 }
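/* Editorial worked example: with count == 2, dest = {r2, r5} and
   src = {r1, r2}, pair 0 is deferred on the first pass because its
   destination r2 still overlaps the unconsumed source src[1] == r2;
   pair 1 (r5 <- r2) is emitted first, after which pair 0 (r2 <- r1)
   is safe, giving a clobber-free copy order.  */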
19258 /* Expand an expression EXP that calls a built-in function,
19259 with result going to TARGET if that's convenient
19260 (and in mode MODE if that's convenient).
19261 SUBTARGET may be used as the target for computing one of EXP's operands.
19262 IGNORE is nonzero if the value is to be ignored. */
19264 static rtx
19265 arm_expand_builtin (tree exp,
19266                     rtx target,
19267 rtx subtarget ATTRIBUTE_UNUSED,
19268 enum machine_mode mode ATTRIBUTE_UNUSED,
19269 int ignore ATTRIBUTE_UNUSED)
19271 const struct builtin_description * d;
19272 enum insn_code icode;
19273 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19274 tree              arg0;
19275 tree              arg1;
19276 tree              arg2;
19277 rtx               op0;
19278 rtx               op1;
19279 rtx               op2;
19280 rtx               pat;
19281 int               fcode = DECL_FUNCTION_CODE (fndecl);
19282 size_t            i;
19283 enum machine_mode tmode;
19284 enum machine_mode mode0;
19285 enum machine_mode mode1;
19286 enum machine_mode mode2;
19288 if (fcode >= ARM_BUILTIN_NEON_BASE)
19289 return arm_expand_neon_builtin (fcode, exp, target);
19291 switch (fcode)
19292   {
19293   case ARM_BUILTIN_TEXTRMSB:
19294 case ARM_BUILTIN_TEXTRMUB:
19295 case ARM_BUILTIN_TEXTRMSH:
19296 case ARM_BUILTIN_TEXTRMUH:
19297 case ARM_BUILTIN_TEXTRMSW:
19298 case ARM_BUILTIN_TEXTRMUW:
19299 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19300 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19301 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19302 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19303 : CODE_FOR_iwmmxt_textrmw);
19305 arg0 = CALL_EXPR_ARG (exp, 0);
19306 arg1 = CALL_EXPR_ARG (exp, 1);
19307 op0 = expand_normal (arg0);
19308 op1 = expand_normal (arg1);
19309 tmode = insn_data[icode].operand[0].mode;
19310 mode0 = insn_data[icode].operand[1].mode;
19311 mode1 = insn_data[icode].operand[2].mode;
19313 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19314 op0 = copy_to_mode_reg (mode0, op0);
19315 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19317 /* @@@ better error message */
19318 error ("selector must be an immediate");
19319 return gen_reg_rtx (tmode);
19320 }
19321 if (target == 0
19322     || GET_MODE (target) != tmode
19323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19324 target = gen_reg_rtx (tmode);
19325 pat = GEN_FCN (icode) (target, op0, op1);
19326 if (! pat)
19327   return 0;
19328 emit_insn (pat);
19329 return target;
19331 case ARM_BUILTIN_TINSRB:
19332 case ARM_BUILTIN_TINSRH:
19333 case ARM_BUILTIN_TINSRW:
19334 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19335 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19336 : CODE_FOR_iwmmxt_tinsrw);
19337 arg0 = CALL_EXPR_ARG (exp, 0);
19338 arg1 = CALL_EXPR_ARG (exp, 1);
19339 arg2 = CALL_EXPR_ARG (exp, 2);
19340 op0 = expand_normal (arg0);
19341 op1 = expand_normal (arg1);
19342 op2 = expand_normal (arg2);
19343 tmode = insn_data[icode].operand[0].mode;
19344 mode0 = insn_data[icode].operand[1].mode;
19345 mode1 = insn_data[icode].operand[2].mode;
19346 mode2 = insn_data[icode].operand[3].mode;
19348 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19349 op0 = copy_to_mode_reg (mode0, op0);
19350 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19351 op1 = copy_to_mode_reg (mode1, op1);
19352 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19354 /* @@@ better error message */
19355 error ("selector must be an immediate");
19356 return const0_rtx;
19357 }
19358 if (target == 0
19359     || GET_MODE (target) != tmode
19360 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19361 target = gen_reg_rtx (tmode);
19362 pat = GEN_FCN (icode) (target, op0, op1, op2);
19363 if (! pat)
19364   return 0;
19365 emit_insn (pat);
19366 return target;
19368 case ARM_BUILTIN_SETWCX:
19369 arg0 = CALL_EXPR_ARG (exp, 0);
19370 arg1 = CALL_EXPR_ARG (exp, 1);
19371 op0 = force_reg (SImode, expand_normal (arg0));
19372 op1 = expand_normal (arg1);
19373 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19374 return 0;
19376 case ARM_BUILTIN_GETWCX:
19377 arg0 = CALL_EXPR_ARG (exp, 0);
19378 op0 = expand_normal (arg0);
19379 target = gen_reg_rtx (SImode);
19380 emit_insn (gen_iwmmxt_tmrc (target, op0));
19381 return target;
19383 case ARM_BUILTIN_WSHUFH:
19384 icode = CODE_FOR_iwmmxt_wshufh;
19385 arg0 = CALL_EXPR_ARG (exp, 0);
19386 arg1 = CALL_EXPR_ARG (exp, 1);
19387 op0 = expand_normal (arg0);
19388 op1 = expand_normal (arg1);
19389 tmode = insn_data[icode].operand[0].mode;
19390 mode1 = insn_data[icode].operand[1].mode;
19391 mode2 = insn_data[icode].operand[2].mode;
19393 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19394 op0 = copy_to_mode_reg (mode1, op0);
19395 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19397 /* @@@ better error message */
19398 error ("mask must be an immediate");
19399 return const0_rtx;
19400 }
19401 if (target == 0
19402     || GET_MODE (target) != tmode
19403 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19404 target = gen_reg_rtx (tmode);
19405 pat = GEN_FCN (icode) (target, op0, op1);
19406 if (! pat)
19407   return 0;
19408 emit_insn (pat);
19409 return target;
19411 case ARM_BUILTIN_WSADB:
19412 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19413 case ARM_BUILTIN_WSADH:
19414 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19415 case ARM_BUILTIN_WSADBZ:
19416 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19417 case ARM_BUILTIN_WSADHZ:
19418 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19420 /* Several three-argument builtins. */
19421 case ARM_BUILTIN_WMACS:
19422 case ARM_BUILTIN_WMACU:
19423 case ARM_BUILTIN_WALIGN:
19424 case ARM_BUILTIN_TMIA:
19425 case ARM_BUILTIN_TMIAPH:
19426 case ARM_BUILTIN_TMIATT:
19427 case ARM_BUILTIN_TMIATB:
19428 case ARM_BUILTIN_TMIABT:
19429 case ARM_BUILTIN_TMIABB:
19430 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19431 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19432 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19433 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19434 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19435 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19436 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19437 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19438 : CODE_FOR_iwmmxt_walign);
19439 arg0 = CALL_EXPR_ARG (exp, 0);
19440 arg1 = CALL_EXPR_ARG (exp, 1);
19441 arg2 = CALL_EXPR_ARG (exp, 2);
19442 op0 = expand_normal (arg0);
19443 op1 = expand_normal (arg1);
19444 op2 = expand_normal (arg2);
19445 tmode = insn_data[icode].operand[0].mode;
19446 mode0 = insn_data[icode].operand[1].mode;
19447 mode1 = insn_data[icode].operand[2].mode;
19448 mode2 = insn_data[icode].operand[3].mode;
19450 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19451 op0 = copy_to_mode_reg (mode0, op0);
19452 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19453 op1 = copy_to_mode_reg (mode1, op1);
19454 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19455 op2 = copy_to_mode_reg (mode2, op2);
19456 if (target == 0
19457     || GET_MODE (target) != tmode
19458 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19459 target = gen_reg_rtx (tmode);
19460 pat = GEN_FCN (icode) (target, op0, op1, op2);
19461 if (! pat)
19462   return 0;
19463 emit_insn (pat);
19464 return target;
19466 case ARM_BUILTIN_WZERO:
19467 target = gen_reg_rtx (DImode);
19468 emit_insn (gen_iwmmxt_clrdi (target));
19469 return target;
19471 case ARM_BUILTIN_THREAD_POINTER:
19472 return arm_load_tp (target);
19474 default:
19475   break;
19476 }
19478 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19479 if (d->code == (const enum arm_builtins) fcode)
19480 return arm_expand_binop_builtin (d->icode, exp, target);
19482 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19483 if (d->code == (const enum arm_builtins) fcode)
19484 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19486 /* @@@ Should really do something sensible here.  */
19487 return NULL_RTX;
19488 }
19490 /* Return the number (counting from 0) of
19491 the least significant set bit in MASK. */
19493 static int
19494 number_of_first_bit_set (unsigned mask)
19495 {
19496   int bit;
19498   for (bit = 0;
19499        (mask & (1 << bit)) == 0;
19500        ++bit)
19501     continue;
19503   return bit;
19504 }
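/* Editorial example: number_of_first_bit_set (0x28) == 3, since 0x28 is
   binary 101000 and bit 3 is the lowest bit set.  */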
19506 /* Emit code to push or pop registers to or from the stack. F is the
19507 assembly file. MASK is the registers to push or pop. PUSH is
19508 nonzero if we should push, and zero if we should pop. For debugging
19509 output, if pushing, adjust CFA_OFFSET by the amount of space added
19510 to the stack. REAL_REGS should have the same number of bits set as
19511 MASK, and will be used instead (in the same order) to describe which
19512 registers were saved - this is used to mark the save slots when we
19513 push high registers after moving them to low registers. */
19514 static void
19515 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19516                unsigned long real_regs)
19517 {
19518   int regno;
19519 int lo_mask = mask & 0xFF;
19520 int pushed_words = 0;
19524 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19525   {
19526     /* Special case.  Do not generate a POP PC statement here, do it in
19527        thumb_exit.  */
19528     thumb_exit (f, -1);
19529     return;
19530   }
19532 if (ARM_EABI_UNWIND_TABLES && push)
19533   {
19534     fprintf (f, "\t.save\t{");
19535     for (regno = 0; regno < 15; regno++)
19536       {
19537         if (real_regs & (1 << regno))
19538           {
19539             if (real_regs & ((1 << regno) -1))
19540               fprintf (f, ", ");
19541             asm_fprintf (f, "%r", regno);
19542           }
19543       }
19544     fprintf (f, "}\n");
19545   }
19547 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19549 /* Look at the low registers first. */
19550 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19551   {
19552     if (lo_mask & 1)
19553       {
19554         asm_fprintf (f, "%r", regno);
19556         if ((lo_mask & ~1) != 0)
19557           fprintf (f, ", ");
19559         pushed_words++;
19560       }
19561   }
19563 if (push && (mask & (1 << LR_REGNUM)))
19564   {
19565     /* Catch pushing the LR.  */
19566     if (mask & 0xFF)
19567       fprintf (f, ", ");
19569     asm_fprintf (f, "%r", LR_REGNUM);
19571     pushed_words++;
19572   }
19573 else if (!push && (mask & (1 << PC_REGNUM)))
19574   {
19575     /* Catch popping the PC.  */
19576     if (TARGET_INTERWORK || TARGET_BACKTRACE
19577         || crtl->calls_eh_return)
19578       {
19579         /* The PC is never popped directly, instead
19580            it is popped into r3 and then BX is used.  */
19581         fprintf (f, "}\n");
19583         thumb_exit (f, -1);
19585         return;
19586       }
19587     else
19588       {
19589         if (mask & 0xFF)
19590           fprintf (f, ", ");
19592         asm_fprintf (f, "%r", PC_REGNUM);
19593       }
19594   }
19596 fprintf (f, "}\n");
  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
19616 /* Generate code to return from a thumb function.
19617 If 'reg_containing_return_addr' is -1, then the return address is
19618 actually on the stack, at the stack pointer. */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;
  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }
  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }
  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
19652 if (crtl->calls_eh_return)
19653 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
19658 /* Otherwise if we are not supporting interworking and we have not created
19659 a backtrace structure and the function was not entered in ARM mode then
19660 just pop the return address straight into the PC. */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }
19670 /* Find out how many of the (return) argument registers we can corrupt. */
19671 regs_available_for_popping = 0;
19673 /* If returning via __builtin_eh_return, the bottom three registers
19674 all contain information needed for the return. */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's return
	 value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));
      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
19694 /* In a void function we can use any argument register.
19695 In a function that returns a structure on the stack
19696 we can use the second and third argument registers. */
19697 if (mode == VOIDmode)
19698 regs_available_for_popping =
19699 (1 << ARG_REGISTER (1))
19700 | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
19704 (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
19707 else if (size <= 4)
19708 regs_available_for_popping =
19709 (1 << ARG_REGISTER (2))
19710 | (1 << ARG_REGISTER (3));
19711 else if (size <= 8)
19712 regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }
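  /* For instance, a function returning a DImode value in r0 and r1
     (size == 8) may only corrupt r2, while a void function leaves
     r0-r2 free.  */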
19716 /* Match registers to be popped with registers into which we pop them. */
19717 for (available = regs_available_for_popping,
19718 required = regs_to_pop;
19719 required != 0 && available != 0;
19720 available &= ~(available & - available),
       required &= ~(required & - required))
    -- pops_needed;
  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;
  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}
      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }
19758 /* Pop as many registers as we can. */
19759 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19760 regs_available_for_popping);
19762 /* Process the registers we popped. */
19763 if (reg_containing_return_addr == -1)
19765 /* The return address was popped into the lowest numbered register. */
19766 regs_to_pop &= ~(1 << LR_REGNUM);
19768 reg_containing_return_addr =
19769 number_of_first_bit_set (regs_available_for_popping);
      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }
  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19784 /* Move it into the correct place. */
19785 asm_fprintf (f, "\tmov\t%r, %r\n",
19786 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19788 /* (Temporarily) remove it from the mask of popped registers. */
19789 regs_available_for_popping &= ~(1 << frame_pointer);
19790 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
      if (regs_available_for_popping)
	{
	  int stack_pointer;
19796 /* We popped the stack pointer as well,
19797 find the register that contains it. */
19798 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19800 /* Move it into the stack register. */
19801 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19803 /* At this point we have popped all necessary registers, so
19804 do not worry about restoring regs_available_for_popping
19805 to its correct value:
19807 assert (pops_needed == 0)
19808 assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER)) */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }
19820 /* If we still have registers left on the stack, but we no longer have
19821 any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
19824 if (regs_available_for_popping == 0 && pops_needed > 0)
19826 regs_available_for_popping |= 1 << reg_containing_return_addr;
19828 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19829 reg_containing_return_addr);
19831 reg_containing_return_addr = LR_REGNUM;
19834 /* If we have registers left on the stack then pop some more.
19835 We know that at most we will want to pop FP and SP. */
19836 if (pops_needed > 0)
19841 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19842 regs_available_for_popping);
19844 /* We have popped either FP or SP.
19845 Move whichever one it is into the correct register. */
19846 popped_into = number_of_first_bit_set (regs_available_for_popping);
19847 move_to = number_of_first_bit_set (regs_to_pop);
19849 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19851 regs_to_pop &= ~(1 << move_to);
19856 /* If we still have not popped everything then we must have only
19857 had one register available to us and we are now popping the SP. */
19858 if (pops_needed > 0)
19862 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19863 regs_available_for_popping);
19865 popped_into = number_of_first_bit_set (regs_available_for_popping);
19867 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }
  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }
19886 if (crtl->calls_eh_return)
19887 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19889 /* Return to caller. */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
19893 /* Scan INSN just before assembler is output for it.
19894 For Thumb-1, we track the status of the condition codes; this
19895 information is used in the cbranchsi4_insn pattern. */
void
thumb1_final_prescan_insn (rtx insn)
{
19899 if (flag_print_asm_name)
19900 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19901 INSN_ADDRESSES (INSN_UID (insn)));
19902 /* Don't overwrite the previous setter when we get to a cbranch. */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
19913 conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
19917 cfun->machine->thumb1_cc_insn = insn;
19918 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
19919 cfun->machine->thumb1_cc_op1 = const0_rtx;
19920 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
19921 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
19923 rtx src1 = XEXP (SET_SRC (set), 1);
	  if (src1 == const0_rtx)
	    cfun->machine->thumb1_cc_mode = CCmode;
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
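/* For example, 0x000ff000 passes the test above (it is 0xff << 12),
   while 0x00100100 fails, because its set bits do not fit in any single
   byte-wide window.  */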
19950 /* Returns nonzero if the current function contains,
19951 or might contain a far jump. */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;
19957 /* This test is only important for leaf functions. */
19958 /* assert (!leaf_function_p ()); */
19960 /* If we have already decided that far jumps may be used,
19961 do not bother checking again, and always return true even if
19962 it turns out that they are not being used. Once we have made
19963 the decision that far jumps are present (and that hence the link
19964 register will be pushed onto the stack) we cannot go back on it. */
  if (cfun->machine->far_jump_used)
    return 1;
19968 /* If this function is not being called from the prologue/epilogue
19969 generation code then it must be being called from the
19970 INITIAL_ELIMINATION_OFFSET macro. */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
19973 /* In this case we know that we are being asked about the elimination
19974 of the arg pointer register. If that register is not being used,
19975 then there are no arguments on the stack, and we do not have to
19976 worry that a far jump might force the prologue to push the link
19977 register, changing the stack offsets. In this case we can just
19978 return false, since the presence of far jumps in the function will
19979 not affect stack offsets.
19981 If the arg pointer is live (or if it was live, but has now been
19982 eliminated and so set to dead) then we do have to test to see if
19983 the function might contain a far jump. This test can lead to some
	 false negatives, since before reload is completed, the length of
19985 branch instructions is not known, so gcc defaults to returning their
19986 longest length, which in turn sets the far jump attribute to true.
19988 A false negative will not result in bad code being generated, but it
19989 will result in a needless push and pop of the link register. We
19990 hope that this does not occur too often.
19992 If we need doubleword stack alignment this could affect the other
19993 elimination offsets so we can't risk getting it wrong. */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }
20000 /* Check to see if the function contains a branch
20001 insn with the far jump attribute set. */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
20004 if (GET_CODE (insn) == JUMP_INSN
20005 /* Ignore tablejump patterns. */
20006 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20007 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES
	  )
	{
20011 /* Record the fact that we have decided that
20012 the function does use far jumps. */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
20021 /* Return nonzero if FUNC must be entered in ARM mode. */
int
is_called_in_ARM_mode (tree func)
{
20025 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20027 /* Ignore the problem about functions whose address is taken. */
  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
20038 /* Given the stack offsets and register mask in OFFSETS, decide how
20039 many additional registers to push instead of subtracting a constant
20040 from SP. For epilogues the principle is the same except we use pop.
20041 FOR_PROLOGUE indicates which we're generating. */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
20045 HOST_WIDE_INT amount;
20046 unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20050 /* Then count how many other high registers will need to be pushed. */
20051 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20052 int n_free, reg_base;
20054 if (!for_prologue && frame_pointer_needed)
20055 amount = offsets->locals_base - offsets->saved_regs;
20057 amount = offsets->outgoing_args - offsets->saved_regs;
  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;
  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;
  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if ((live_regs_mask & 0xff) == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;
  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
20091 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
      live_regs_mask >>= reg_base;
    }
  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
20104 gcc_assert (amount / 4 * 4 == amount);
20106 if (amount >= 512 && (amount - n_free * 4) < 512)
20107 return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;

  return 0;
}
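/* As a worked example: a 516-byte frame with two free low registers pops
   (516 - 508) / 4 == 2 extra registers, bringing the remaining adjustment
   down to 508 bytes, which fits a single stack-adjusting instruction.  */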
20113 /* The bits which aren't usefully expanded as rtl. */
const char *
thumb_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;
  if (cfun->machine->return_used_this_function != 0)
    return "";
  if (IS_NAKED (arm_current_func_type ()))
    return "";
20131 offsets = arm_get_frame_offsets ();
20132 live_regs_mask = offsets->saved_regs_mask;
20133 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
    }
20148 /* The prolog may have pushed some high registers to use as
20149 work registers. e.g. the testsuite file:
20150 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20151 compiles to produce:
20152 push {r4, r5, r6, r7, lr}
20156 as part of the prolog. We have to undo that pushing here. */
  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;
      /* The available low registers depend on the size of the
	 value we are returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");
      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;
      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
20194 /* Pop the values into the low register(s). */
20195 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20197 /* Move the value(s) into the high registers. */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}

      live_regs_mask &= ~0x0f00;
    }
20214 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20215 live_regs_mask &= 0xff;
  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
20219 /* Pop the return address into the PC. */
20220 if (had_to_push_lr)
20221 live_regs_mask |= 1 << PC_REGNUM;
20223 /* Either no argument registers were pushed or a backtrace
20224 structure was created which includes an adjusted stack
20225 pointer, so just pop everything. */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);
      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pushpop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
20239 /* Pop everything but the return address. */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);
      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
			 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;
20273 /* Remove the argument registers that were pushed onto the stack. */
20274 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20275 SP_REGNUM, SP_REGNUM,
20276 crtl->args.pretend_args_size);
      thumb_exit (asm_out_file, regno);
    }

  return "";
}
20284 /* Functions to save and restore machine-specific function data. */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
20297 /* Return an RTX indicating where the return address to the
20298 calling function can be found. */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
20312 /* Arrange to initialize and mark the machine per-function status. */
20313 init_machine_status = arm_init_machine_status;
20315 /* This is to stop the combine pass optimizing away the alignment
20316 adjustment of va_arg. */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
20324 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20325 to point at the base of the local variables after static stack
20326 space for a function has been allocated. */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the rest of a function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx insn, dwarf;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
20391 func_type = arm_current_func_type ();
20393 /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))
    return;
  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }
20403 offsets = arm_get_frame_offsets ();
20404 live_regs_mask = offsets->saved_regs_mask;
20405 /* Load the pic register before setting the frame pointer,
20406 so we can use r7 as a temporary work register. */
20407 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20408 arm_load_pic_register (live_regs_mask);
20410 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20411 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20412 stack_pointer_rtx);
  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg;
20428 /* The stack decrement is too big for an immediate value in a single
20429 insn. In theory we could issue multiple subtracts, but after
20430 three of them it becomes more space efficient to place the full
20431 value in the constant pool and load into a register. (Also the
20432 ARM debugger really likes to see only one stack decrement per
20433 function). So instead we look for a scratch register into which
20434 we can load the decrement, and then we subtract this from the
20435 stack pointer. Unfortunately on the thumb the only available
20436 scratch registers are the argument registers, and we cannot use
20437 these as they may hold arguments to the function. Instead we
20438 attempt to locate a call preserved register which is used by this
20439 function. If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can
	     corrupt it now.  */
20442 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;
20446 gcc_assert(regno <= LAST_LO_REGNUM);
20448 reg = gen_rtx_REG (SImode, regno);
20450 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20452 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20453 stack_pointer_rtx, reg));
20454 RTX_FRAME_RELATED_P (insn) = 1;
	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (stack_pointer_rtx,
					      -amount));
	  RTX_FRAME_RELATED_P (dwarf) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
    }
20463 if (frame_pointer_needed)
20464 thumb_set_frame_pointer (offsets);
20466 /* If we are profiling, make sure no instructions are scheduled before
20467 the call to mcount. Similarly if the user has requested no
20468 scheduling in the prolog. Similarly if we want non-call exceptions
20469 using the EABI unwinder, to prevent faulting instructions from being
20470 swapped with a stack adjustment. */
20471 if (crtl->profile || !TARGET_SCHED_PROLOG
20472 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20473 emit_insn (gen_blockage ());
20475 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20476 if (live_regs_mask & 0xff)
20477 cfun->machine->lr_save_eliminated = 0;
}

void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;
20488 /* Naked functions don't have prologues. */
  if (IS_NAKED (arm_current_func_type ()))
    return;
20492 offsets = arm_get_frame_offsets ();
20493 amount = offsets->outgoing_args - offsets->saved_regs;
  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
20500 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
  gcc_assert (amount >= 0);
  if (amount)
    {
      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
20510 /* r3 is always free in the epilogue. */
20511 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20513 emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }
20518 /* Emit a USE (stack_pointer_rtx), so that
20519 the stack adjustment will not be deleted. */
20520 emit_insn (gen_prologue_use (stack_pointer_rtx));
20522 if (crtl->profile || !TARGET_SCHED_PROLOG)
20523 emit_insn (gen_blockage ());
20525 /* Emit a clobber for each insn that will be restored in the epilogue,
20526 so that flow2 will get register lifetimes correct. */
20527 for (regno = 0; regno < 13; regno++)
20528 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20529 emit_clobber (gen_rtx_REG (SImode, regno));
  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}

static void
thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
20538 arm_stack_offsets *offsets;
20539 unsigned long live_regs_mask = 0;
20540 unsigned long l_mask;
20541 unsigned high_regs_pushed = 0;
  int cfa_offset = 0;
  int regno;

  if (IS_NAKED (arm_current_func_type ()))
    return;
  if (is_called_in_ARM_mode (current_function_decl))
    {
      const char * name;

      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
		  == SYMBOL_REF);
20555 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20557 /* Generate code sequence to switch us into Thumb mode. */
20558 /* The .code 32 directive has already been emitted by
20559 ASM_DECLARE_FUNCTION_NAME. */
20560 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20561 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20563 /* Generate a label, so that the debugger will notice the
20564 change in instruction sets. This label is also used by
20565 the assembler to bypass the ARM code when this function
20566 is called from a Thumb encoded function elsewhere in the
20567 same file. Hence the definition of STUB_NAME here must
20568 agree with the definition in gas/config/tc-arm.c. */
20570 #define STUB_NAME ".real_start_of"
      fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
      if (arm_dllexport_name_p (name))
	name = arm_strip_name_encoding (name);
#endif
20577 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20578 fprintf (f, "\t.thumb_func\n");
      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
    }

  if (crtl->args.pretend_args_size)
    {
20584 /* Output unwind directive for the stack adjustment. */
20585 if (ARM_EABI_UNWIND_TABLES)
20586 fprintf (f, "\t.pad #%d\n",
20587 crtl->args.pretend_args_size);
      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes;

	  fprintf (f, "\tpush\t{");

	  num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);

	  for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
	       regno <= LAST_ARG_REGNUM;
	       regno++)
	    asm_fprintf (f, "%r%s", regno,
			 regno == LAST_ARG_REGNUM ? "" : ", ");

	  fprintf (f, "}\n");
	}
      else
20606 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20607 SP_REGNUM, SP_REGNUM,
20608 crtl->args.pretend_args_size);
20610 /* We don't need to record the stores for unwinding (would it
20611 help the debugger any if we did?), but record the change in
20612 the stack pointer. */
20613 if (dwarf2out_do_frame ())
20615 char *l = dwarf2out_cfi_label (false);
20617 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
	}
    }
20622 /* Get the registers we are going to push. */
20623 offsets = arm_get_frame_offsets ();
20624 live_regs_mask = offsets->saved_regs_mask;
20625 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20626 l_mask = live_regs_mask & 0x40ff;
20627 /* Then count how many other high registers will need to be pushed. */
20628 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  if (TARGET_BACKTRACE)
    {
      unsigned offset;
      unsigned work_register;
20635 /* We have been asked to create a stack backtrace structure.
20636 The code looks like this:
20640 0 sub SP, #16 Reserve space for 4 registers.
20641 2 push {R7} Push low registers.
20642 4 add R7, SP, #20 Get the stack pointer before the push.
20643 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20644 8 mov R7, PC Get hold of the start of this code plus 12.
20645 10 str R7, [SP, #16] Store it.
20646 12 mov R7, FP Get hold of the current frame pointer.
20647 14 str R7, [SP, #4] Store it.
20648 16 mov R7, LR Get hold of the current return address.
20649 18 str R7, [SP, #12] Store it.
20650 20 add R7, SP, #16 Point at the start of the backtrace structure.
20651 22 mov FP, R7 Put this value into the frame pointer. */
20653 work_register = thumb_find_work_register (live_regs_mask);
      if (ARM_EABI_UNWIND_TABLES)
	asm_fprintf (f, "\t.pad #16\n");

      asm_fprintf
	(f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
	 SP_REGNUM, SP_REGNUM);
20662 if (dwarf2out_do_frame ())
20664 char *l = dwarf2out_cfi_label (false);
20666 cfa_offset = cfa_offset + 16;
	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
	}

      if (l_mask)
	{
	  thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}
      else
	offset = 0;
20678 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20679 offset + 16 + crtl->args.pretend_args_size);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		   offset + 4);
20684 /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      if (l_mask)
	{
20688 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset + 12);
20691 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20692 ARM_HARD_FRAME_POINTER_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset);
	}
      else
	{
20698 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20699 ARM_HARD_FRAME_POINTER_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset);
20702 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset + 12);
	}
20707 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		   offset + 8);
      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
		   offset + 12);
20712 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      thumb_pushpop (f, mask, 1, &cfa_offset, mask);
    }
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
20730 unsigned next_hi_reg;
      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;
20736 pushable_regs = l_mask & 0xff;
20738 if (pushable_regs == 0)
20739 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);

		  high_regs_pushed --;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg --)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }
20769 /* If we had to find a work register and we have not yet
20770 saved the LR then add it to the list of regs to push. */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
			     1, &cfa_offset,
			     real_regs_mask | (1 << LR_REGNUM));
	      l_mask = 0;
	    }
	  else
	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
	}
    }
}
20784 /* Handle the case of a double word load into a low register from
20785 a computed memory address. The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;
20796 gcc_assert (GET_CODE (operands[0]) == REG);
20797 gcc_assert (GET_CODE (operands[1]) == MEM);
20799 /* Get the memory address. */
20800 addr = XEXP (operands[1], 0);
20802 /* Work out how the memory address is computed. */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);
      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
20821 /* Compute <address> + 4 for the high order load. */
20822 operands[2] = adjust_address (operands[1], SImode, 4);
20824 output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
20829 arg1 = XEXP (addr, 0);
20830 arg2 = XEXP (addr, 1);
      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;
20837 gcc_assert (GET_CODE (base) == REG);
20839 /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
20843 int reg_base = REGNO (base);
20844 int reg_dest = REGNO (operands[0]);
20846 /* Add the base and offset registers together into the
20847 higher destination register. */
20848 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20849 reg_dest + 1, reg_base, reg_offset);
20851 /* Load the lower destination register from the address in
20852 the higher destination register. */
20853 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20854 reg_dest, reg_dest + 1);
	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
20863 /* Compute <address> + 4 for the high order load. */
20864 operands[2] = adjust_address (operands[1], SImode, 4);
20866 /* If the computed address is held in the low order register
20867 then load the high order register first, otherwise always
20868 load the low order register first. */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{
	  tmp = operands[5];
	  operands[5] = operands[6];
	  operands[6] = tmp;
	}
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
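/* The three comparisons in the 3-register case above form a bubble sort:
   e.g. an initial order r5, r3, r4 becomes r3, r4, r5, so the ldmia/stmia
   register lists are always emitted in the ascending order the
   instructions require.  */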
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;
20954 gcc_assert (regno < LR_REGNUM);
20956 /* If we are in the normal text section we can use a single instance
20957 per compilation unit. If we are doing function sections, then we need
20958 an entry per section, since we can't rely on reachability. */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;
20963 if (thumb_call_via_label[regno] == NULL)
20964 thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
20969 if (cfun->machine->call_via[regno] == NULL)
20970 cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
21001 rtx reg = gen_reg_rtx (SImode);
21002 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
21010 rtx reg = gen_reg_rtx (HImode);
21011 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21012 plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
21021 rtx reg = gen_reg_rtx (QImode);
21022 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21023 plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
			    reg));
    }
}

void
21030 thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
21035 /* Handle reading a half-word from memory during reload. */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
21042 /* Return the length of a function name prefix
21043 that starts with the character 'c'. */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}
21054 /* Return a pointer to a function's name with any
21055 and all prefix encodings stripped from it. */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
21067 /* If there is a '*' anywhere in the name's prefix, then
21068 emit the stripped name verbatim, otherwise prepend an
21069 underscore if leading underscores are being used. */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}

static void
21089 arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");
  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
      else
	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
      if (TARGET_SOFT_FLOAT)
	{
	  if (TARGET_VFP)
	    fpu_name = "softvfp";
	  else
	    fpu_name = "softfpa";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
21116 if (TARGET_HARD_FLOAT)
21117 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21118 if (TARGET_HARD_FLOAT_ABI)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21124 /* Some of these attributes only apply when the corresponding features
21125 are used. However we don't have any easy way of figuring this out.
21126 Conservatively record the setting that would have been used. */
21128 /* Tag_ABI_FP_rounding. */
21129 if (flag_rounding_math)
21130 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
	{
	  /* Tag_ABI_FP_denormal.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
	  /* Tag_ABI_FP_exceptions.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
	}
21138 /* Tag_ABI_FP_user_exceptions. */
21139 if (flag_signaling_nans)
21140 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21141 /* Tag_ABI_FP_number_model. */
21142 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21143 flag_finite_math_only ? 1 : 3);
21145 /* Tag_ABI_align8_needed. */
21146 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21147 /* Tag_ABI_align8_preserved. */
21148 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21149 /* Tag_ABI_enum_size. */
21150 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21151 flag_short_enums ? 1 : 2);
  /* Tag_ABI_optimization_goals.  */
  if (optimize_size)
    val = 4;
  else if (optimize >= 2)
    val = 2;
  else if (optimize)
    val = 1;
  else
    val = 6;
  asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21164 /* Tag_ABI_FP_16bit_format. */
21165 if (arm_fp16_format)
21166 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21167 (int)arm_fp16_format);
21169 if (arm_lang_output_object_attributes_hook)
21170 arm_lang_output_object_attributes_hook();
  default_file_start();
}

static void
arm_file_end (void)
{
  int regno;
21180 if (NEED_INDICATE_EXEC_STACK)
21181 /* Add .note.GNU-stack. */
21182 file_end_indicate_exec_stack ();
  if (! thumb_call_reg_needed)
    return;
21187 switch_to_section (text_section);
21188 asm_fprintf (asm_out_file, "\t.code 16\n");
21189 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21191 for (regno = 0; regno < LR_REGNUM; regno++)
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
21197 targetm.asm_out.internal_label (asm_out_file, "L",
21198 CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}

#ifndef ARM_PE
21205 /* Symbols in the text segment can be accessed without indirecting via the
21206 constant pool; it may take an extra binary operation, but this is still
21207 faster than indirecting via memory. Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   calculation.  */
static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
21214 if (optimize > 0 && TREE_CONSTANT (decl))
21215 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */

static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
21224 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21225 && !strcmp (prefix, "L"))
21227 arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }

  default_internal_label (stream, prefix, labelno);
}
21233 /* Output code to add DELTA to the first argument, and then jump
21234 to FUNCTION. Used for C++ multiple inheritance. */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21237 HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[32];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;
  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
21273 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21274 the address of the add + 8, so we have:
	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.
21279 Note that we have "+ 1" because some versions of GNU ld
21280 don't set the low bit of the result for R_ARM_REL32
21281 relocations against thumb function symbols.
21282 On ARMv6M this is +4, not +8. */
21283 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21284 assemble_name (file, labelpc);
21285 fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
21288 /* This is 2 insns after the start of the thunk, so we know it
21289 is 4-byte aligned. */
21290 fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
21296 else if (TARGET_THUMB1_ONLY)
21297 fputs ("\tmov r12, r3\n", file);
      if (TARGET_THUMB1_ONLY)
	{
	  if (mi_delta > 255)
	    {
21303 fputs ("\tldr\tr3, ", file);
21304 assemble_name (file, label);
21305 fputs ("+4\n", file);
21306 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
			   mi_op, this_regno, this_regno);
	    }
	  else if (mi_delta != 0)
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta);
	    }
	}
      else
	{
	  /* TODO: Use movw/movt for large constants when available.  */
	  while (mi_delta != 0)
	    {
	      if ((mi_delta & (3 << shift)) == 0)
		shift += 2;
	      else
		{
		  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			       mi_op, this_regno, this_regno,
			       mi_delta & (0xff << shift));
		  mi_delta &= ~(0xff << shift);
		  shift += 8;
		}
	    }
	}
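  /* As an illustration of the loop above: a this-pointer adjustment of 260
     (0x104) fits one shifted 8-bit field and is emitted as a single
     "add rN, rN, #260" (rN being r0, or r1 for aggregate returns), while
     0x10004 takes two adds, #4 and then #65536.  */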
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);

      if (flag_pic)
	{
21344 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21345 rtx tem = XEXP (DECL_RTL (function), 0);
21346 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21347 tem = gen_rtx_MINUS (GET_MODE (tem),
21349 gen_rtx_SYMBOL_REF (Pmode,
21350 ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
21362 fputs ("\tb\t", file);
21363 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21364 if (NEED_PLT_RELOC)
21365 fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}

int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;
21376 gcc_assert (GET_CODE (x) == CONST_VECTOR);
  switch (GET_MODE (x))
    {
21380 case V2SImode: pattern = "%08x"; break;
21381 case V4HImode: pattern = "%04x"; break;
21382 case V8QImode: pattern = "%02x"; break;
    default: gcc_unreachable ();
    }
21386 fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
21398 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21399 HFmode constant pool entries are actually loaded with ldr. */
void
arm_emit_fp16_const (rtx c)
{
  long bits;
  REAL_VALUE_TYPE r;
21406 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21407 bits = real_to_target (NULL, &r, HFmode);
21408 if (WORDS_BIG_ENDIAN)
21409 assemble_zeros (2);
21410 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;
21423 if (GET_CODE (operands [1]) != MEM
21424 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21425 || GET_CODE (reg = XEXP (sum, 0)) != REG
21426 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21427 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21428 return "wldrw%?\t%0, %1";
  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
21444 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21446 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21447 named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
static void
21452 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

21460 cfun->machine->uses_anonymous_args = 1;
21461 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;
21470 if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
21474 /* Return nonzero if the CONSUMER instruction (a store) does not need
21475 PRODUCER's value to calculate the address. */
int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
21480 rtx value = PATTERN (producer);
21481 rtx addr = PATTERN (consumer);
21483 if (GET_CODE (value) == COND_EXEC)
21484 value = COND_EXEC_CODE (value);
21485 if (GET_CODE (value) == PARALLEL)
21486 value = XVECEXP (value, 0, 0);
21487 value = XEXP (value, 0);
21488 if (GET_CODE (addr) == COND_EXEC)
21489 addr = COND_EXEC_CODE (addr);
21490 if (GET_CODE (addr) == PARALLEL)
21491 addr = XVECEXP (addr, 0, 0);
21492 addr = XEXP (addr, 0);
  return !reg_overlap_mentioned_p (value, addr);
}
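/* For example, with a producer setting r0 and a consumer "str r0, [r3]",
   the store address involves only r3, so this returns nonzero; for
   "str r4, [r0]" it returns zero, since the address needs r0.  */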
21497 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21498 have an early register shift value or amount dependency on the
21499 result of PRODUCER. */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
21504 rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;
21508 if (GET_CODE (value) == COND_EXEC)
21509 value = COND_EXEC_CODE (value);
21510 if (GET_CODE (value) == PARALLEL)
21511 value = XVECEXP (value, 0, 0);
21512 value = XEXP (value, 0);
21513 if (GET_CODE (op) == COND_EXEC)
21514 op = COND_EXEC_CODE (op);
21515 if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}
21529 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
21536 rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;
21540 if (GET_CODE (value) == COND_EXEC)
21541 value = COND_EXEC_CODE (value);
21542 if (GET_CODE (value) == PARALLEL)
21543 value = XVECEXP (value, 0, 0);
21544 value = XEXP (value, 0);
21545 if (GET_CODE (op) == COND_EXEC)
21546 op = COND_EXEC_CODE (op);
21547 if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
21553 /* This is either an actual independent shift, or a shift applied to
21554 the first operand of another operation. We want the value being
21555 shifted, in either case. */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}
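/* For example, if the producer sets r1 and the consumer is
   "add r0, r2, r1, lsl r3", the shifted value is r1 and this returns zero;
   a producer that only set the shift amount r3 would leave it nonzero.  */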
21562 /* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
21569 rtx value = PATTERN (producer);
21570 rtx op = PATTERN (consumer);
21572 if (GET_CODE (value) == COND_EXEC)
21573 value = COND_EXEC_CODE (value);
21574 if (GET_CODE (value) == PARALLEL)
21575 value = XVECEXP (value, 0, 0);
21576 value = XEXP (value, 0);
21577 if (GET_CODE (op) == COND_EXEC)
21578 op = COND_EXEC_CODE (op);
21579 if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
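/* For example, with consumer "mla r0, r1, r2, r3" (a PLUS of a MULT and
   the accumulator r3), the result is zero when the producer feeds r1 or
   r2, and nonzero when it feeds only the accumulator r3.  */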
21594 /* We can't rely on the caller doing the proper promotion when
21595 using APCS or ATPCS. */
21598 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21600 return !TARGET_AAPCS_BASED;
21603 static enum machine_mode
21604 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21605 enum machine_mode mode,
21606 int *punsignedp ATTRIBUTE_UNUSED,
21607 const_tree fntype ATTRIBUTE_UNUSED,
21608 int for_return ATTRIBUTE_UNUSED)
21610 if (GET_MODE_CLASS (mode) == MODE_INT
21611 && GET_MODE_SIZE (mode) < 4)
21617 /* AAPCS based ABIs use short enums by default. */
21620 arm_default_short_enums (void)
21622 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21626 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21629 arm_align_anon_bitfield (void)
21631 return TARGET_AAPCS_BASED;
21635 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21638 arm_cxx_guard_type (void)
21640 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21643 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21644 has an accumulator dependency on the result of the producer (a
21645 multiplication instruction) and no other dependency on that result. */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
21649 rtx mul = PATTERN (producer);
21650 rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;
21654 if (GET_CODE (mul) == COND_EXEC)
21655 mul = COND_EXEC_CODE (mul);
21656 if (GET_CODE (mac) == COND_EXEC)
21657 mac = COND_EXEC_CODE (mac);
21659 /* Check that mul is of the form (set (...) (mult ...))
21660 and mla is of the form (set (...) (plus (mult ...) (...))). */
21661 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21662 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
      || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;
21666 mul_result = XEXP (mul, 0);
21667 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21668 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21669 mac_acc = XEXP (XEXP (mac, 1), 1);
21671 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21672 && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
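/* For example, "mul r4, r1, r2" followed by "mla r0, r5, r6, r4" gives a
   nonzero result: r4 feeds only the accumulator.  With "mla r0, r4, r6, r4"
   it is zero, because r4 is also a multiply operand.  */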
21677 /* The EABI says test the least significant bit of a guard variable. */
21680 arm_cxx_guard_mask_bit (void)
21682 return TARGET_AAPCS_BASED;
21686 /* The EABI specifies that all array cookies are 8 bytes long. */
21689 arm_get_cookie_size (tree type)
21693 if (!TARGET_AAPCS_BASED)
21694 return default_cxx_get_cookie_size (type);
21696 size = build_int_cst (sizetype, 8);
21701 /* The EABI says that array cookies should also contain the element size. */
21704 arm_cookie_has_size (void)
21706 return TARGET_AAPCS_BASED;
21710 /* The EABI says constructors and destructors should return a pointer to
21711 the object constructed/destroyed. */
21714 arm_cxx_cdtor_returns_this (void)
21716 return TARGET_AAPCS_BASED;
21719 /* The EABI says that an inline function may never be the key
21723 arm_cxx_key_method_may_be_inline (void)
21725 return !TARGET_AAPCS_BASED;
21729 arm_cxx_determine_class_data_visibility (tree decl)
21731 if (!TARGET_AAPCS_BASED
21732 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21735 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21736 is exported. However, on systems without dynamic vague linkage,
21737 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21738 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21739 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21741 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21742 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21746 arm_cxx_class_data_always_comdat (void)
21748 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21749 vague linkage if the class has no key function. */
21750 return !TARGET_AAPCS_BASED;
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */
21758 arm_cxx_use_aeabi_atexit (void)
21760 return TARGET_AAPCS_BASED;
21765 arm_set_return_address (rtx source, rtx scratch)
21767 arm_stack_offsets *offsets;
21768 HOST_WIDE_INT delta;
21770 unsigned long saved_regs;
21772 offsets = arm_get_frame_offsets ();
21773 saved_regs = offsets->saved_regs_mask;
21775 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21776 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21779 if (frame_pointer_needed)
    addr = plus_constant (hard_frame_pointer_rtx, -4);
21783 /* LR will be the first saved register. */
21784 delta = offsets->outgoing_args - (offsets->frame + 4);
21789 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21790 GEN_INT (delta & ~4095)));
21795 addr = stack_pointer_rtx;
21797 addr = plus_constant (addr, delta);
21799 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21805 thumb_set_return_address (rtx source, rtx scratch)
21807 arm_stack_offsets *offsets;
21808 HOST_WIDE_INT delta;
21809 HOST_WIDE_INT limit;
21812 unsigned long mask;
21816 offsets = arm_get_frame_offsets ();
21817 mask = offsets->saved_regs_mask;
21818 if (mask & (1 << LR_REGNUM))
21821 /* Find the saved regs. */
21822 if (frame_pointer_needed)
21824 delta = offsets->soft_frame - offsets->saved_args;
21825 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21831 delta = offsets->outgoing_args - offsets->saved_args;
21834 /* Allow for the stack frame. */
21835 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21837 /* The link register is always the first saved register. */
21840 /* Construct the address. */
21841 addr = gen_rtx_REG (SImode, reg);
21844 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21845 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21849 addr = plus_constant (addr, delta);
21851 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21854 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21857 /* Implements target hook vector_mode_supported_p. */
21859 arm_vector_mode_supported_p (enum machine_mode mode)
21861 /* Neon also supports V2SImode, etc. listed in the clause below. */
21862 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21863 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
21866 if ((TARGET_NEON || TARGET_IWMMXT)
21867 && ((mode == V2SImode)
21868 || (mode == V4HImode)
21869 || (mode == V8QImode)))
21875 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
21876 registers when autovectorizing for Neon, at least until multiple vector
21877 widths are supported properly by the middle-end. */
21879 static unsigned int
21880 arm_units_per_simd_word (enum machine_mode mode ATTRIBUTE_UNUSED)
21882 return (TARGET_NEON
21883 ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD);
21886 /* Implements target hook small_register_classes_for_mode_p. */
21888 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
21890 return TARGET_THUMB1;
21893 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21894 ARM insns and therefore guarantee that the shift count is modulo 256.
21895 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21896 guarantee no particular behavior for out-of-range counts. */
21898 static unsigned HOST_WIDE_INT
21899 arm_shift_truncation_mask (enum machine_mode mode)
21901 return mode == SImode ? 255 : 0;
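/* A hedged illustration of the SImode case: a register-controlled shift
   such as

     mov  r0, r1, lsl r2    @ only bits 7:0 of r2 are used

   truncates its count modulo 256, so returning 255 lets the middle end
   omit an explicit "and r2, r2, #255" before such a shift.  */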
21905 /* Map internal gcc register numbers to DWARF2 register numbers. */
21908 arm_dbx_register_number (unsigned int regno)
21913 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21914 compatibility. The EABI defines them as registers 96-103. */
21915 if (IS_FPA_REGNUM (regno))
21916 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21918 if (IS_VFP_REGNUM (regno))
21920 /* See comment in arm_dwarf_register_span. */
21921 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21922 return 64 + regno - FIRST_VFP_REGNUM;
21924 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21927 if (IS_IWMMXT_GR_REGNUM (regno))
21928 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21930 if (IS_IWMMXT_REGNUM (regno))
21931 return 112 + regno - FIRST_IWMMXT_REGNUM;
21933 gcc_unreachable ();
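/* Worked examples of the mapping above (assuming the usual layout of
   GCC's internal VFP register numbers): s5 maps to DWARF register 69
   (64 + 5); d16, which has no single-precision alias, maps to 272
   (256 + 16); and the IWMMXT register wR3 maps to 115 (112 + 3).  */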
21936 /* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
21938 the DWARF generation code. Other registers can use the default. */
21940 arm_dwarf_register_span (rtx rtl)
21947 regno = REGNO (rtl);
21948 if (!IS_VFP_REGNUM (regno))
21951 /* XXX FIXME: The EABI defines two VFP register ranges:
     64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
     256-287: D0-D31
21954 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21955 corresponding D register. Until GDB supports this, we shall use the
21956 legacy encodings. We also use these encodings for D0-D15 for
21957 compatibility with older debuggers. */
21958 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21961 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21962 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21963 regno = (regno - FIRST_VFP_REGNUM) / 2;
21964 for (i = 0; i < nregs; i++)
21965 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21970 #ifdef TARGET_UNWIND_INFO
21971 /* Emit unwind directives for a store-multiple instruction or stack pointer
21972 push during alignment.
21973 These should only ever be generated by the function prologue code, so
21974 expect them to have a particular form. */
21977 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21980 HOST_WIDE_INT offset;
21981 HOST_WIDE_INT nregs;
21987 e = XVECEXP (p, 0, 0);
21988 if (GET_CODE (e) != SET)
21991 /* First insn will adjust the stack pointer. */
21992 if (GET_CODE (e) != SET
21993 || GET_CODE (XEXP (e, 0)) != REG
21994 || REGNO (XEXP (e, 0)) != SP_REGNUM
21995 || GET_CODE (XEXP (e, 1)) != PLUS)
21998 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21999 nregs = XVECLEN (p, 0) - 1;
22001 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  /* The function prologue may also push pc, but does not annotate it, as
     it is never restored.  We turn this into a stack pointer adjustment.  */
22006 if (nregs * 4 == offset - 4)
22008 fprintf (asm_out_file, "\t.pad #4\n");
22012 fprintf (asm_out_file, "\t.save {");
22014 else if (IS_VFP_REGNUM (reg))
22017 fprintf (asm_out_file, "\t.vsave {");
22019 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22021 /* FPA registers are done differently. */
22022 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22026 /* Unknown register type. */
22029 /* If the stack increment doesn't match the size of the saved registers,
22030 something has gone horribly wrong. */
22031 if (offset != nregs * reg_size)
22036 /* The remaining insns will describe the stores. */
22037 for (i = 1; i <= nregs; i++)
22039 /* Expect (set (mem <addr>) (reg)).
22040 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22041 e = XVECEXP (p, 0, i);
22042 if (GET_CODE (e) != SET
22043 || GET_CODE (XEXP (e, 0)) != MEM
22044 || GET_CODE (XEXP (e, 1)) != REG)
22047 reg = REGNO (XEXP (e, 1));
22052 fprintf (asm_out_file, ", ");
22053 /* We can't use %r for vfp because we need to use the
22054 double precision register names. */
22055 if (IS_VFP_REGNUM (reg))
22056 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22058 asm_fprintf (asm_out_file, "%r", reg);
22060 #ifdef ENABLE_CHECKING
22061 /* Check that the addresses are consecutive. */
22062 e = XEXP (XEXP (e, 0), 0);
22063 if (GET_CODE (e) == PLUS)
22065 offset += reg_size;
22066 if (GET_CODE (XEXP (e, 0)) != REG
22067 || REGNO (XEXP (e, 0)) != SP_REGNUM
22068 || GET_CODE (XEXP (e, 1)) != CONST_INT
22069 || offset != INTVAL (XEXP (e, 1)))
22073 || GET_CODE (e) != REG
22074 || REGNO (e) != SP_REGNUM)
22078 fprintf (asm_out_file, "}\n");
22081 /* Emit unwind directives for a SET. */
22084 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22092 switch (GET_CODE (e0))
22095 /* Pushing a single register. */
22096 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22097 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22098 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22101 asm_fprintf (asm_out_file, "\t.save ");
22102 if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf (asm_out_file, "{d%d}\n",
22104 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
	asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
22110 if (REGNO (e0) == SP_REGNUM)
22112 /* A stack increment. */
22113 if (GET_CODE (e1) != PLUS
22114 || GET_CODE (XEXP (e1, 0)) != REG
22115 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22116 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22119 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22120 -INTVAL (XEXP (e1, 1)));
22122 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22124 HOST_WIDE_INT offset;
22126 if (GET_CODE (e1) == PLUS)
22128 if (GET_CODE (XEXP (e1, 0)) != REG
22129 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22131 reg = REGNO (XEXP (e1, 0));
22132 offset = INTVAL (XEXP (e1, 1));
22133 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22134 HARD_FRAME_POINTER_REGNUM, reg,
22137 else if (GET_CODE (e1) == REG)
22140 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22141 HARD_FRAME_POINTER_REGNUM, reg);
22146 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22148 /* Move from sp to reg. */
22149 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22151 else if (GET_CODE (e1) == PLUS
22152 && GET_CODE (XEXP (e1, 0)) == REG
22153 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22154 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22156 /* Set reg to offset from sp. */
22157 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		   REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
22160 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22162 /* Stack pointer save before alignment. */
22164 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22177 /* Emit unwind directives for the given insn. */
22180 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22184 if (!ARM_EABI_UNWIND_TABLES)
22187 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22188 && (TREE_NOTHROW (current_function_decl)
22189 || crtl->all_throwers_are_sibcalls))
22192 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22195 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22197 pat = XEXP (pat, 0);
22199 pat = PATTERN (insn);
22201 switch (GET_CODE (pat))
22204 arm_unwind_emit_set (asm_out_file, pat);
22208 /* Store multiple. */
22209 arm_unwind_emit_sequence (asm_out_file, pat);
22218 /* Output a reference from a function exception table to the type_info
22219 object X. The EABI specifies that the symbol should be relocated by
22220 an R_ARM_TARGET2 relocation. */
22223 arm_output_ttype (rtx x)
22225 fputs ("\t.word\t", asm_out_file);
22226 output_addr_const (asm_out_file, x);
22227 /* Use special relocations for symbol references. */
22228 if (GET_CODE (x) != CONST_INT)
22229 fputs ("(TARGET2)", asm_out_file);
22230 fputc ('\n', asm_out_file);
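/* For instance (an assumed example symbol): for the type_info object of
   a class A this emits

     .word   _ZTI1A(TARGET2)

   so the linker applies an R_ARM_TARGET2 relocation, whereas a
   catch-all entry encoded as a const_int is emitted undecorated.  */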
22235 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22238 arm_asm_emit_except_personality (rtx personality)
22240 fputs ("\t.personality\t", asm_out_file);
22241 output_addr_const (asm_out_file, personality);
22242 fputc ('\n', asm_out_file);
22245 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22248 arm_asm_init_sections (void)
22250 exception_section = get_unnamed_section (0, output_section_asm_op,
22253 #endif /* TARGET_UNWIND_INFO */
22256 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22257 stack alignment. */
22260 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22262 rtx unspec = SET_SRC (pattern);
22263 gcc_assert (GET_CODE (unspec) == UNSPEC);
22267 case UNSPEC_STACK_ALIGN:
22268 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22269 put anything on the stack, so hopefully it won't matter.
22270 CFA = SP will be correct after alignment. */
22271 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22272 SET_DEST (pattern));
22275 gcc_unreachable ();
22280 /* Output unwind directives for the start/end of a function. */
22283 arm_output_fn_unwind (FILE * f, bool prologue)
22285 if (!ARM_EABI_UNWIND_TABLES)
22289 fputs ("\t.fnstart\n", f);
22292 /* If this function will never be unwound, then mark it as such.
     The same condition is used in arm_unwind_emit to suppress
22294 the frame annotations. */
22295 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22296 && (TREE_NOTHROW (current_function_decl)
22297 || crtl->all_throwers_are_sibcalls))
22298 fputs("\t.cantunwind\n", f);
22300 fputs ("\t.fnend\n", f);
22305 arm_emit_tls_decoration (FILE *fp, rtx x)
22307 enum tls_reloc reloc;
22310 val = XVECEXP (x, 0, 0);
22311 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22313 output_addr_const (fp, val);
22318 fputs ("(tlsgd)", fp);
22321 fputs ("(tlsldm)", fp);
22324 fputs ("(tlsldo)", fp);
22327 fputs ("(gottpoff)", fp);
22330 fputs ("(tpoff)", fp);
22333 gcc_unreachable ();
22341 fputs (" + (. - ", fp);
22342 output_addr_const (fp, XVECEXP (x, 0, 2));
22344 output_addr_const (fp, XVECEXP (x, 0, 3));
22354 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22357 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22359 gcc_assert (size == 4);
22360 fputs ("\t.word\t", file);
22361 output_addr_const (file, x);
22362 fputs ("(tlsldo)", file);
22366 arm_output_addr_const_extra (FILE *fp, rtx x)
22368 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22369 return arm_emit_tls_decoration (fp, x);
22370 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22373 int labelno = INTVAL (XVECEXP (x, 0, 0));
22375 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22376 assemble_name_raw (fp, label);
22380 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22382 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22386 output_addr_const (fp, XVECEXP (x, 0, 0));
22390 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22392 output_addr_const (fp, XVECEXP (x, 0, 0));
22396 output_addr_const (fp, XVECEXP (x, 0, 1));
22400 else if (GET_CODE (x) == CONST_VECTOR)
22401 return arm_emit_vector_const (fp, x);
22406 /* Output assembly for a shift instruction.
22407 SET_FLAGS determines how the instruction modifies the condition codes.
22408 0 - Do not set condition codes.
22409 1 - Set condition codes.
22410 2 - Use smallest instruction. */
arm_output_shift (rtx *operands, int set_flags)
22415 static const char flag_chars[3] = {'?', '.', '!'};
22420 c = flag_chars[set_flags];
22421 if (TARGET_UNIFIED_ASM)
      shift = shift_op (operands[3], &val);
	    operands[2] = GEN_INT (val);
22428 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22431 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22434 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22435 output_asm_insn (pattern, operands);
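/* A hedged illustration (assumed operands): with unified syntax, a
   constant shift and SET_FLAGS == 1 builds a pattern such as "lsl%.",
   which prints as "lsls\tr0, r1, #3"; SET_FLAGS == 0 selects '?' and a
   non-flag-setting "lsl".  With divided syntax the shift is printed as
   a mov with a shifter operand through the "%S3" modifier.  */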
22439 /* Output a Thumb-1 casesi dispatch sequence. */
22441 thumb1_output_casesi (rtx *operands)
22443 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22445 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
  switch (GET_MODE (diff_vec))
22450 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22451 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22453 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22454 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22456 return "bl\t%___gnu_thumb1_case_si";
22458 gcc_unreachable ();
22462 /* Output a Thumb-2 casesi instruction. */
22464 thumb2_output_casesi (rtx *operands)
22466 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22468 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22470 output_asm_insn ("cmp\t%0, %1", operands);
22471 output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
22475 return "tbb\t[%|pc, %0]";
22477 return "tbh\t[%|pc, %0, lsl #1]";
22481 output_asm_insn ("adr\t%4, %l2", operands);
22482 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22483 output_asm_insn ("add\t%4, %4, %5", operands);
22488 output_asm_insn ("adr\t%4, %l2", operands);
22489 return "ldr\t%|pc, [%4, %0, lsl #2]";
22492 gcc_unreachable ();
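/* A sketch of the dispatch emitted for a QImode table (register numbers
   assumed):

     cmp   r0, r1          @ index against the table bound
     bhi   .Ldefault
     tbb   [pc, r0]        @ byte offsets, doubled by the hardware

   HImode tables use "tbh [pc, r0, lsl #1]"; full-word tables fall back
   to the adr/ldr sequences above.  */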
22496 /* Most ARM cores are single issue, but some newer ones can dual issue.
22497 The scheduler descriptions rely on this being correct. */
22499 arm_issue_rate (void)
22515 /* A table and a function to perform ARM-specific name mangling for
22516 NEON vector types in order to conform to the AAPCS (see "Procedure
22517 Call Standard for the ARM Architecture", Appendix A). To qualify
22518 for emission with the mangled names defined in that document, a
22519 vector type must not only be of the correct mode but also be
22520 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22523 enum machine_mode mode;
22524 const char *element_type_name;
22525 const char *aapcs_name;
22526 } arm_mangle_map_entry;
22528 static arm_mangle_map_entry arm_mangle_map[] = {
22529 /* 64-bit containerized types. */
22530 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
22531 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22532 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
22533 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22534 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
22535 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
22536 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
22537 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22538 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22539 /* 128-bit containerized types. */
22540 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
22541 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22542 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
22543 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22544 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
22545 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
22546 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
22547 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22548 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22549 { VOIDmode, NULL, NULL }
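/* For example (an assumed declaration): a parameter of the 64-bit NEON
   type int8x8_t, whose element type is __builtin_neon_qi in V8QImode,
   matches the table entry "15__simd64_int8_t", so

     void f (int8x8_t);   =>   _Z1f15__simd64_int8_t

   where the leading "15" is the usual Itanium <source-name> length
   prefix.  */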
22553 arm_mangle_type (const_tree type)
22555 arm_mangle_map_entry *pos = arm_mangle_map;
22557 /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it were in the "std" namespace.  */
22559 if (TARGET_AAPCS_BASED
22560 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
22562 static bool warned;
22563 if (!warned && warn_psabi && !in_system_header)
22566 inform (input_location,
22567 "the mangling of %<va_list%> has changed in GCC 4.4");
22569 return "St9__va_list";
22572 /* Half-precision float. */
22573 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
22576 if (TREE_CODE (type) != VECTOR_TYPE)
22579 /* Check the mode of the vector type, and the name of the vector
22580 element type, against the table. */
22581 while (pos->mode != VOIDmode)
22583 tree elt_type = TREE_TYPE (type);
22585 if (pos->mode == TYPE_MODE (type)
22586 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
22587 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
22588 pos->element_type_name))
22589 return pos->aapcs_name;
  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
22599 /* Order of allocation of core registers for Thumb: this allocation is
22600 written over the corresponding initial entries of the array
22601 initialized with REG_ALLOC_ORDER. We allocate all low registers
22602 first. Saving and restoring a low register is usually cheaper than
22603 using a call-clobbered high register. */
22605 static const int thumb_core_reg_alloc_order[] =
22607 3, 2, 1, 0, 4, 5, 6, 7,
22608 14, 12, 8, 9, 10, 11, 13, 15
22611 /* Adjust register allocation order when compiling for Thumb. */
22614 arm_order_regs_for_local_alloc (void)
22616 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
22619 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
22620 sizeof (thumb_core_reg_alloc_order));
22623 /* Set default optimization options. */
22625 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
22627 /* Enable section anchors by default at -O1 or higher.
22628 Use 2 to distinguish from an explicit -fsection-anchors
22629 given on the command line. */
22631 flag_section_anchors = 2;
22634 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22637 arm_frame_pointer_required (void)
22639 return (cfun->has_nonlocal_label
22640 || SUBTARGET_FRAME_POINTER_REQUIRED
22641 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
/* Only Thumb-1 lacks conditional execution, so return true unless the
   target is Thumb-1.  */
22647 arm_have_conditional_execution (void)
22649 return !TARGET_THUMB1;
/* Legitimize a memory reference for a sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these instructions.  */
22658 arm_legitimize_sync_memory (rtx memory)
22660 rtx addr = force_reg (Pmode, XEXP (memory, 0));
22661 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
22663 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
22664 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
22665 return legitimate_memory;
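/* A hedged usage sketch: given a reference such as (mem (plus (reg sp)
   (const_int 8))), the address is forced into a fresh pseudo so that
   the result is a bare (mem (reg)), the only form accepted here for
   ldrex/strex; the barrier alias set keeps unrelated memory accesses
   from being reordered into the sequence.  */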
22668 /* An instruction emitter. */
22669 typedef void (* emit_f) (int label, const char *, rtx *);
22671 /* An instruction emitter that emits via the conventional
22672 output_asm_insn. */
22674 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
22676 output_asm_insn (pattern, operands);
22679 /* Count the number of emitted synchronization instructions. */
22680 static unsigned arm_insn_count;
/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
22685 arm_count (int label,
22686 const char *pattern ATTRIBUTE_UNUSED,
22687 rtx *operands ATTRIBUTE_UNUSED)
22693 /* Construct a pattern using conventional output formatting and feed
22694 it to output_asm_insn. Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
22698 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
22699 const char *pattern, ...)
22704 va_start (ap, pattern);
22705 vsprintf (buffer, pattern, ap);
22707 emit (label, buffer, operands);
22710 /* Emit the memory barrier instruction, if any, provided by this
22711 target to a specified emitter. */
22713 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
22715 if (TARGET_HAVE_DMB)
      /* Note we issue a system level barrier.  We should consider
	 issuing an inner shareability zone barrier here instead,
	 i.e. "DMB ISH".  */
22720 emit (0, "dmb\tsy", operands);
22724 if (TARGET_HAVE_DMB_MCR)
22726 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
22730 gcc_unreachable ();
/* Emit the memory barrier instruction, if any, provided by this
   target.  */
22736 arm_output_memory_barrier (rtx *operands)
22738 arm_process_output_memory_barrier (arm_emit, operands);
22742 /* Helper to figure out the instruction suffix required on ldrex/strex
22743 for operations on an object of the specified mode. */
22744 static const char *
22745 arm_ldrex_suffix (enum machine_mode mode)
22749 case QImode: return "b";
22750 case HImode: return "h";
22751 case SImode: return "";
22752 case DImode: return "d";
22754 gcc_unreachable ();
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   memory MODE.  */
22762 arm_output_ldrex (emit_f emit,
22763 enum machine_mode mode,
22767 const char *suffix = arm_ldrex_suffix (mode);
22770 operands[0] = target;
22771 operands[1] = memory;
22772 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
/* Emit a strex{b,h,d, } instruction appropriate for the specified
   memory MODE.  */
22778 arm_output_strex (emit_f emit,
22779 enum machine_mode mode,
22785 const char *suffix = arm_ldrex_suffix (mode);
22788 operands[0] = result;
22789 operands[1] = value;
22790 operands[2] = memory;
22791 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
22795 /* Helper to emit a two operand instruction. */
22797 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
22803 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
22806 /* Helper to emit a three operand instruction. */
22808 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
22815 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:

   RTX register or const_int representing the required old_value for
   the modify to continue, if NULL no comparison is performed.  */
22837 arm_output_sync_loop (emit_f emit,
22838 enum machine_mode mode,
22841 rtx required_value,
22845 enum attr_sync_op sync_op,
22846 int early_barrier_required)
22850 gcc_assert (t1 != t2);
22852 if (early_barrier_required)
22853 arm_process_output_memory_barrier (emit, NULL);
22855 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
22857 arm_output_ldrex (emit, mode, old_value, memory);
22859 if (required_value)
22863 operands[0] = old_value;
22864 operands[1] = required_value;
22865 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
22866 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
22872 arm_output_op3 (emit, "add", t1, old_value, new_value);
22876 arm_output_op3 (emit, "sub", t1, old_value, new_value);
22880 arm_output_op3 (emit, "orr", t1, old_value, new_value);
22884 arm_output_op3 (emit, "eor", t1, old_value, new_value);
22888 arm_output_op3 (emit,"and", t1, old_value, new_value);
22892 arm_output_op3 (emit, "and", t1, old_value, new_value);
22893 arm_output_op2 (emit, "mvn", t1, t1);
22901 arm_output_strex (emit, mode, "", t2, t1, memory);
22903 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
22904 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
22906 arm_process_output_memory_barrier (emit, NULL);
22907 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
22911 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
22914 default_value = operands[index - 1];
22916 return default_value;
22919 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
22920 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
22925 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
22927 rtx result, memory, required_value, new_value, t1, t2;
22929 enum machine_mode mode;
22930 enum attr_sync_op sync_op;
22932 result = FETCH_SYNC_OPERAND(result, 0);
22933 memory = FETCH_SYNC_OPERAND(memory, 0);
22934 required_value = FETCH_SYNC_OPERAND(required_value, 0);
22935 new_value = FETCH_SYNC_OPERAND(new_value, 0);
22936 t1 = FETCH_SYNC_OPERAND(t1, 0);
22937 t2 = FETCH_SYNC_OPERAND(t2, 0);
22939 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
22940 sync_op = get_attr_sync_op (insn);
22941 mode = GET_MODE (memory);
22943 arm_output_sync_loop (emit, mode, result, memory, required_value,
22944 new_value, t1, t2, sync_op, early_barrier);
22947 /* Emit a synchronization instruction loop. */
22949 arm_output_sync_insn (rtx insn, rtx *operands)
22951 arm_process_output_sync_insn (arm_emit, insn, operands);
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions; it just counts instructions, being careful not
   to count labels.  */
22960 arm_sync_loop_insns (rtx insn, rtx *operands)
22962 arm_insn_count = 0;
22963 arm_process_output_sync_insn (arm_count, insn, operands);
22964 return arm_insn_count;
22967 /* Helper to call a target sync instruction generator, dealing with
22968 the variation in operands required by the different generators. */
22970 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
22971 rtx memory, rtx required_value, rtx new_value)
22973 switch (generator->op)
22975 case arm_sync_generator_omn:
22976 gcc_assert (! required_value);
22977 return generator->u.omn (old_value, memory, new_value);
22979 case arm_sync_generator_omrn:
22980 gcc_assert (required_value);
22981 return generator->u.omrn (old_value, memory, required_value, new_value);
22987 /* Expand a synchronization loop. The synchronization loop is expanded
22988 as an opaque block of instructions in order to ensure that we do
22989 not subsequently get extraneous memory accesses inserted within the
22990 critical region. The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
22993 arm_expand_sync (enum machine_mode mode,
22994 struct arm_sync_generator *generator,
22995 rtx target, rtx memory, rtx required_value, rtx new_value)
22997 if (target == NULL)
22998 target = gen_reg_rtx (mode);
23000 memory = arm_legitimize_sync_memory (memory);
23001 if (mode != SImode)
23003 rtx load_temp = gen_reg_rtx (SImode);
23005 if (required_value)
23006 required_value = convert_modes (SImode, mode, required_value, true);
23008 new_value = convert_modes (SImode, mode, new_value, true);
23009 emit_insn (arm_call_generator (generator, load_temp, memory,
23010 required_value, new_value));
23011 emit_move_insn (target, gen_lowpart (mode, load_temp));
23015 emit_insn (arm_call_generator (generator, target, memory, required_value,
23020 #include "gt-arm.h"