1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 enum machine_mode, int *,
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, const_rtx);
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
141 static bool arm_have_conditional_execution (void);
142 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
143 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
144 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_rtx_costs (rtx, int, int, int *, bool);
149 static int arm_address_cost (rtx, bool);
150 static bool arm_memory_load_p (rtx);
151 static bool arm_cirrus_insn_p (rtx);
152 static void cirrus_reorg (rtx);
153 static void arm_init_builtins (void);
154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
155 static void arm_init_iwmmxt_builtins (void);
156 static rtx safe_vector_operand (rtx, enum machine_mode);
157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
160 static void emit_constant_insn (rtx cond, rtx pattern);
161 static rtx emit_set_insn (rtx, rtx);
162 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
164 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
166 static int aapcs_select_return_coproc (const_tree, const_tree);
168 #ifdef OBJECT_FORMAT_ELF
169 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
170 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
173 static void arm_encode_section_info (tree, rtx, int);
176 static void arm_file_end (void);
177 static void arm_file_start (void);
179 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
181 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
182 enum machine_mode, const_tree, bool);
183 static bool arm_promote_prototypes (const_tree);
184 static bool arm_default_short_enums (void);
185 static bool arm_align_anon_bitfield (void);
186 static bool arm_return_in_msb (const_tree);
187 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 #ifdef TARGET_UNWIND_INFO
190 static void arm_unwind_emit (FILE *, rtx);
191 static bool arm_output_ttype (rtx);
193 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
194 static rtx arm_dwarf_register_span (rtx);
196 static tree arm_cxx_guard_type (void);
197 static bool arm_cxx_guard_mask_bit (void);
198 static tree arm_get_cookie_size (tree);
199 static bool arm_cookie_has_size (void);
200 static bool arm_cxx_cdtor_returns_this (void);
201 static bool arm_cxx_key_method_may_be_inline (void);
202 static void arm_cxx_determine_class_data_visibility (tree);
203 static bool arm_cxx_class_data_always_comdat (void);
204 static bool arm_cxx_use_aeabi_atexit (void);
205 static void arm_init_libfuncs (void);
206 static tree arm_build_builtin_va_list (void);
207 static void arm_expand_builtin_va_start (tree, rtx);
208 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
209 static bool arm_handle_option (size_t, const char *, int);
210 static void arm_target_help (void);
211 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
212 static bool arm_cannot_copy_insn_p (rtx);
213 static bool arm_tls_symbol_p (rtx x);
214 static int arm_issue_rate (void);
215 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
216 static bool arm_allocate_stack_slots_for_args (void);
217 static const char *arm_invalid_parameter_type (const_tree t);
218 static const char *arm_invalid_return_type (const_tree t);
219 static tree arm_promoted_type (const_tree t);
220 static tree arm_convert_to_type (tree type, tree expr);
221 static bool arm_scalar_mode_supported_p (enum machine_mode);
222 static bool arm_frame_pointer_required (void);
223 static bool arm_can_eliminate (const int, const int);
224 static void arm_asm_trampoline_template (FILE *);
225 static void arm_trampoline_init (rtx, tree, rtx);
226 static rtx arm_trampoline_adjust_address (rtx);
229 /* Table of machine attributes. */
230 static const struct attribute_spec arm_attribute_table[] =
232 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
233 /* Function calls made to this symbol must be done indirectly, because
234 it may lie outside of the 26 bit addressing range of a normal function
236 { "long_call", 0, 0, false, true, true, NULL },
237 /* Whereas these functions are always known to reside within the 26 bit
239 { "short_call", 0, 0, false, true, true, NULL },
240 /* Specify the procedure call conventions for a function. */
241 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
242 /* Interrupt Service Routines have special prologue and epilogue requirements. */
243 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
244 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
245 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
247 /* ARM/PE has three new attributes:
249 dllexport - for exporting a function/variable that will live in a dll
250 dllimport - for importing a function/variable from a dll
252 Microsoft allows multiple declspecs in one __declspec, separating
253 them with spaces. We do NOT support this. Instead, use __declspec
256 { "dllimport", 0, 0, true, false, false, NULL },
257 { "dllexport", 0, 0, true, false, false, NULL },
258 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
259 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
260 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
261 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
262 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
264 { NULL, 0, 0, false, false, false, NULL }
267 /* Initialize the GCC target structure. */
268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
269 #undef TARGET_MERGE_DECL_ATTRIBUTES
270 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
273 #undef TARGET_LEGITIMIZE_ADDRESS
274 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
279 #undef TARGET_ASM_FILE_START
280 #define TARGET_ASM_FILE_START arm_file_start
281 #undef TARGET_ASM_FILE_END
282 #define TARGET_ASM_FILE_END arm_file_end
284 #undef TARGET_ASM_ALIGNED_SI_OP
285 #define TARGET_ASM_ALIGNED_SI_OP NULL
286 #undef TARGET_ASM_INTEGER
287 #define TARGET_ASM_INTEGER arm_assemble_integer
289 #undef TARGET_ASM_FUNCTION_PROLOGUE
290 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
295 #undef TARGET_DEFAULT_TARGET_FLAGS
296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
297 #undef TARGET_HANDLE_OPTION
298 #define TARGET_HANDLE_OPTION arm_handle_option
300 #define TARGET_HELP arm_target_help
302 #undef TARGET_COMP_TYPE_ATTRIBUTES
303 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
305 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
306 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
308 #undef TARGET_SCHED_ADJUST_COST
309 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
311 #undef TARGET_ENCODE_SECTION_INFO
313 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
315 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
318 #undef TARGET_STRIP_NAME_ENCODING
319 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
321 #undef TARGET_ASM_INTERNAL_LABEL
322 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
324 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
325 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
327 #undef TARGET_FUNCTION_VALUE
328 #define TARGET_FUNCTION_VALUE arm_function_value
330 #undef TARGET_LIBCALL_VALUE
331 #define TARGET_LIBCALL_VALUE arm_libcall_value
333 #undef TARGET_ASM_OUTPUT_MI_THUNK
334 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
338 #undef TARGET_RTX_COSTS
339 #define TARGET_RTX_COSTS arm_rtx_costs
340 #undef TARGET_ADDRESS_COST
341 #define TARGET_ADDRESS_COST arm_address_cost
343 #undef TARGET_SHIFT_TRUNCATION_MASK
344 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
345 #undef TARGET_VECTOR_MODE_SUPPORTED_P
346 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
348 #undef TARGET_MACHINE_DEPENDENT_REORG
349 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
351 #undef TARGET_INIT_BUILTINS
352 #define TARGET_INIT_BUILTINS arm_init_builtins
353 #undef TARGET_EXPAND_BUILTIN
354 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
356 #undef TARGET_INIT_LIBFUNCS
357 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
359 #undef TARGET_PROMOTE_FUNCTION_MODE
360 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
361 #undef TARGET_PROMOTE_PROTOTYPES
362 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
363 #undef TARGET_PASS_BY_REFERENCE
364 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
365 #undef TARGET_ARG_PARTIAL_BYTES
366 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
368 #undef TARGET_SETUP_INCOMING_VARARGS
369 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
371 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
372 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
376 #undef TARGET_TRAMPOLINE_INIT
377 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
381 #undef TARGET_DEFAULT_SHORT_ENUMS
382 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
384 #undef TARGET_ALIGN_ANON_BITFIELD
385 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
387 #undef TARGET_NARROW_VOLATILE_BITFIELD
388 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
390 #undef TARGET_CXX_GUARD_TYPE
391 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
393 #undef TARGET_CXX_GUARD_MASK_BIT
394 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
396 #undef TARGET_CXX_GET_COOKIE_SIZE
397 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
399 #undef TARGET_CXX_COOKIE_HAS_SIZE
400 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
402 #undef TARGET_CXX_CDTOR_RETURNS_THIS
403 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
405 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
406 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
408 #undef TARGET_CXX_USE_AEABI_ATEXIT
409 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
411 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
412 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
413 arm_cxx_determine_class_data_visibility
415 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
416 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
418 #undef TARGET_RETURN_IN_MSB
419 #define TARGET_RETURN_IN_MSB arm_return_in_msb
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
424 #undef TARGET_MUST_PASS_IN_STACK
425 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
427 #ifdef TARGET_UNWIND_INFO
428 #undef TARGET_UNWIND_EMIT
429 #define TARGET_UNWIND_EMIT arm_unwind_emit
431 /* EABI unwinding tables use a different format for the typeinfo tables. */
432 #undef TARGET_ASM_TTYPE
433 #define TARGET_ASM_TTYPE arm_output_ttype
435 #undef TARGET_ARM_EABI_UNWINDER
436 #define TARGET_ARM_EABI_UNWINDER true
437 #endif /* TARGET_UNWIND_INFO */
439 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
440 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
442 #undef TARGET_DWARF_REGISTER_SPAN
443 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
445 #undef TARGET_CANNOT_COPY_INSN_P
446 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
449 #undef TARGET_HAVE_TLS
450 #define TARGET_HAVE_TLS true
453 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
454 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
456 #undef TARGET_CANNOT_FORCE_CONST_MEM
457 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
459 #undef TARGET_MAX_ANCHOR_OFFSET
460 #define TARGET_MAX_ANCHOR_OFFSET 4095
462 /* The minimum is set such that the total size of the block
463 for a particular anchor is -4088 + 1 + 4095 bytes, which is
464 divisible by eight, ensuring natural spacing of anchors. */
465 #undef TARGET_MIN_ANCHOR_OFFSET
466 #define TARGET_MIN_ANCHOR_OFFSET -4088
468 #undef TARGET_SCHED_ISSUE_RATE
469 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
471 #undef TARGET_MANGLE_TYPE
472 #define TARGET_MANGLE_TYPE arm_mangle_type
474 #undef TARGET_BUILD_BUILTIN_VA_LIST
475 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
476 #undef TARGET_EXPAND_BUILTIN_VA_START
477 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
478 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
479 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
482 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
483 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
486 #undef TARGET_LEGITIMATE_ADDRESS_P
487 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
489 #undef TARGET_INVALID_PARAMETER_TYPE
490 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
492 #undef TARGET_INVALID_RETURN_TYPE
493 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
495 #undef TARGET_PROMOTED_TYPE
496 #define TARGET_PROMOTED_TYPE arm_promoted_type
498 #undef TARGET_CONVERT_TO_TYPE
499 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
501 #undef TARGET_SCALAR_MODE_SUPPORTED_P
502 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
504 #undef TARGET_FRAME_POINTER_REQUIRED
505 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
507 #undef TARGET_CAN_ELIMINATE
508 #define TARGET_CAN_ELIMINATE arm_can_eliminate
510 struct gcc_target targetm = TARGET_INITIALIZER;
512 /* Obstack for minipool constant handling. */
513 static struct obstack minipool_obstack;
514 static char * minipool_startobj;
516 /* The maximum number of insns skipped which
517 will be conditionalised if possible. */
518 static int max_insns_skipped = 5;
520 extern FILE * asm_out_file;
522 /* True if we are currently building a constant table. */
523 int making_const_table;
525 /* The processor for which instructions should be scheduled. */
526 enum processor_type arm_tune = arm_none;
528 /* The default processor used if not overridden by commandline. */
529 static enum processor_type arm_default_cpu = arm_none;
531 /* Which floating point hardware to schedule for. */
534 /* Which floating popint hardware to use. */
535 const struct arm_fpu_desc *arm_fpu_desc;
537 /* Whether to use floating point hardware. */
538 enum float_abi_type arm_float_abi;
540 /* Which __fp16 format to use. */
541 enum arm_fp16_format_type arm_fp16_format;
543 /* Which ABI to use. */
544 enum arm_abi_type arm_abi;
546 /* Which thread pointer model to use. */
547 enum arm_tp_type target_thread_pointer = TP_AUTO;
549 /* Used to parse -mstructure_size_boundary command line option. */
550 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
552 /* Used for Thumb call_via trampolines. */
553 rtx thumb_call_via_label[14];
554 static int thumb_call_reg_needed;
556 /* Bit values used to identify processor capabilities. */
557 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
558 #define FL_ARCH3M (1 << 1) /* Extended multiply */
559 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
560 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
561 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
562 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
563 #define FL_THUMB (1 << 6) /* Thumb aware */
564 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
565 #define FL_STRONG (1 << 8) /* StrongARM */
566 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
567 #define FL_XSCALE (1 << 10) /* XScale */
568 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
569 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
570 media instructions. */
571 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
572 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
573 Note: ARM6 & 7 derivatives only. */
574 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
575 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
576 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
578 #define FL_DIV (1 << 18) /* Hardware divide. */
579 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
580 #define FL_NEON (1 << 20) /* Neon instructions. */
581 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
584 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
586 #define FL_FOR_ARCH2 FL_NOTM
587 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
588 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
589 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
590 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
591 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
592 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
593 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
594 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
595 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
596 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
597 #define FL_FOR_ARCH6J FL_FOR_ARCH6
598 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
599 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
600 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
601 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
602 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
603 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
604 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
605 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
606 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
607 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
609 /* The bits in this mask specify which
610 instructions we are allowed to generate. */
611 static unsigned long insn_flags = 0;
613 /* The bits in this mask specify which instruction scheduling options should
615 static unsigned long tune_flags = 0;
617 /* The following are used in the arm.md file as equivalents to bits
618 in the above two flag variables. */
620 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
623 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
626 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
629 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
632 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
635 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
638 /* Nonzero if this chip supports the ARM 6K extensions. */
641 /* Nonzero if instructions not present in the 'M' profile can be used. */
642 int arm_arch_notm = 0;
644 /* Nonzero if instructions present in ARMv7E-M can be used. */
647 /* Nonzero if this chip can benefit from load scheduling. */
648 int arm_ld_sched = 0;
650 /* Nonzero if this chip is a StrongARM. */
651 int arm_tune_strongarm = 0;
653 /* Nonzero if this chip is a Cirrus variant. */
654 int arm_arch_cirrus = 0;
656 /* Nonzero if this chip supports Intel Wireless MMX technology. */
657 int arm_arch_iwmmxt = 0;
659 /* Nonzero if this chip is an XScale. */
660 int arm_arch_xscale = 0;
662 /* Nonzero if tuning for XScale */
663 int arm_tune_xscale = 0;
665 /* Nonzero if we want to tune for stores that access the write-buffer.
666 This typically means an ARM6 or ARM7 with MMU or MPU. */
667 int arm_tune_wbuf = 0;
669 /* Nonzero if tuning for Cortex-A9. */
670 int arm_tune_cortex_a9 = 0;
672 /* Nonzero if generating Thumb instructions. */
675 /* Nonzero if we should define __THUMB_INTERWORK__ in the
677 XXX This is a bit of a hack, it's intended to help work around
678 problems in GLD which doesn't understand that armv5t code is
679 interworking clean. */
680 int arm_cpp_interwork = 0;
682 /* Nonzero if chip supports Thumb 2. */
685 /* Nonzero if chip supports integer division instruction. */
688 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
689 must report the mode of the memory reference from PRINT_OPERAND to
690 PRINT_OPERAND_ADDRESS. */
691 enum machine_mode output_memory_reference_mode;
693 /* The register number to be used for the PIC offset register. */
694 unsigned arm_pic_register = INVALID_REGNUM;
696 /* Set to 1 after arm_reorg has started. Reset to start at the start of
697 the next function. */
698 static int after_arm_reorg = 0;
700 /* The maximum number of insns to be used when loading a constant. */
701 static int arm_constant_limit = 3;
703 static enum arm_pcs arm_pcs_default;
705 /* For an explanation of these variables, see final_prescan_insn below. */
707 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
708 enum arm_cond_code arm_current_cc;
710 int arm_target_label;
711 /* The number of conditionally executed insns, including the current insn. */
712 int arm_condexec_count = 0;
713 /* A bitmask specifying the patterns for the IT block.
714 Zero means do not output an IT block before this insn. */
715 int arm_condexec_mask = 0;
716 /* The number of bits used in arm_condexec_mask. */
717 int arm_condexec_masklen = 0;
719 /* The condition codes of the ARM, and the inverse function. */
720 static const char * const arm_condition_codes[] =
722 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
723 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
726 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
727 #define streq(string1, string2) (strcmp (string1, string2) == 0)
729 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
730 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
731 | (1 << PIC_OFFSET_TABLE_REGNUM)))
733 /* Initialization code. */
737 const char *const name;
738 enum processor_type core;
740 const unsigned long flags;
741 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
744 /* Not all of these give usefully different compilation alternatives,
745 but there is no simple way of generalizing them. */
746 static const struct processors all_cores[] =
749 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
750 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
751 #include "arm-cores.def"
753 {NULL, arm_none, NULL, 0, NULL}
756 static const struct processors all_architectures[] =
758 /* ARM Architectures */
759 /* We don't specify rtx_costs here as it will be figured out
762 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
763 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
764 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
765 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
766 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
767 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
768 implementations that support it, so we will leave it out for now. */
769 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
770 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
771 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
772 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
773 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
774 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
775 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
776 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
777 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
778 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
779 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
780 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
781 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
782 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
783 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
784 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
785 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
786 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
787 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
788 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
789 {NULL, arm_none, NULL, 0 , NULL}
792 struct arm_cpu_select
796 const struct processors * processors;
799 /* This is a magic structure. The 'string' field is magically filled in
800 with a pointer to the value specified by the user on the command line
801 assuming that the user has specified such a value. */
803 static struct arm_cpu_select arm_select[] =
805 /* string name processors */
806 { NULL, "-mcpu=", all_cores },
807 { NULL, "-march=", all_architectures },
808 { NULL, "-mtune=", all_cores }
811 /* Defines representing the indexes into the above table. */
812 #define ARM_OPT_SET_CPU 0
813 #define ARM_OPT_SET_ARCH 1
814 #define ARM_OPT_SET_TUNE 2
816 /* The name of the preprocessor macro to define for this architecture. */
818 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
820 /* Available values for -mfpu=. */
822 static const struct arm_fpu_desc all_fpus[] =
824 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
825 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
826 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
827 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
828 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
829 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
830 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
831 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
832 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
833 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
834 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
835 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
836 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
837 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
838 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
839 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
840 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
841 /* Compatibility aliases. */
842 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
849 enum float_abi_type abi_type;
853 /* Available values for -mfloat-abi=. */
855 static const struct float_abi all_float_abis[] =
857 {"soft", ARM_FLOAT_ABI_SOFT},
858 {"softfp", ARM_FLOAT_ABI_SOFTFP},
859 {"hard", ARM_FLOAT_ABI_HARD}
866 enum arm_fp16_format_type fp16_format_type;
870 /* Available values for -mfp16-format=. */
872 static const struct fp16_format all_fp16_formats[] =
874 {"none", ARM_FP16_FORMAT_NONE},
875 {"ieee", ARM_FP16_FORMAT_IEEE},
876 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
883 enum arm_abi_type abi_type;
887 /* Available values for -mabi=. */
889 static const struct abi_name arm_all_abis[] =
891 {"apcs-gnu", ARM_ABI_APCS},
892 {"atpcs", ARM_ABI_ATPCS},
893 {"aapcs", ARM_ABI_AAPCS},
894 {"iwmmxt", ARM_ABI_IWMMXT},
895 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
898 /* Supported TLS relocations. */
908 /* Emit an insn that's a simple single-set. Both the operands must be known
911 emit_set_insn (rtx x, rtx y)
913 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
916 /* Return the number of bits set in VALUE. */
918 bit_count (unsigned long value)
920 unsigned long count = 0;
925 value &= value - 1; /* Clear the least-significant set bit. */
931 /* Set up library functions unique to ARM. */
934 arm_init_libfuncs (void)
936 /* There are no special library functions unless we are using the
941 /* The functions below are described in Section 4 of the "Run-Time
942 ABI for the ARM architecture", Version 1.0. */
944 /* Double-precision floating-point arithmetic. Table 2. */
945 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
946 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
947 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
948 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
949 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
951 /* Double-precision comparisons. Table 3. */
952 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
953 set_optab_libfunc (ne_optab, DFmode, NULL);
954 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
955 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
956 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
957 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
958 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
960 /* Single-precision floating-point arithmetic. Table 4. */
961 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
962 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
963 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
964 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
965 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
967 /* Single-precision comparisons. Table 5. */
968 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
969 set_optab_libfunc (ne_optab, SFmode, NULL);
970 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
971 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
972 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
973 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
974 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
976 /* Floating-point to integer conversions. Table 6. */
977 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
978 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
979 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
980 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
981 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
982 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
983 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
984 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
986 /* Conversions between floating types. Table 7. */
987 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
988 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
990 /* Integer to floating-point conversions. Table 8. */
991 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
992 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
993 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
994 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
995 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
996 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
997 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
998 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1000 /* Long long. Table 9. */
1001 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1002 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1003 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1004 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1005 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1006 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1007 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1008 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1010 /* Integer (32/32->32) division. \S 4.3.1. */
1011 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1012 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1014 /* The divmod functions are designed so that they can be used for
1015 plain division, even though they return both the quotient and the
1016 remainder. The quotient is returned in the usual location (i.e.,
1017 r0 for SImode, {r0, r1} for DImode), just as would be expected
1018 for an ordinary division routine. Because the AAPCS calling
1019 conventions specify that all of { r0, r1, r2, r3 } are
1020 callee-saved registers, there is no need to tell the compiler
1021 explicitly that those registers are clobbered by these
1023 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1024 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1026 /* For SImode division the ABI provides div-without-mod routines,
1027 which are faster. */
1028 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1029 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1031 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1032 divmod libcalls instead. */
1033 set_optab_libfunc (smod_optab, DImode, NULL);
1034 set_optab_libfunc (umod_optab, DImode, NULL);
1035 set_optab_libfunc (smod_optab, SImode, NULL);
1036 set_optab_libfunc (umod_optab, SImode, NULL);
1038 /* Half-precision float operations. The compiler handles all operations
1039 with NULL libfuncs by converting the SFmode. */
1040 switch (arm_fp16_format)
1042 case ARM_FP16_FORMAT_IEEE:
1043 case ARM_FP16_FORMAT_ALTERNATIVE:
1046 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1047 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1049 : "__gnu_f2h_alternative"));
1050 set_conv_libfunc (sext_optab, SFmode, HFmode,
1051 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1053 : "__gnu_h2f_alternative"));
1056 set_optab_libfunc (add_optab, HFmode, NULL);
1057 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1058 set_optab_libfunc (smul_optab, HFmode, NULL);
1059 set_optab_libfunc (neg_optab, HFmode, NULL);
1060 set_optab_libfunc (sub_optab, HFmode, NULL);
1063 set_optab_libfunc (eq_optab, HFmode, NULL);
1064 set_optab_libfunc (ne_optab, HFmode, NULL);
1065 set_optab_libfunc (lt_optab, HFmode, NULL);
1066 set_optab_libfunc (le_optab, HFmode, NULL);
1067 set_optab_libfunc (ge_optab, HFmode, NULL);
1068 set_optab_libfunc (gt_optab, HFmode, NULL);
1069 set_optab_libfunc (unord_optab, HFmode, NULL);
1076 if (TARGET_AAPCS_BASED)
1077 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1080 /* On AAPCS systems, this is the "struct __va_list". */
1081 static GTY(()) tree va_list_type;
1083 /* Return the type to use as __builtin_va_list. */
1085 arm_build_builtin_va_list (void)
1090 if (!TARGET_AAPCS_BASED)
1091 return std_build_builtin_va_list ();
1093 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1101 The C Library ABI further reinforces this definition in \S
1104 We must follow this definition exactly. The structure tag
1105 name is visible in C++ mangled names, and thus forms a part
1106 of the ABI. The field name may be used by people who
1107 #include <stdarg.h>. */
1108 /* Create the type. */
1109 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1110 /* Give it the required name. */
1111 va_list_name = build_decl (BUILTINS_LOCATION,
1113 get_identifier ("__va_list"),
1115 DECL_ARTIFICIAL (va_list_name) = 1;
1116 TYPE_NAME (va_list_type) = va_list_name;
1117 /* Create the __ap field. */
1118 ap_field = build_decl (BUILTINS_LOCATION,
1120 get_identifier ("__ap"),
1122 DECL_ARTIFICIAL (ap_field) = 1;
1123 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1124 TYPE_FIELDS (va_list_type) = ap_field;
1125 /* Compute its layout. */
1126 layout_type (va_list_type);
1128 return va_list_type;
1131 /* Return an expression of type "void *" pointing to the next
1132 available argument in a variable-argument list. VALIST is the
1133 user-level va_list object, of type __builtin_va_list. */
1135 arm_extract_valist_ptr (tree valist)
1137 if (TREE_TYPE (valist) == error_mark_node)
1138 return error_mark_node;
1140 /* On an AAPCS target, the pointer is stored within "struct
1142 if (TARGET_AAPCS_BASED)
1144 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1145 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1146 valist, ap_field, NULL_TREE);
1152 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1154 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1156 valist = arm_extract_valist_ptr (valist);
1157 std_expand_builtin_va_start (valist, nextarg);
1160 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1162 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1165 valist = arm_extract_valist_ptr (valist);
1166 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1169 /* Implement TARGET_HANDLE_OPTION. */
1172 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1177 arm_select[1].string = arg;
1181 arm_select[0].string = arg;
1184 case OPT_mhard_float:
1185 target_float_abi_name = "hard";
1188 case OPT_msoft_float:
1189 target_float_abi_name = "soft";
1193 arm_select[2].string = arg;
1202 arm_target_help (void)
1205 static int columns = 0;
1208 /* If we have not done so already, obtain the desired maximum width of
1209 the output. Note - this is a duplication of the code at the start of
1210 gcc/opts.c:print_specific_help() - the two copies should probably be
1211 replaced by a single function. */
1216 GET_ENVIRONMENT (p, "COLUMNS");
1219 int value = atoi (p);
1226 /* Use a reasonable default. */
1230 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1232 /* The - 2 is because we know that the last entry in the array is NULL. */
1233 i = ARRAY_SIZE (all_cores) - 2;
1235 printf (" %s", all_cores[i].name);
1236 remaining = columns - (strlen (all_cores[i].name) + 4);
1237 gcc_assert (remaining >= 0);
1241 int len = strlen (all_cores[i].name);
1243 if (remaining > len + 2)
1245 printf (", %s", all_cores[i].name);
1246 remaining -= len + 2;
1252 printf ("\n %s", all_cores[i].name);
1253 remaining = columns - (len + 4);
1257 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1259 i = ARRAY_SIZE (all_architectures) - 2;
1262 printf (" %s", all_architectures[i].name);
1263 remaining = columns - (strlen (all_architectures[i].name) + 4);
1264 gcc_assert (remaining >= 0);
1268 int len = strlen (all_architectures[i].name);
1270 if (remaining > len + 2)
1272 printf (", %s", all_architectures[i].name);
1273 remaining -= len + 2;
1279 printf ("\n %s", all_architectures[i].name);
1280 remaining = columns - (len + 4);
1287 /* Fix up any incompatible options that the user has specified.
1288 This has now turned into a maze. */
1290 arm_override_options (void)
1293 enum processor_type target_arch_cpu = arm_none;
1294 enum processor_type selected_cpu = arm_none;
1296 /* Set up the flags based on the cpu/architecture selected by the user. */
1297 for (i = ARRAY_SIZE (arm_select); i--;)
1299 struct arm_cpu_select * ptr = arm_select + i;
1301 if (ptr->string != NULL && ptr->string[0] != '\0')
1303 const struct processors * sel;
1305 for (sel = ptr->processors; sel->name != NULL; sel++)
1306 if (streq (ptr->string, sel->name))
1308 /* Set the architecture define. */
1309 if (i != ARM_OPT_SET_TUNE)
1310 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1312 /* Determine the processor core for which we should
1313 tune code-generation. */
1314 if (/* -mcpu= is a sensible default. */
1315 i == ARM_OPT_SET_CPU
1316 /* -mtune= overrides -mcpu= and -march=. */
1317 || i == ARM_OPT_SET_TUNE)
1318 arm_tune = (enum processor_type) (sel - ptr->processors);
1320 /* Remember the CPU associated with this architecture.
1321 If no other option is used to set the CPU type,
1322 we'll use this to guess the most suitable tuning
1324 if (i == ARM_OPT_SET_ARCH)
1325 target_arch_cpu = sel->core;
1327 if (i == ARM_OPT_SET_CPU)
1328 selected_cpu = (enum processor_type) (sel - ptr->processors);
1330 if (i != ARM_OPT_SET_TUNE)
1332 /* If we have been given an architecture and a processor
1333 make sure that they are compatible. We only generate
1334 a warning though, and we prefer the CPU over the
1336 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1337 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1340 insn_flags = sel->flags;
1346 if (sel->name == NULL)
1347 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1351 /* Guess the tuning options from the architecture if necessary. */
1352 if (arm_tune == arm_none)
1353 arm_tune = target_arch_cpu;
1355 /* If the user did not specify a processor, choose one for them. */
1356 if (insn_flags == 0)
1358 const struct processors * sel;
1359 unsigned int sought;
1361 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1362 if (selected_cpu == arm_none)
1364 #ifdef SUBTARGET_CPU_DEFAULT
1365 /* Use the subtarget default CPU if none was specified by
1367 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1369 /* Default to ARM6. */
1370 if (selected_cpu == arm_none)
1371 selected_cpu = arm6;
1373 sel = &all_cores[selected_cpu];
1375 insn_flags = sel->flags;
1377 /* Now check to see if the user has specified some command line
1378 switch that require certain abilities from the cpu. */
1381 if (TARGET_INTERWORK || TARGET_THUMB)
1383 sought |= (FL_THUMB | FL_MODE32);
1385 /* There are no ARM processors that support both APCS-26 and
1386 interworking. Therefore we force FL_MODE26 to be removed
1387 from insn_flags here (if it was set), so that the search
1388 below will always be able to find a compatible processor. */
1389 insn_flags &= ~FL_MODE26;
1392 if (sought != 0 && ((sought & insn_flags) != sought))
1394 /* Try to locate a CPU type that supports all of the abilities
1395 of the default CPU, plus the extra abilities requested by
1397 for (sel = all_cores; sel->name != NULL; sel++)
1398 if ((sel->flags & sought) == (sought | insn_flags))
1401 if (sel->name == NULL)
1403 unsigned current_bit_count = 0;
1404 const struct processors * best_fit = NULL;
1406 /* Ideally we would like to issue an error message here
1407 saying that it was not possible to find a CPU compatible
1408 with the default CPU, but which also supports the command
1409 line options specified by the programmer, and so they
1410 ought to use the -mcpu=<name> command line option to
1411 override the default CPU type.
1413 If we cannot find a cpu that has both the
1414 characteristics of the default cpu and the given
1415 command line options we scan the array again looking
1416 for a best match. */
1417 for (sel = all_cores; sel->name != NULL; sel++)
1418 if ((sel->flags & sought) == sought)
1422 count = bit_count (sel->flags & insn_flags);
1424 if (count >= current_bit_count)
1427 current_bit_count = count;
1431 gcc_assert (best_fit);
1435 insn_flags = sel->flags;
1437 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1438 arm_default_cpu = (enum processor_type) (sel - all_cores);
1439 if (arm_tune == arm_none)
1440 arm_tune = arm_default_cpu;
1443 /* The processor for which we should tune should now have been
1445 gcc_assert (arm_tune != arm_none);
1447 tune_flags = all_cores[(int)arm_tune].flags;
1449 if (target_fp16_format_name)
1451 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1453 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1455 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1459 if (i == ARRAY_SIZE (all_fp16_formats))
1460 error ("invalid __fp16 format option: -mfp16-format=%s",
1461 target_fp16_format_name);
1464 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1466 if (target_abi_name)
1468 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1470 if (streq (arm_all_abis[i].name, target_abi_name))
1472 arm_abi = arm_all_abis[i].abi_type;
1476 if (i == ARRAY_SIZE (arm_all_abis))
1477 error ("invalid ABI option: -mabi=%s", target_abi_name);
1480 arm_abi = ARM_DEFAULT_ABI;
1482 /* Make sure that the processor choice does not conflict with any of the
1483 other command line choices. */
1484 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1485 error ("target CPU does not support ARM mode");
1487 /* BPABI targets use linker tricks to allow interworking on cores
1488 without thumb support. */
1489 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1491 warning (0, "target CPU does not support interworking" );
1492 target_flags &= ~MASK_INTERWORK;
1495 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1497 warning (0, "target CPU does not support THUMB instructions");
1498 target_flags &= ~MASK_THUMB;
1501 if (TARGET_APCS_FRAME && TARGET_THUMB)
1503 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1504 target_flags &= ~MASK_APCS_FRAME;
1507 /* Callee super interworking implies thumb interworking. Adding
1508 this to the flags here simplifies the logic elsewhere. */
1509 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1510 target_flags |= MASK_INTERWORK;
1512 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1513 from here where no function is being compiled currently. */
1514 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1515 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1517 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1518 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1520 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1521 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1523 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1525 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1526 target_flags |= MASK_APCS_FRAME;
1529 if (TARGET_POKE_FUNCTION_NAME)
1530 target_flags |= MASK_APCS_FRAME;
1532 if (TARGET_APCS_REENT && flag_pic)
1533 error ("-fpic and -mapcs-reent are incompatible");
1535 if (TARGET_APCS_REENT)
1536 warning (0, "APCS reentrant code not supported. Ignored");
1538 /* If this target is normally configured to use APCS frames, warn if they
1539 are turned off and debugging is turned on. */
1541 && write_symbols != NO_DEBUG
1542 && !TARGET_APCS_FRAME
1543 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1544 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1546 if (TARGET_APCS_FLOAT)
1547 warning (0, "passing floating point arguments in fp regs not yet supported");
1549 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1550 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1551 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1552 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1553 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1554 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1555 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1556 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1557 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1558 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1559 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1560 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1561 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1563 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1564 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1565 thumb_code = (TARGET_ARM == 0);
1566 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1567 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1568 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1569 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1570 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1572 /* If we are not using the default (ARM mode) section anchor offset
1573 ranges, then set the correct ranges now. */
1576 /* Thumb-1 LDR instructions cannot have negative offsets.
1577 Permissible positive offset ranges are 5-bit (for byte loads),
1578 6-bit (for halfword loads), or 7-bit (for word loads).
1579 Empirical results suggest a 7-bit anchor range gives the best
1580 overall code size. */
1581 targetm.min_anchor_offset = 0;
1582 targetm.max_anchor_offset = 127;
1584 else if (TARGET_THUMB2)
1586 /* The minimum is set such that the total size of the block
1587 for a particular anchor is 248 + 1 + 4095 bytes, which is
1588 divisible by eight, ensuring natural spacing of anchors. */
1589 targetm.min_anchor_offset = -248;
1590 targetm.max_anchor_offset = 4095;
1593 /* V5 code we generate is completely interworking capable, so we turn off
1594 TARGET_INTERWORK here to avoid many tests later on. */
1596 /* XXX However, we must pass the right pre-processor defines to CPP
1597 or GLD can get confused. This is a hack. */
1598 if (TARGET_INTERWORK)
1599 arm_cpp_interwork = 1;
1602 target_flags &= ~MASK_INTERWORK;
1604 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1605 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1607 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1608 error ("iwmmxt abi requires an iwmmxt capable cpu");
1610 if (target_fpu_name == NULL && target_fpe_name != NULL)
1612 if (streq (target_fpe_name, "2"))
1613 target_fpu_name = "fpe2";
1614 else if (streq (target_fpe_name, "3"))
1615 target_fpu_name = "fpe3";
1617 error ("invalid floating point emulation option: -mfpe=%s",
1621 if (target_fpu_name == NULL)
1623 #ifdef FPUTYPE_DEFAULT
1624 target_fpu_name = FPUTYPE_DEFAULT;
1626 if (arm_arch_cirrus)
1627 target_fpu_name = "maverick";
1629 target_fpu_name = "fpe2";
1633 arm_fpu_desc = NULL;
1634 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1636 if (streq (all_fpus[i].name, target_fpu_name))
1638 arm_fpu_desc = &all_fpus[i];
1645 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1649 switch (arm_fpu_desc->model)
1651 case ARM_FP_MODEL_FPA:
1652 if (arm_fpu_desc->rev == 2)
1653 arm_fpu_attr = FPU_FPE2;
1654 else if (arm_fpu_desc->rev == 3)
1655 arm_fpu_attr = FPU_FPE3;
1657 arm_fpu_attr = FPU_FPA;
1660 case ARM_FP_MODEL_MAVERICK:
1661 arm_fpu_attr = FPU_MAVERICK;
1664 case ARM_FP_MODEL_VFP:
1665 arm_fpu_attr = FPU_VFP;
1672 if (target_float_abi_name != NULL)
1674 /* The user specified a FP ABI. */
1675 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1677 if (streq (all_float_abis[i].name, target_float_abi_name))
1679 arm_float_abi = all_float_abis[i].abi_type;
1683 if (i == ARRAY_SIZE (all_float_abis))
1684 error ("invalid floating point abi: -mfloat-abi=%s",
1685 target_float_abi_name);
1688 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1690 if (TARGET_AAPCS_BASED
1691 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1692 error ("FPA is unsupported in the AAPCS");
1694 if (TARGET_AAPCS_BASED)
1696 if (TARGET_CALLER_INTERWORKING)
1697 error ("AAPCS does not support -mcaller-super-interworking");
1699 if (TARGET_CALLEE_INTERWORKING)
1700 error ("AAPCS does not support -mcallee-super-interworking");
1703 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1704 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1705 will ever exist. GCC makes no attempt to support this combination. */
1706 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1707 sorry ("iWMMXt and hardware floating point");
1709 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1710 if (TARGET_THUMB2 && TARGET_IWMMXT)
1711 sorry ("Thumb-2 iWMMXt");
1713 /* __fp16 support currently assumes the core has ldrh. */
1714 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1715 sorry ("__fp16 and no ldrh");
1717 /* If soft-float is specified then don't use FPU. */
1718 if (TARGET_SOFT_FLOAT)
1719 arm_fpu_attr = FPU_NONE;
1721 if (TARGET_AAPCS_BASED)
1723 if (arm_abi == ARM_ABI_IWMMXT)
1724 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1725 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1726 && TARGET_HARD_FLOAT
1728 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1730 arm_pcs_default = ARM_PCS_AAPCS;
1734 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1735 sorry ("-mfloat-abi=hard and VFP");
1737 if (arm_abi == ARM_ABI_APCS)
1738 arm_pcs_default = ARM_PCS_APCS;
1740 arm_pcs_default = ARM_PCS_ATPCS;
1743 /* For arm2/3 there is no need to do any scheduling if there is only
1744 a floating point emulator, or we are doing software floating-point. */
1745 if ((TARGET_SOFT_FLOAT
1746 || (TARGET_FPA && arm_fpu_desc->rev))
1747 && (tune_flags & FL_MODE32) == 0)
1748 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1750 if (target_thread_switch)
1752 if (strcmp (target_thread_switch, "soft") == 0)
1753 target_thread_pointer = TP_SOFT;
1754 else if (strcmp (target_thread_switch, "auto") == 0)
1755 target_thread_pointer = TP_AUTO;
1756 else if (strcmp (target_thread_switch, "cp15") == 0)
1757 target_thread_pointer = TP_CP15;
1759 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1762 /* Use the cp15 method if it is available. */
1763 if (target_thread_pointer == TP_AUTO)
1765 if (arm_arch6k && !TARGET_THUMB1)
1766 target_thread_pointer = TP_CP15;
1768 target_thread_pointer = TP_SOFT;
1771 if (TARGET_HARD_TP && TARGET_THUMB1)
1772 error ("can not use -mtp=cp15 with 16-bit Thumb");
1774 /* Override the default structure alignment for AAPCS ABI. */
1775 if (TARGET_AAPCS_BASED)
1776 arm_structure_size_boundary = 8;
1778 if (structure_size_string != NULL)
1780 int size = strtol (structure_size_string, NULL, 0);
1782 if (size == 8 || size == 32
1783 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1784 arm_structure_size_boundary = size;
1786 warning (0, "structure size boundary can only be set to %s",
1787 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1790 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1792 error ("RTP PIC is incompatible with Thumb");
1796 /* If stack checking is disabled, we can use r10 as the PIC register,
1797 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1798 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1800 if (TARGET_VXWORKS_RTP)
1801 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1802 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1805 if (flag_pic && TARGET_VXWORKS_RTP)
1806 arm_pic_register = 9;
1808 if (arm_pic_register_string != NULL)
1810 int pic_register = decode_reg_name (arm_pic_register_string);
1813 warning (0, "-mpic-register= is useless without -fpic");
1815 /* Prevent the user from choosing an obviously stupid PIC register. */
1816 else if (pic_register < 0 || call_used_regs[pic_register]
1817 || pic_register == HARD_FRAME_POINTER_REGNUM
1818 || pic_register == STACK_POINTER_REGNUM
1819 || pic_register >= PC_REGNUM
1820 || (TARGET_VXWORKS_RTP
1821 && (unsigned int) pic_register != arm_pic_register))
1822 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1824 arm_pic_register = pic_register;
1827 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1828 if (fix_cm3_ldrd == 2)
1830 if (selected_cpu == cortexm3)
1836 if (TARGET_THUMB1 && flag_schedule_insns)
1838 /* Don't warn since it's on by default in -O2. */
1839 flag_schedule_insns = 0;
1844 arm_constant_limit = 1;
1846 /* If optimizing for size, bump the number of instructions that we
1847 are prepared to conditionally execute (even on a StrongARM). */
1848 max_insns_skipped = 6;
1852 /* For processors with load scheduling, it never costs more than
1853 2 cycles to load a constant, and the load scheduler may well
1854 reduce that to 1. */
1856 arm_constant_limit = 1;
1858 /* On XScale the longer latency of a load makes it more difficult
1859 to achieve a good schedule, so it's faster to synthesize
1860 constants that can be done in two insns. */
1861 if (arm_tune_xscale)
1862 arm_constant_limit = 2;
1864 /* StrongARM has early execution of branches, so a sequence
1865 that is worth skipping is shorter. */
1866 if (arm_tune_strongarm)
1867 max_insns_skipped = 3;
1870 /* Hot/Cold partitioning is not currently supported, since we can't
1871 handle literal pool placement in that case. */
1872 if (flag_reorder_blocks_and_partition)
1874 inform (input_location,
1875 "-freorder-blocks-and-partition not supported on this architecture");
1876 flag_reorder_blocks_and_partition = 0;
1877 flag_reorder_blocks = 1;
1880 /* Register global variables with the garbage collector. */
1881 arm_add_gc_roots ();
1885 arm_add_gc_roots (void)
1887 gcc_obstack_init(&minipool_obstack);
1888 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1891 /* A table of known ARM exception types.
1892 For use with the interrupt function attribute. */
1896 const char *const arg;
1897 const unsigned long return_value;
1901 static const isr_attribute_arg isr_attribute_args [] =
1903 { "IRQ", ARM_FT_ISR },
1904 { "irq", ARM_FT_ISR },
1905 { "FIQ", ARM_FT_FIQ },
1906 { "fiq", ARM_FT_FIQ },
1907 { "ABORT", ARM_FT_ISR },
1908 { "abort", ARM_FT_ISR },
1909 { "ABORT", ARM_FT_ISR },
1910 { "abort", ARM_FT_ISR },
1911 { "UNDEF", ARM_FT_EXCEPTION },
1912 { "undef", ARM_FT_EXCEPTION },
1913 { "SWI", ARM_FT_EXCEPTION },
1914 { "swi", ARM_FT_EXCEPTION },
1915 { NULL, ARM_FT_NORMAL }
1918 /* Returns the (interrupt) function type of the current
1919 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1921 static unsigned long
1922 arm_isr_value (tree argument)
1924 const isr_attribute_arg * ptr;
1928 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1930 /* No argument - default to IRQ. */
1931 if (argument == NULL_TREE)
1934 /* Get the value of the argument. */
1935 if (TREE_VALUE (argument) == NULL_TREE
1936 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1937 return ARM_FT_UNKNOWN;
1939 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1941 /* Check it against the list of known arguments. */
1942 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1943 if (streq (arg, ptr->arg))
1944 return ptr->return_value;
1946 /* An unrecognized interrupt type. */
1947 return ARM_FT_UNKNOWN;
1950 /* Computes the type of the current function. */
1952 static unsigned long
1953 arm_compute_func_type (void)
1955 unsigned long type = ARM_FT_UNKNOWN;
1959 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1961 /* Decide if the current function is volatile. Such functions
1962 never return, and many memory cycles can be saved by not storing
1963 register values that will never be needed again. This optimization
1964 was added to speed up context switching in a kernel application. */
1966 && (TREE_NOTHROW (current_function_decl)
1967 || !(flag_unwind_tables
1968 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1969 && TREE_THIS_VOLATILE (current_function_decl))
1970 type |= ARM_FT_VOLATILE;
1972 if (cfun->static_chain_decl != NULL)
1973 type |= ARM_FT_NESTED;
1975 attr = DECL_ATTRIBUTES (current_function_decl);
1977 a = lookup_attribute ("naked", attr);
1979 type |= ARM_FT_NAKED;
1981 a = lookup_attribute ("isr", attr);
1983 a = lookup_attribute ("interrupt", attr);
1986 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1988 type |= arm_isr_value (TREE_VALUE (a));
1993 /* Returns the type of the current function. */
1996 arm_current_func_type (void)
1998 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1999 cfun->machine->func_type = arm_compute_func_type ();
2001 return cfun->machine->func_type;
2005 arm_allocate_stack_slots_for_args (void)
2007 /* Naked functions should not allocate stack slots for arguments. */
2008 return !IS_NAKED (arm_current_func_type ());
2012 /* Output assembler code for a block containing the constant parts
2013 of a trampoline, leaving space for the variable parts.
2015 On the ARM, (if r8 is the static chain regnum, and remembering that
2016 referencing pc adds an offset of 8) the trampoline looks like:
2019 .word static chain value
2020 .word function's address
2021 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2024 arm_asm_trampoline_template (FILE *f)
2028 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2029 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2031 else if (TARGET_THUMB2)
2033 /* The Thumb-2 trampoline is similar to the arm implementation.
2034 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2035 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2036 STATIC_CHAIN_REGNUM, PC_REGNUM);
2037 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2041 ASM_OUTPUT_ALIGN (f, 2);
2042 fprintf (f, "\t.code\t16\n");
2043 fprintf (f, ".Ltrampoline_start:\n");
2044 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2045 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2046 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2047 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2048 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2049 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2051 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2052 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2055 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2058 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2060 rtx fnaddr, mem, a_tramp;
2062 emit_block_move (m_tramp, assemble_trampoline_template (),
2063 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2065 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2066 emit_move_insn (mem, chain_value);
2068 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2069 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2070 emit_move_insn (mem, fnaddr);
2072 a_tramp = XEXP (m_tramp, 0);
2073 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2074 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2075 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2078 /* Thumb trampolines should be entered in thumb mode, so set
2079 the bottom bit of the address. */
2082 arm_trampoline_adjust_address (rtx addr)
2085 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2086 NULL, 0, OPTAB_LIB_WIDEN);
2090 /* Return 1 if it is possible to return using a single instruction.
2091 If SIBLING is non-null, this is a test for a return before a sibling
2092 call. SIBLING is the call insn, so we can examine its register usage. */
2095 use_return_insn (int iscond, rtx sibling)
2098 unsigned int func_type;
2099 unsigned long saved_int_regs;
2100 unsigned HOST_WIDE_INT stack_adjust;
2101 arm_stack_offsets *offsets;
2103 /* Never use a return instruction before reload has run. */
2104 if (!reload_completed)
2107 func_type = arm_current_func_type ();
2109 /* Naked, volatile and stack alignment functions need special
2111 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2114 /* So do interrupt functions that use the frame pointer and Thumb
2115 interrupt functions. */
2116 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2119 offsets = arm_get_frame_offsets ();
2120 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2122 /* As do variadic functions. */
2123 if (crtl->args.pretend_args_size
2124 || cfun->machine->uses_anonymous_args
2125 /* Or if the function calls __builtin_eh_return () */
2126 || crtl->calls_eh_return
2127 /* Or if the function calls alloca */
2128 || cfun->calls_alloca
2129 /* Or if there is a stack adjustment. However, if the stack pointer
2130 is saved on the stack, we can use a pre-incrementing stack load. */
2131 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2132 && stack_adjust == 4)))
2135 saved_int_regs = offsets->saved_regs_mask;
2137 /* Unfortunately, the insn
2139 ldmib sp, {..., sp, ...}
2141 triggers a bug on most SA-110 based devices, such that the stack
2142 pointer won't be correctly restored if the instruction takes a
2143 page fault. We work around this problem by popping r3 along with
2144 the other registers, since that is never slower than executing
2145 another instruction.
2147 We test for !arm_arch5 here, because code for any architecture
2148 less than this could potentially be run on one of the buggy
2150 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2152 /* Validate that r3 is a call-clobbered register (always true in
2153 the default abi) ... */
2154 if (!call_used_regs[3])
2157 /* ... that it isn't being used for a return value ... */
2158 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2161 /* ... or for a tail-call argument ... */
2164 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2166 if (find_regno_fusage (sibling, USE, 3))
2170 /* ... and that there are no call-saved registers in r0-r2
2171 (always true in the default ABI). */
2172 if (saved_int_regs & 0x7)
2176 /* Can't be done if interworking with Thumb, and any registers have been
2178 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2181 /* On StrongARM, conditional returns are expensive if they aren't
2182 taken and multiple registers have been stacked. */
2183 if (iscond && arm_tune_strongarm)
2185 /* Conditional return when just the LR is stored is a simple
2186 conditional-load instruction, that's not expensive. */
2187 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2191 && arm_pic_register != INVALID_REGNUM
2192 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2196 /* If there are saved registers but the LR isn't saved, then we need
2197 two instructions for the return. */
2198 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2201 /* Can't be done if any of the FPA regs are pushed,
2202 since this also requires an insn. */
2203 if (TARGET_HARD_FLOAT && TARGET_FPA)
2204 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2205 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2208 /* Likewise VFP regs. */
2209 if (TARGET_HARD_FLOAT && TARGET_VFP)
2210 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2211 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2214 if (TARGET_REALLY_IWMMXT)
2215 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2216 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2222 /* Return TRUE if int I is a valid immediate ARM constant. */
2225 const_ok_for_arm (HOST_WIDE_INT i)
2229 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2230 be all zero, or all one. */
2231 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2232 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2233 != ((~(unsigned HOST_WIDE_INT) 0)
2234 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2237 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2239 /* Fast return for 0 and small values. We must do this for zero, since
2240 the code below can't handle that one case. */
2241 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2244 /* Get the number of trailing zeros. */
2245 lowbit = ffs((int) i) - 1;
2247 /* Only even shifts are allowed in ARM mode so round down to the
2248 nearest even number. */
2252 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2257 /* Allow rotated constants in ARM mode. */
2259 && ((i & ~0xc000003f) == 0
2260 || (i & ~0xf000000f) == 0
2261 || (i & ~0xfc000003) == 0))
2268 /* Allow repeated pattern. */
2271 if (i == v || i == (v | (v << 8)))
2278 /* Return true if I is a valid constant for the operation CODE. */
2280 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2282 if (const_ok_for_arm (i))
2306 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2308 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2314 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2318 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2325 /* Emit a sequence of insns to handle a large constant.
2326 CODE is the code of the operation required, it can be any of SET, PLUS,
2327 IOR, AND, XOR, MINUS;
2328 MODE is the mode in which the operation is being performed;
2329 VAL is the integer to operate on;
2330 SOURCE is the other operand (a register, or a null-pointer for SET);
2331 SUBTARGETS means it is safe to create scratch registers if that will
2332 either produce a simpler sequence, or we will want to cse the values.
2333 Return value is the number of insns emitted. */
2335 /* ??? Tweak this for thumb2. */
2337 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2338 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2342 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2343 cond = COND_EXEC_TEST (PATTERN (insn));
2347 if (subtargets || code == SET
2348 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2349 && REGNO (target) != REGNO (source)))
2351 /* After arm_reorg has been called, we can't fix up expensive
2352 constants by pushing them into memory so we must synthesize
2353 them in-line, regardless of the cost. This is only likely to
2354 be more costly on chips that have load delay slots and we are
2355 compiling without running the scheduler (so no splitting
2356 occurred before the final instruction emission).
2358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2360 if (!after_arm_reorg
2362 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2364 > arm_constant_limit + (code != SET)))
2368 /* Currently SET is the only monadic value for CODE, all
2369 the rest are diadic. */
2370 if (TARGET_USE_MOVT)
2371 arm_emit_movpair (target, GEN_INT (val));
2373 emit_set_insn (target, GEN_INT (val));
2379 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2381 if (TARGET_USE_MOVT)
2382 arm_emit_movpair (temp, GEN_INT (val));
2384 emit_set_insn (temp, GEN_INT (val));
2386 /* For MINUS, the value is subtracted from, since we never
2387 have subtraction of a constant. */
2389 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2391 emit_set_insn (target,
2392 gen_rtx_fmt_ee (code, mode, source, temp));
2398 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2402 /* Return the number of instructions required to synthesize the given
2403 constant, if we start emitting them from bit-position I. */
2405 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2407 HOST_WIDE_INT temp1;
2408 int step_size = TARGET_ARM ? 2 : 1;
2411 gcc_assert (TARGET_ARM || i == 0);
2419 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2424 temp1 = remainder & ((0x0ff << end)
2425 | ((i < end) ? (0xff >> (32 - end)) : 0));
2426 remainder &= ~temp1;
2431 } while (remainder);
2436 find_best_start (unsigned HOST_WIDE_INT remainder)
2438 int best_consecutive_zeros = 0;
2442 /* If we aren't targetting ARM, the best place to start is always at
2447 for (i = 0; i < 32; i += 2)
2449 int consecutive_zeros = 0;
2451 if (!(remainder & (3 << i)))
2453 while ((i < 32) && !(remainder & (3 << i)))
2455 consecutive_zeros += 2;
2458 if (consecutive_zeros > best_consecutive_zeros)
2460 best_consecutive_zeros = consecutive_zeros;
2461 best_start = i - consecutive_zeros;
2467 /* So long as it won't require any more insns to do so, it's
2468 desirable to emit a small constant (in bits 0...9) in the last
2469 insn. This way there is more chance that it can be combined with
2470 a later addressing insn to form a pre-indexed load or store
2471 operation. Consider:
2473 *((volatile int *)0xe0000100) = 1;
2474 *((volatile int *)0xe0000110) = 2;
2476 We want this to wind up as:
2480 str rB, [rA, #0x100]
2482 str rB, [rA, #0x110]
2484 rather than having to synthesize both large constants from scratch.
2486 Therefore, we calculate how many insns would be required to emit
2487 the constant starting from `best_start', and also starting from
2488 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2489 yield a shorter sequence, we may as well use zero. */
2491 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2492 && (count_insns_for_constant (remainder, 0) <=
2493 count_insns_for_constant (remainder, best_start)))
2499 /* Emit an instruction with the indicated PATTERN. If COND is
2500 non-NULL, conditionalize the execution of the instruction on COND
2504 emit_constant_insn (rtx cond, rtx pattern)
2507 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2508 emit_insn (pattern);
2511 /* As above, but extra parameter GENERATE which, if clear, suppresses
2513 /* ??? This needs more work for thumb2. */
2516 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2517 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2522 int final_invert = 0;
2523 int can_negate_initial = 0;
2526 int num_bits_set = 0;
2527 int set_sign_bit_copies = 0;
2528 int clear_sign_bit_copies = 0;
2529 int clear_zero_bit_copies = 0;
2530 int set_zero_bit_copies = 0;
2532 unsigned HOST_WIDE_INT temp1, temp2;
2533 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2534 int step_size = TARGET_ARM ? 2 : 1;
2536 /* Find out which operations are safe for a given CODE. Also do a quick
2537 check for degenerate cases; these can occur when DImode operations
2549 can_negate_initial = 1;
2553 if (remainder == 0xffffffff)
2556 emit_constant_insn (cond,
2557 gen_rtx_SET (VOIDmode, target,
2558 GEN_INT (ARM_SIGN_EXTEND (val))));
2564 if (reload_completed && rtx_equal_p (target, source))
2568 emit_constant_insn (cond,
2569 gen_rtx_SET (VOIDmode, target, source));
2581 emit_constant_insn (cond,
2582 gen_rtx_SET (VOIDmode, target, const0_rtx));
2585 if (remainder == 0xffffffff)
2587 if (reload_completed && rtx_equal_p (target, source))
2590 emit_constant_insn (cond,
2591 gen_rtx_SET (VOIDmode, target, source));
2600 if (reload_completed && rtx_equal_p (target, source))
2603 emit_constant_insn (cond,
2604 gen_rtx_SET (VOIDmode, target, source));
2608 if (remainder == 0xffffffff)
2611 emit_constant_insn (cond,
2612 gen_rtx_SET (VOIDmode, target,
2613 gen_rtx_NOT (mode, source)));
2619 /* We treat MINUS as (val - source), since (source - val) is always
2620 passed as (source + (-val)). */
2624 emit_constant_insn (cond,
2625 gen_rtx_SET (VOIDmode, target,
2626 gen_rtx_NEG (mode, source)));
2629 if (const_ok_for_arm (val))
2632 emit_constant_insn (cond,
2633 gen_rtx_SET (VOIDmode, target,
2634 gen_rtx_MINUS (mode, GEN_INT (val),
2646 /* If we can do it in one insn get out quickly. */
2647 if (const_ok_for_arm (val)
2648 || (can_negate_initial && const_ok_for_arm (-val))
2649 || (can_invert && const_ok_for_arm (~val)))
2652 emit_constant_insn (cond,
2653 gen_rtx_SET (VOIDmode, target,
2655 ? gen_rtx_fmt_ee (code, mode, source,
2661 /* Calculate a few attributes that may be useful for specific
2663 /* Count number of leading zeros. */
2664 for (i = 31; i >= 0; i--)
2666 if ((remainder & (1 << i)) == 0)
2667 clear_sign_bit_copies++;
2672 /* Count number of leading 1's. */
2673 for (i = 31; i >= 0; i--)
2675 if ((remainder & (1 << i)) != 0)
2676 set_sign_bit_copies++;
2681 /* Count number of trailing zero's. */
2682 for (i = 0; i <= 31; i++)
2684 if ((remainder & (1 << i)) == 0)
2685 clear_zero_bit_copies++;
2690 /* Count number of trailing 1's. */
2691 for (i = 0; i <= 31; i++)
2693 if ((remainder & (1 << i)) != 0)
2694 set_zero_bit_copies++;
2702 /* See if we can use movw. */
2703 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2706 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2711 /* See if we can do this by sign_extending a constant that is known
2712 to be negative. This is a good, way of doing it, since the shift
2713 may well merge into a subsequent insn. */
2714 if (set_sign_bit_copies > 1)
2716 if (const_ok_for_arm
2717 (temp1 = ARM_SIGN_EXTEND (remainder
2718 << (set_sign_bit_copies - 1))))
2722 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2723 emit_constant_insn (cond,
2724 gen_rtx_SET (VOIDmode, new_src,
2726 emit_constant_insn (cond,
2727 gen_ashrsi3 (target, new_src,
2728 GEN_INT (set_sign_bit_copies - 1)));
2732 /* For an inverted constant, we will need to set the low bits,
2733 these will be shifted out of harm's way. */
2734 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2735 if (const_ok_for_arm (~temp1))
2739 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2740 emit_constant_insn (cond,
2741 gen_rtx_SET (VOIDmode, new_src,
2743 emit_constant_insn (cond,
2744 gen_ashrsi3 (target, new_src,
2745 GEN_INT (set_sign_bit_copies - 1)));
2751 /* See if we can calculate the value as the difference between two
2752 valid immediates. */
2753 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2755 int topshift = clear_sign_bit_copies & ~1;
2757 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2758 & (0xff000000 >> topshift));
2760 /* If temp1 is zero, then that means the 9 most significant
2761 bits of remainder were 1 and we've caused it to overflow.
2762 When topshift is 0 we don't need to do anything since we
2763 can borrow from 'bit 32'. */
2764 if (temp1 == 0 && topshift != 0)
2765 temp1 = 0x80000000 >> (topshift - 1);
2767 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2769 if (const_ok_for_arm (temp2))
2773 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2774 emit_constant_insn (cond,
2775 gen_rtx_SET (VOIDmode, new_src,
2777 emit_constant_insn (cond,
2778 gen_addsi3 (target, new_src,
2786 /* See if we can generate this by setting the bottom (or the top)
2787 16 bits, and then shifting these into the other half of the
2788 word. We only look for the simplest cases, to do more would cost
2789 too much. Be careful, however, not to generate this when the
2790 alternative would take fewer insns. */
2791 if (val & 0xffff0000)
2793 temp1 = remainder & 0xffff0000;
2794 temp2 = remainder & 0x0000ffff;
2796 /* Overlaps outside this range are best done using other methods. */
2797 for (i = 9; i < 24; i++)
2799 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2800 && !const_ok_for_arm (temp2))
2802 rtx new_src = (subtargets
2803 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2805 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2806 source, subtargets, generate);
2814 gen_rtx_ASHIFT (mode, source,
2821 /* Don't duplicate cases already considered. */
2822 for (i = 17; i < 24; i++)
2824 if (((temp1 | (temp1 >> i)) == remainder)
2825 && !const_ok_for_arm (temp1))
2827 rtx new_src = (subtargets
2828 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2830 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2831 source, subtargets, generate);
2836 gen_rtx_SET (VOIDmode, target,
2839 gen_rtx_LSHIFTRT (mode, source,
2850 /* If we have IOR or XOR, and the constant can be loaded in a
2851 single instruction, and we can find a temporary to put it in,
2852 then this can be done in two instructions instead of 3-4. */
2854 /* TARGET can't be NULL if SUBTARGETS is 0 */
2855 || (reload_completed && !reg_mentioned_p (target, source)))
2857 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2861 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2863 emit_constant_insn (cond,
2864 gen_rtx_SET (VOIDmode, sub,
2866 emit_constant_insn (cond,
2867 gen_rtx_SET (VOIDmode, target,
2868 gen_rtx_fmt_ee (code, mode,
2879 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2880 and the remainder 0s for e.g. 0xfff00000)
2881 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2883 This can be done in 2 instructions by using shifts with mov or mvn.
2888 mvn r0, r0, lsr #12 */
2889 if (set_sign_bit_copies > 8
2890 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2894 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2895 rtx shift = GEN_INT (set_sign_bit_copies);
2899 gen_rtx_SET (VOIDmode, sub,
2901 gen_rtx_ASHIFT (mode,
2906 gen_rtx_SET (VOIDmode, target,
2908 gen_rtx_LSHIFTRT (mode, sub,
2915 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2917 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2919 For eg. r0 = r0 | 0xfff
2924 if (set_zero_bit_copies > 8
2925 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2929 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2930 rtx shift = GEN_INT (set_zero_bit_copies);
2934 gen_rtx_SET (VOIDmode, sub,
2936 gen_rtx_LSHIFTRT (mode,
2941 gen_rtx_SET (VOIDmode, target,
2943 gen_rtx_ASHIFT (mode, sub,
2949 /* This will never be reached for Thumb2 because orn is a valid
2950 instruction. This is for Thumb1 and the ARM 32 bit cases.
2952 x = y | constant (such that ~constant is a valid constant)
2954 x = ~(~y & ~constant).
2956 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2960 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2961 emit_constant_insn (cond,
2962 gen_rtx_SET (VOIDmode, sub,
2963 gen_rtx_NOT (mode, source)));
2966 sub = gen_reg_rtx (mode);
2967 emit_constant_insn (cond,
2968 gen_rtx_SET (VOIDmode, sub,
2969 gen_rtx_AND (mode, source,
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, target,
2973 gen_rtx_NOT (mode, sub)));
2980 /* See if two shifts will do 2 or more insn's worth of work. */
2981 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2983 HOST_WIDE_INT shift_mask = ((0xffffffff
2984 << (32 - clear_sign_bit_copies))
2987 if ((remainder | shift_mask) != 0xffffffff)
2991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2992 insns = arm_gen_constant (AND, mode, cond,
2993 remainder | shift_mask,
2994 new_src, source, subtargets, 1);
2999 rtx targ = subtargets ? NULL_RTX : target;
3000 insns = arm_gen_constant (AND, mode, cond,
3001 remainder | shift_mask,
3002 targ, source, subtargets, 0);
3008 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3009 rtx shift = GEN_INT (clear_sign_bit_copies);
3011 emit_insn (gen_ashlsi3 (new_src, source, shift));
3012 emit_insn (gen_lshrsi3 (target, new_src, shift));
3018 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3020 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3022 if ((remainder | shift_mask) != 0xffffffff)
3026 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3028 insns = arm_gen_constant (AND, mode, cond,
3029 remainder | shift_mask,
3030 new_src, source, subtargets, 1);
3035 rtx targ = subtargets ? NULL_RTX : target;
3037 insns = arm_gen_constant (AND, mode, cond,
3038 remainder | shift_mask,
3039 targ, source, subtargets, 0);
3045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3046 rtx shift = GEN_INT (clear_zero_bit_copies);
3048 emit_insn (gen_lshrsi3 (new_src, source, shift));
3049 emit_insn (gen_ashlsi3 (target, new_src, shift));
3061 for (i = 0; i < 32; i++)
3062 if (remainder & (1 << i))
3066 || (code != IOR && can_invert && num_bits_set > 16))
3067 remainder ^= 0xffffffff;
3068 else if (code == PLUS && num_bits_set > 16)
3069 remainder = (-remainder) & 0xffffffff;
3071 /* For XOR, if more than half the bits are set and there's a sequence
3072 of more than 8 consecutive ones in the pattern then we can XOR by the
3073 inverted constant and then invert the final result; this may save an
3074 instruction and might also lead to the final mvn being merged with
3075 some other operation. */
3076 else if (code == XOR && num_bits_set > 16
3077 && (count_insns_for_constant (remainder ^ 0xffffffff,
3079 (remainder ^ 0xffffffff))
3080 < count_insns_for_constant (remainder,
3081 find_best_start (remainder))))
3083 remainder ^= 0xffffffff;
3092 /* Now try and find a way of doing the job in either two or three
3094 We start by looking for the largest block of zeros that are aligned on
3095 a 2-bit boundary, we then fill up the temps, wrapping around to the
3096 top of the word when we drop off the bottom.
3097 In the worst case this code should produce no more than four insns.
3098 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3099 best place to start. */
3101 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3104 /* Now start emitting the insns. */
3105 i = find_best_start (remainder);
3112 if (remainder & (3 << (i - 2)))
3117 temp1 = remainder & ((0x0ff << end)
3118 | ((i < end) ? (0xff >> (32 - end)) : 0));
3119 remainder &= ~temp1;
3123 rtx new_src, temp1_rtx;
3125 if (code == SET || code == MINUS)
3127 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3128 if (can_invert && code != MINUS)
3133 if ((final_invert || remainder) && subtargets)
3134 new_src = gen_reg_rtx (mode);
3139 else if (can_negate)
3143 temp1 = trunc_int_for_mode (temp1, mode);
3144 temp1_rtx = GEN_INT (temp1);
3148 else if (code == MINUS)
3149 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3151 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3153 emit_constant_insn (cond,
3154 gen_rtx_SET (VOIDmode, new_src,
3164 else if (code == MINUS)
3170 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3180 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3181 gen_rtx_NOT (mode, source)));
3188 /* Canonicalize a comparison so that we are more likely to recognize it.
3189 This can be done for a few constant compares, where we can make the
3190 immediate value easier to load. */
3193 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3196 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3197 unsigned HOST_WIDE_INT maxval;
3198 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3209 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3211 *op1 = GEN_INT (i + 1);
3212 return code == GT ? GE : LT;
3219 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3221 *op1 = GEN_INT (i - 1);
3222 return code == GE ? GT : LE;
3228 if (i != ~((unsigned HOST_WIDE_INT) 0)
3229 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3231 *op1 = GEN_INT (i + 1);
3232 return code == GTU ? GEU : LTU;
3239 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3241 *op1 = GEN_INT (i - 1);
3242 return code == GEU ? GTU : LEU;
3254 /* Define how to find the value returned by a function. */
3257 arm_function_value(const_tree type, const_tree func,
3258 bool outgoing ATTRIBUTE_UNUSED)
3260 enum machine_mode mode;
3261 int unsignedp ATTRIBUTE_UNUSED;
3262 rtx r ATTRIBUTE_UNUSED;
3264 mode = TYPE_MODE (type);
3266 if (TARGET_AAPCS_BASED)
3267 return aapcs_allocate_return_reg (mode, type, func);
3269 /* Promote integer types. */
3270 if (INTEGRAL_TYPE_P (type))
3271 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3273 /* Promotes small structs returned in a register to full-word size
3274 for big-endian AAPCS. */
3275 if (arm_return_in_msb (type))
3277 HOST_WIDE_INT size = int_size_in_bytes (type);
3278 if (size % UNITS_PER_WORD != 0)
3280 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3281 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3285 return LIBCALL_VALUE (mode);
3289 libcall_eq (const void *p1, const void *p2)
3291 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3295 libcall_hash (const void *p1)
3297 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3301 add_libcall (htab_t htab, rtx libcall)
3303 *htab_find_slot (htab, libcall, INSERT) = libcall;
3307 arm_libcall_uses_aapcs_base (const_rtx libcall)
3309 static bool init_done = false;
3310 static htab_t libcall_htab;
3316 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3318 add_libcall (libcall_htab,
3319 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3320 add_libcall (libcall_htab,
3321 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3322 add_libcall (libcall_htab,
3323 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3324 add_libcall (libcall_htab,
3325 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3327 add_libcall (libcall_htab,
3328 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3329 add_libcall (libcall_htab,
3330 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3331 add_libcall (libcall_htab,
3332 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3333 add_libcall (libcall_htab,
3334 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3336 add_libcall (libcall_htab,
3337 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3338 add_libcall (libcall_htab,
3339 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3340 add_libcall (libcall_htab,
3341 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3342 add_libcall (libcall_htab,
3343 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3344 add_libcall (libcall_htab,
3345 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3346 add_libcall (libcall_htab,
3347 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3350 return libcall && htab_find (libcall_htab, libcall) != NULL;
3354 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3356 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3357 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3359 /* The following libcalls return their result in integer registers,
3360 even though they return a floating point value. */
3361 if (arm_libcall_uses_aapcs_base (libcall))
3362 return gen_rtx_REG (mode, ARG_REGISTER(1));
3366 return LIBCALL_VALUE (mode);
3369 /* Determine the amount of memory needed to store the possible return
3370 registers of an untyped call. */
3372 arm_apply_result_size (void)
3378 if (TARGET_HARD_FLOAT_ABI)
3384 if (TARGET_MAVERICK)
3387 if (TARGET_IWMMXT_ABI)
3394 /* Decide whether TYPE should be returned in memory (true)
3395 or in a register (false). FNTYPE is the type of the function making
3398 arm_return_in_memory (const_tree type, const_tree fntype)
3402 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3404 if (TARGET_AAPCS_BASED)
3406 /* Simple, non-aggregate types (ie not including vectors and
3407 complex) are always returned in a register (or registers).
3408 We don't care about which register here, so we can short-cut
3409 some of the detail. */
3410 if (!AGGREGATE_TYPE_P (type)
3411 && TREE_CODE (type) != VECTOR_TYPE
3412 && TREE_CODE (type) != COMPLEX_TYPE)
3415 /* Any return value that is no larger than one word can be
3417 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3420 /* Check any available co-processors to see if they accept the
3421 type as a register candidate (VFP, for example, can return
3422 some aggregates in consecutive registers). These aren't
3423 available if the call is variadic. */
3424 if (aapcs_select_return_coproc (type, fntype) >= 0)
3427 /* Vector values should be returned using ARM registers, not
3428 memory (unless they're over 16 bytes, which will break since
3429 we only have four call-clobbered registers to play with). */
3430 if (TREE_CODE (type) == VECTOR_TYPE)
3431 return (size < 0 || size > (4 * UNITS_PER_WORD));
3433 /* The rest go in memory. */
3437 if (TREE_CODE (type) == VECTOR_TYPE)
3438 return (size < 0 || size > (4 * UNITS_PER_WORD));
3440 if (!AGGREGATE_TYPE_P (type) &&
3441 (TREE_CODE (type) != VECTOR_TYPE))
3442 /* All simple types are returned in registers. */
3445 if (arm_abi != ARM_ABI_APCS)
3447 /* ATPCS and later return aggregate types in memory only if they are
3448 larger than a word (or are variable size). */
3449 return (size < 0 || size > UNITS_PER_WORD);
3452 /* For the arm-wince targets we choose to be compatible with Microsoft's
3453 ARM and Thumb compilers, which always return aggregates in memory. */
3455 /* All structures/unions bigger than one word are returned in memory.
3456 Also catch the case where int_size_in_bytes returns -1. In this case
3457 the aggregate is either huge or of variable size, and in either case
3458 we will want to return it via memory and not in a register. */
3459 if (size < 0 || size > UNITS_PER_WORD)
3462 if (TREE_CODE (type) == RECORD_TYPE)
3466 /* For a struct the APCS says that we only return in a register
3467 if the type is 'integer like' and every addressable element
3468 has an offset of zero. For practical purposes this means
3469 that the structure can have at most one non bit-field element
3470 and that this element must be the first one in the structure. */
3472 /* Find the first field, ignoring non FIELD_DECL things which will
3473 have been created by C++. */
3474 for (field = TYPE_FIELDS (type);
3475 field && TREE_CODE (field) != FIELD_DECL;
3476 field = TREE_CHAIN (field))
3480 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3482 /* Check that the first field is valid for returning in a register. */
3484 /* ... Floats are not allowed */
3485 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3488 /* ... Aggregates that are not themselves valid for returning in
3489 a register are not allowed. */
3490 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3493 /* Now check the remaining fields, if any. Only bitfields are allowed,
3494 since they are not addressable. */
3495 for (field = TREE_CHAIN (field);
3497 field = TREE_CHAIN (field))
3499 if (TREE_CODE (field) != FIELD_DECL)
3502 if (!DECL_BIT_FIELD_TYPE (field))
3509 if (TREE_CODE (type) == UNION_TYPE)
3513 /* Unions can be returned in registers if every element is
3514 integral, or can be returned in an integer register. */
3515 for (field = TYPE_FIELDS (type);
3517 field = TREE_CHAIN (field))
3519 if (TREE_CODE (field) != FIELD_DECL)
3522 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3525 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3531 #endif /* not ARM_WINCE */
3533 /* Return all other types in memory. */
3537 /* Indicate whether or not words of a double are in big-endian order. */
3540 arm_float_words_big_endian (void)
3542 if (TARGET_MAVERICK)
3545 /* For FPA, float words are always big-endian. For VFP, floats words
3546 follow the memory system mode. */
3554 return (TARGET_BIG_END ? 1 : 0);
3559 const struct pcs_attribute_arg
3563 } pcs_attribute_args[] =
3565 {"aapcs", ARM_PCS_AAPCS},
3566 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3568 /* We could recognize these, but changes would be needed elsewhere
3569 * to implement them. */
3570 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3571 {"atpcs", ARM_PCS_ATPCS},
3572 {"apcs", ARM_PCS_APCS},
3574 {NULL, ARM_PCS_UNKNOWN}
3578 arm_pcs_from_attribute (tree attr)
3580 const struct pcs_attribute_arg *ptr;
3583 /* Get the value of the argument. */
3584 if (TREE_VALUE (attr) == NULL_TREE
3585 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3586 return ARM_PCS_UNKNOWN;
3588 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3590 /* Check it against the list of known arguments. */
3591 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3592 if (streq (arg, ptr->arg))
3595 /* An unrecognized interrupt type. */
3596 return ARM_PCS_UNKNOWN;
3599 /* Get the PCS variant to use for this call. TYPE is the function's type
3600 specification, DECL is the specific declartion. DECL may be null if
3601 the call could be indirect or if this is a library call. */
3603 arm_get_pcs_model (const_tree type, const_tree decl)
3605 bool user_convention = false;
3606 enum arm_pcs user_pcs = arm_pcs_default;
3611 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3614 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3615 user_convention = true;
3618 if (TARGET_AAPCS_BASED)
3620 /* Detect varargs functions. These always use the base rules
3621 (no argument is ever a candidate for a co-processor
3623 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3624 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3625 != void_type_node));
3627 if (user_convention)
3629 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3630 sorry ("Non-AAPCS derived PCS variant");
3631 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3632 error ("Variadic functions must use the base AAPCS variant");
3636 return ARM_PCS_AAPCS;
3637 else if (user_convention)
3639 else if (decl && flag_unit_at_a_time)
3641 /* Local functions never leak outside this compilation unit,
3642 so we are free to use whatever conventions are
3644 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3645 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3647 return ARM_PCS_AAPCS_LOCAL;
3650 else if (user_convention && user_pcs != arm_pcs_default)
3651 sorry ("PCS variant");
3653 /* For everything else we use the target's default. */
3654 return arm_pcs_default;
3659 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3660 const_tree fntype ATTRIBUTE_UNUSED,
3661 rtx libcall ATTRIBUTE_UNUSED,
3662 const_tree fndecl ATTRIBUTE_UNUSED)
3664 /* Record the unallocated VFP registers. */
3665 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3666 pcum->aapcs_vfp_reg_alloc = 0;
3669 /* Walk down the type tree of TYPE counting consecutive base elements.
3670 If *MODEP is VOIDmode, then set it to the first valid floating point
3671 type. If a non-floating point type is found, or if a floating point
3672 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3673 otherwise return the count in the sub-tree. */
3675 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3677 enum machine_mode mode;
3680 switch (TREE_CODE (type))
3683 mode = TYPE_MODE (type);
3684 if (mode != DFmode && mode != SFmode)
3687 if (*modep == VOIDmode)
3696 mode = TYPE_MODE (TREE_TYPE (type));
3697 if (mode != DFmode && mode != SFmode)
3700 if (*modep == VOIDmode)
3709 /* Use V2SImode and V4SImode as representatives of all 64-bit
3710 and 128-bit vector types, whether or not those modes are
3711 supported with the present options. */
3712 size = int_size_in_bytes (type);
3725 if (*modep == VOIDmode)
3728 /* Vector modes are considered to be opaque: two vectors are
3729 equivalent for the purposes of being homogeneous aggregates
3730 if they are the same size. */
3739 tree index = TYPE_DOMAIN (type);
3741 /* Can't handle incomplete types. */
3742 if (!COMPLETE_TYPE_P(type))
3745 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3748 || !TYPE_MAX_VALUE (index)
3749 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3750 || !TYPE_MIN_VALUE (index)
3751 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3755 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3756 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3758 /* There must be no padding. */
3759 if (!host_integerp (TYPE_SIZE (type), 1)
3760 || (tree_low_cst (TYPE_SIZE (type), 1)
3761 != count * GET_MODE_BITSIZE (*modep)))
3773 /* Can't handle incomplete types. */
3774 if (!COMPLETE_TYPE_P(type))
3777 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3779 if (TREE_CODE (field) != FIELD_DECL)
3782 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3788 /* There must be no padding. */
3789 if (!host_integerp (TYPE_SIZE (type), 1)
3790 || (tree_low_cst (TYPE_SIZE (type), 1)
3791 != count * GET_MODE_BITSIZE (*modep)))
3798 case QUAL_UNION_TYPE:
3800 /* These aren't very interesting except in a degenerate case. */
3805 /* Can't handle incomplete types. */
3806 if (!COMPLETE_TYPE_P(type))
3809 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3811 if (TREE_CODE (field) != FIELD_DECL)
3814 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3817 count = count > sub_count ? count : sub_count;
3820 /* There must be no padding. */
3821 if (!host_integerp (TYPE_SIZE (type), 1)
3822 || (tree_low_cst (TYPE_SIZE (type), 1)
3823 != count * GET_MODE_BITSIZE (*modep)))
3836 /* Return true if PCS_VARIANT should use VFP registers. */
3838 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3840 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3843 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3846 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3847 (TARGET_VFP_DOUBLE || !is_double));
3851 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3852 enum machine_mode mode, const_tree type,
3853 enum machine_mode *base_mode, int *count)
3855 enum machine_mode new_mode = VOIDmode;
3857 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3858 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3859 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3864 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3867 new_mode = (mode == DCmode ? DFmode : SFmode);
3869 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3871 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3873 if (ag_count > 0 && ag_count <= 4)
3882 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3885 *base_mode = new_mode;
3890 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3891 enum machine_mode mode, const_tree type)
3893 int count ATTRIBUTE_UNUSED;
3894 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3896 if (!use_vfp_abi (pcs_variant, false))
3898 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3903 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3906 if (!use_vfp_abi (pcum->pcs_variant, false))
3909 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3910 &pcum->aapcs_vfp_rmode,
3911 &pcum->aapcs_vfp_rcount);
3915 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3916 const_tree type ATTRIBUTE_UNUSED)
3918 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3919 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3922 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3923 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3925 pcum->aapcs_vfp_reg_alloc = mask << regno;
3926 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3929 int rcount = pcum->aapcs_vfp_rcount;
3931 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3935 /* Avoid using unsupported vector modes. */
3936 if (rmode == V2SImode)
3938 else if (rmode == V4SImode)
3945 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3946 for (i = 0; i < rcount; i++)
3948 rtx tmp = gen_rtx_REG (rmode,
3949 FIRST_VFP_REGNUM + regno + i * rshift);
3950 tmp = gen_rtx_EXPR_LIST
3952 GEN_INT (i * GET_MODE_SIZE (rmode)));
3953 XVECEXP (par, 0, i) = tmp;
3956 pcum->aapcs_reg = par;
3959 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3966 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3967 enum machine_mode mode,
3968 const_tree type ATTRIBUTE_UNUSED)
3970 if (!use_vfp_abi (pcs_variant, false))
3973 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3976 enum machine_mode ag_mode;
3981 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3986 if (ag_mode == V2SImode)
3988 else if (ag_mode == V4SImode)
3994 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3995 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3996 for (i = 0; i < count; i++)
3998 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3999 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4000 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4001 XVECEXP (par, 0, i) = tmp;
4007 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4011 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4012 enum machine_mode mode ATTRIBUTE_UNUSED,
4013 const_tree type ATTRIBUTE_UNUSED)
4015 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4016 pcum->aapcs_vfp_reg_alloc = 0;
4020 #define AAPCS_CP(X) \
4022 aapcs_ ## X ## _cum_init, \
4023 aapcs_ ## X ## _is_call_candidate, \
4024 aapcs_ ## X ## _allocate, \
4025 aapcs_ ## X ## _is_return_candidate, \
4026 aapcs_ ## X ## _allocate_return_reg, \
4027 aapcs_ ## X ## _advance \
4030 /* Table of co-processors that can be used to pass arguments in
4031 registers. Idealy no arugment should be a candidate for more than
4032 one co-processor table entry, but the table is processed in order
4033 and stops after the first match. If that entry then fails to put
4034 the argument into a co-processor register, the argument will go on
4038 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4039 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4041 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4042 BLKmode) is a candidate for this co-processor's registers; this
4043 function should ignore any position-dependent state in
4044 CUMULATIVE_ARGS and only use call-type dependent information. */
4045 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4047 /* Return true if the argument does get a co-processor register; it
4048 should set aapcs_reg to an RTX of the register allocated as is
4049 required for a return from FUNCTION_ARG. */
4050 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4052 /* Return true if a result of mode MODE (or type TYPE if MODE is
4053 BLKmode) is can be returned in this co-processor's registers. */
4054 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4056 /* Allocate and return an RTX element to hold the return type of a
4057 call, this routine must not fail and will only be called if
4058 is_return_candidate returned true with the same parameters. */
4059 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4061 /* Finish processing this argument and prepare to start processing
4063 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4064 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4072 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4077 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4078 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4085 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4087 /* We aren't passed a decl, so we can't check that a call is local.
4088 However, it isn't clear that that would be a win anyway, since it
4089 might limit some tail-calling opportunities. */
4090 enum arm_pcs pcs_variant;
4094 const_tree fndecl = NULL_TREE;
4096 if (TREE_CODE (fntype) == FUNCTION_DECL)
4099 fntype = TREE_TYPE (fntype);
4102 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4105 pcs_variant = arm_pcs_default;
4107 if (pcs_variant != ARM_PCS_AAPCS)
4111 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4112 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4121 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4124 /* We aren't passed a decl, so we can't check that a call is local.
4125 However, it isn't clear that that would be a win anyway, since it
4126 might limit some tail-calling opportunities. */
4127 enum arm_pcs pcs_variant;
4128 int unsignedp ATTRIBUTE_UNUSED;
4132 const_tree fndecl = NULL_TREE;
4134 if (TREE_CODE (fntype) == FUNCTION_DECL)
4137 fntype = TREE_TYPE (fntype);
4140 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4143 pcs_variant = arm_pcs_default;
4145 /* Promote integer types. */
4146 if (type && INTEGRAL_TYPE_P (type))
4147 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4149 if (pcs_variant != ARM_PCS_AAPCS)
4153 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4154 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4156 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4160 /* Promotes small structs returned in a register to full-word size
4161 for big-endian AAPCS. */
4162 if (type && arm_return_in_msb (type))
4164 HOST_WIDE_INT size = int_size_in_bytes (type);
4165 if (size % UNITS_PER_WORD != 0)
4167 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4168 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4172 return gen_rtx_REG (mode, R0_REGNUM);
4176 aapcs_libcall_value (enum machine_mode mode)
4178 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4181 /* Lay out a function argument using the AAPCS rules. The rule
4182 numbers referred to here are those in the AAPCS. */
4184 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4185 tree type, int named)
4190 /* We only need to do this once per argument. */
4191 if (pcum->aapcs_arg_processed)
4194 pcum->aapcs_arg_processed = true;
4196 /* Special case: if named is false then we are handling an incoming
4197 anonymous argument which is on the stack. */
4201 /* Is this a potential co-processor register candidate? */
4202 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4204 int slot = aapcs_select_call_coproc (pcum, mode, type);
4205 pcum->aapcs_cprc_slot = slot;
4207 /* We don't have to apply any of the rules from part B of the
4208 preparation phase, these are handled elsewhere in the
4213 /* A Co-processor register candidate goes either in its own
4214 class of registers or on the stack. */
4215 if (!pcum->aapcs_cprc_failed[slot])
4217 /* C1.cp - Try to allocate the argument to co-processor
4219 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4222 /* C2.cp - Put the argument on the stack and note that we
4223 can't assign any more candidates in this slot. We also
4224 need to note that we have allocated stack space, so that
4225 we won't later try to split a non-cprc candidate between
4226 core registers and the stack. */
4227 pcum->aapcs_cprc_failed[slot] = true;
4228 pcum->can_split = false;
4231 /* We didn't get a register, so this argument goes on the
4233 gcc_assert (pcum->can_split == false);
4238 /* C3 - For double-word aligned arguments, round the NCRN up to the
4239 next even number. */
4240 ncrn = pcum->aapcs_ncrn;
4241 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4244 nregs = ARM_NUM_REGS2(mode, type);
4246 /* Sigh, this test should really assert that nregs > 0, but a GCC
4247 extension allows empty structs and then gives them empty size; it
4248 then allows such a structure to be passed by value. For some of
4249 the code below we have to pretend that such an argument has
4250 non-zero size so that we 'locate' it correctly either in
4251 registers or on the stack. */
4252 gcc_assert (nregs >= 0);
4254 nregs2 = nregs ? nregs : 1;
4256 /* C4 - Argument fits entirely in core registers. */
4257 if (ncrn + nregs2 <= NUM_ARG_REGS)
4259 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4260 pcum->aapcs_next_ncrn = ncrn + nregs;
4264 /* C5 - Some core registers left and there are no arguments already
4265 on the stack: split this argument between the remaining core
4266 registers and the stack. */
4267 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4269 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4270 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4271 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4275 /* C6 - NCRN is set to 4. */
4276 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4278 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4282 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4283 for a call to a function whose data type is FNTYPE.
4284 For a library call, FNTYPE is NULL. */
4286 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4288 tree fndecl ATTRIBUTE_UNUSED)
4290 /* Long call handling. */
4292 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4294 pcum->pcs_variant = arm_pcs_default;
4296 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4298 if (arm_libcall_uses_aapcs_base (libname))
4299 pcum->pcs_variant = ARM_PCS_AAPCS;
4301 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4302 pcum->aapcs_reg = NULL_RTX;
4303 pcum->aapcs_partial = 0;
4304 pcum->aapcs_arg_processed = false;
4305 pcum->aapcs_cprc_slot = -1;
4306 pcum->can_split = true;
4308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4312 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4314 pcum->aapcs_cprc_failed[i] = false;
4315 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4323 /* On the ARM, the offset starts at 0. */
4325 pcum->iwmmxt_nregs = 0;
4326 pcum->can_split = true;
4328 /* Varargs vectors are treated the same as long long.
4329 named_count avoids having to change the way arm handles 'named' */
4330 pcum->named_count = 0;
4333 if (TARGET_REALLY_IWMMXT && fntype)
4337 for (fn_arg = TYPE_ARG_TYPES (fntype);
4339 fn_arg = TREE_CHAIN (fn_arg))
4340 pcum->named_count += 1;
4342 if (! pcum->named_count)
4343 pcum->named_count = INT_MAX;
4348 /* Return true if mode/type need doubleword alignment. */
4350 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4352 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4353 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4357 /* Determine where to put an argument to a function.
4358 Value is zero to push the argument on the stack,
4359 or a hard register in which to store the argument.
4361 MODE is the argument's machine mode.
4362 TYPE is the data type of the argument (as a tree).
4363 This is null for libcalls where that information may
4365 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4366 the preceding args and about the function being called.
4367 NAMED is nonzero if this argument is a named parameter
4368 (otherwise it is an extra parameter matching an ellipsis). */
4371 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4372 tree type, int named)
4376 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4377 a call insn (op3 of a call_value insn). */
4378 if (mode == VOIDmode)
4381 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4383 aapcs_layout_arg (pcum, mode, type, named);
4384 return pcum->aapcs_reg;
4387 /* Varargs vectors are treated the same as long long.
4388 named_count avoids having to change the way arm handles 'named' */
4389 if (TARGET_IWMMXT_ABI
4390 && arm_vector_mode_supported_p (mode)
4391 && pcum->named_count > pcum->nargs + 1)
4393 if (pcum->iwmmxt_nregs <= 9)
4394 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4397 pcum->can_split = false;
4402 /* Put doubleword aligned quantities in even register pairs. */
4404 && ARM_DOUBLEWORD_ALIGN
4405 && arm_needs_doubleword_align (mode, type))
4408 if (mode == VOIDmode)
4409 /* Pick an arbitrary value for operand 2 of the call insn. */
4412 /* Only allow splitting an arg between regs and memory if all preceding
4413 args were allocated to regs. For args passed by reference we only count
4414 the reference pointer. */
4415 if (pcum->can_split)
4418 nregs = ARM_NUM_REGS2 (mode, type);
4420 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4423 return gen_rtx_REG (mode, pcum->nregs);
4427 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4428 tree type, bool named)
4430 int nregs = pcum->nregs;
4432 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4434 aapcs_layout_arg (pcum, mode, type, named);
4435 return pcum->aapcs_partial;
4438 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4441 if (NUM_ARG_REGS > nregs
4442 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4444 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4450 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4451 tree type, bool named)
4453 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4455 aapcs_layout_arg (pcum, mode, type, named);
4457 if (pcum->aapcs_cprc_slot >= 0)
4459 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4461 pcum->aapcs_cprc_slot = -1;
4464 /* Generic stuff. */
4465 pcum->aapcs_arg_processed = false;
4466 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4467 pcum->aapcs_reg = NULL_RTX;
4468 pcum->aapcs_partial = 0;
4473 if (arm_vector_mode_supported_p (mode)
4474 && pcum->named_count > pcum->nargs
4475 && TARGET_IWMMXT_ABI)
4476 pcum->iwmmxt_nregs += 1;
4478 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4482 /* Variable sized types are passed by reference. This is a GCC
4483 extension to the ARM ABI. */
4486 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4487 enum machine_mode mode ATTRIBUTE_UNUSED,
4488 const_tree type, bool named ATTRIBUTE_UNUSED)
4490 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4493 /* Encode the current state of the #pragma [no_]long_calls. */
4496 OFF, /* No #pragma [no_]long_calls is in effect. */
4497 LONG, /* #pragma long_calls is in effect. */
4498 SHORT /* #pragma no_long_calls is in effect. */
4501 static arm_pragma_enum arm_pragma_long_calls = OFF;
4504 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4506 arm_pragma_long_calls = LONG;
4510 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4512 arm_pragma_long_calls = SHORT;
4516 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4518 arm_pragma_long_calls = OFF;
4521 /* Handle an attribute requiring a FUNCTION_DECL;
4522 arguments as in struct attribute_spec.handler. */
4524 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4525 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4527 if (TREE_CODE (*node) != FUNCTION_DECL)
4529 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4531 *no_add_attrs = true;
4537 /* Handle an "interrupt" or "isr" attribute;
4538 arguments as in struct attribute_spec.handler. */
4540 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4545 if (TREE_CODE (*node) != FUNCTION_DECL)
4547 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4549 *no_add_attrs = true;
4551 /* FIXME: the argument if any is checked for type attributes;
4552 should it be checked for decl ones? */
4556 if (TREE_CODE (*node) == FUNCTION_TYPE
4557 || TREE_CODE (*node) == METHOD_TYPE)
4559 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4561 warning (OPT_Wattributes, "%qE attribute ignored",
4563 *no_add_attrs = true;
4566 else if (TREE_CODE (*node) == POINTER_TYPE
4567 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4568 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4569 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4571 *node = build_variant_type_copy (*node);
4572 TREE_TYPE (*node) = build_type_attribute_variant
4574 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4575 *no_add_attrs = true;
4579 /* Possibly pass this attribute on from the type to a decl. */
4580 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4581 | (int) ATTR_FLAG_FUNCTION_NEXT
4582 | (int) ATTR_FLAG_ARRAY_NEXT))
4584 *no_add_attrs = true;
4585 return tree_cons (name, args, NULL_TREE);
4589 warning (OPT_Wattributes, "%qE attribute ignored",
4598 /* Handle a "pcs" attribute; arguments as in struct
4599 attribute_spec.handler. */
4601 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4604 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4606 warning (OPT_Wattributes, "%qE attribute ignored", name);
4607 *no_add_attrs = true;
4612 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4613 /* Handle the "notshared" attribute. This attribute is another way of
4614 requesting hidden visibility. ARM's compiler supports
4615 "__declspec(notshared)"; we support the same thing via an
4619 arm_handle_notshared_attribute (tree *node,
4620 tree name ATTRIBUTE_UNUSED,
4621 tree args ATTRIBUTE_UNUSED,
4622 int flags ATTRIBUTE_UNUSED,
4625 tree decl = TYPE_NAME (*node);
4629 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4630 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4631 *no_add_attrs = false;
4637 /* Return 0 if the attributes for two types are incompatible, 1 if they
4638 are compatible, and 2 if they are nearly compatible (which causes a
4639 warning to be generated). */
4641 arm_comp_type_attributes (const_tree type1, const_tree type2)
4645 /* Check for mismatch of non-default calling convention. */
4646 if (TREE_CODE (type1) != FUNCTION_TYPE)
4649 /* Check for mismatched call attributes. */
4650 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4651 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4652 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4653 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4655 /* Only bother to check if an attribute is defined. */
4656 if (l1 | l2 | s1 | s2)
4658 /* If one type has an attribute, the other must have the same attribute. */
4659 if ((l1 != l2) || (s1 != s2))
4662 /* Disallow mixed attributes. */
4663 if ((l1 & s2) || (l2 & s1))
4667 /* Check for mismatched ISR attribute. */
4668 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4670 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4671 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4673 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4680 /* Assigns default attributes to newly defined type. This is used to
4681 set short_call/long_call attributes for function types of
4682 functions defined inside corresponding #pragma scopes. */
4684 arm_set_default_type_attributes (tree type)
4686 /* Add __attribute__ ((long_call)) to all functions, when
4687 inside #pragma long_calls or __attribute__ ((short_call)),
4688 when inside #pragma no_long_calls. */
4689 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4691 tree type_attr_list, attr_name;
4692 type_attr_list = TYPE_ATTRIBUTES (type);
4694 if (arm_pragma_long_calls == LONG)
4695 attr_name = get_identifier ("long_call");
4696 else if (arm_pragma_long_calls == SHORT)
4697 attr_name = get_identifier ("short_call");
4701 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4702 TYPE_ATTRIBUTES (type) = type_attr_list;
4706 /* Return true if DECL is known to be linked into section SECTION. */
4709 arm_function_in_section_p (tree decl, section *section)
4711 /* We can only be certain about functions defined in the same
4712 compilation unit. */
4713 if (!TREE_STATIC (decl))
4716 /* Make sure that SYMBOL always binds to the definition in this
4717 compilation unit. */
4718 if (!targetm.binds_local_p (decl))
4721 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4722 if (!DECL_SECTION_NAME (decl))
4724 /* Make sure that we will not create a unique section for DECL. */
4725 if (flag_function_sections || DECL_ONE_ONLY (decl))
4729 return function_section (decl) == section;
4732 /* Return nonzero if a 32-bit "long_call" should be generated for
4733 a call from the current function to DECL. We generate a long_call
4736 a. has an __attribute__((long call))
4737 or b. is within the scope of a #pragma long_calls
4738 or c. the -mlong-calls command line switch has been specified
4740 However we do not generate a long call if the function:
4742 d. has an __attribute__ ((short_call))
4743 or e. is inside the scope of a #pragma no_long_calls
4744 or f. is defined in the same section as the current function. */
4747 arm_is_long_call_p (tree decl)
4752 return TARGET_LONG_CALLS;
4754 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4755 if (lookup_attribute ("short_call", attrs))
4758 /* For "f", be conservative, and only cater for cases in which the
4759 whole of the current function is placed in the same section. */
4760 if (!flag_reorder_blocks_and_partition
4761 && TREE_CODE (decl) == FUNCTION_DECL
4762 && arm_function_in_section_p (decl, current_function_section ()))
4765 if (lookup_attribute ("long_call", attrs))
4768 return TARGET_LONG_CALLS;
4771 /* Return nonzero if it is ok to make a tail-call to DECL. */
4773 arm_function_ok_for_sibcall (tree decl, tree exp)
4775 unsigned long func_type;
4777 if (cfun->machine->sibcall_blocked)
4780 /* Never tailcall something for which we have no decl, or if we
4781 are in Thumb mode. */
4782 if (decl == NULL || TARGET_THUMB)
4785 /* The PIC register is live on entry to VxWorks PLT entries, so we
4786 must make the call before restoring the PIC register. */
4787 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4790 /* Cannot tail-call to long calls, since these are out of range of
4791 a branch instruction. */
4792 if (arm_is_long_call_p (decl))
4795 /* If we are interworking and the function is not declared static
4796 then we can't tail-call it unless we know that it exists in this
4797 compilation unit (since it might be a Thumb routine). */
4798 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4801 func_type = arm_current_func_type ();
4802 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4803 if (IS_INTERRUPT (func_type))
4806 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4808 /* Check that the return value locations are the same. For
4809 example that we aren't returning a value from the sibling in
4810 a VFP register but then need to transfer it to a core
4814 a = arm_function_value (TREE_TYPE (exp), decl, false);
4815 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4817 if (!rtx_equal_p (a, b))
4821 /* Never tailcall if function may be called with a misaligned SP. */
4822 if (IS_STACKALIGN (func_type))
4825 /* Everything else is ok. */
4830 /* Addressing mode support functions. */
4832 /* Return nonzero if X is a legitimate immediate operand when compiling
4833 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4835 legitimate_pic_operand_p (rtx x)
4837 if (GET_CODE (x) == SYMBOL_REF
4838 || (GET_CODE (x) == CONST
4839 && GET_CODE (XEXP (x, 0)) == PLUS
4840 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4846 /* Record that the current function needs a PIC register. Initialize
4847 cfun->machine->pic_reg if we have not already done so. */
4850 require_pic_register (void)
4852 /* A lot of the logic here is made obscure by the fact that this
4853 routine gets called as part of the rtx cost estimation process.
4854 We don't want those calls to affect any assumptions about the real
4855 function; and further, we can't call entry_of_function() until we
4856 start the real expansion process. */
4857 if (!crtl->uses_pic_offset_table)
4859 gcc_assert (can_create_pseudo_p ());
4860 if (arm_pic_register != INVALID_REGNUM)
4862 if (!cfun->machine->pic_reg)
4863 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4865 /* Play games to avoid marking the function as needing pic
4866 if we are being called as part of the cost-estimation
4868 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4869 crtl->uses_pic_offset_table = 1;
4875 if (!cfun->machine->pic_reg)
4876 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4878 /* Play games to avoid marking the function as needing pic
4879 if we are being called as part of the cost-estimation
4881 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4883 crtl->uses_pic_offset_table = 1;
4886 arm_load_pic_register (0UL);
4890 /* We can be called during expansion of PHI nodes, where
4891 we can't yet emit instructions directly in the final
4892 insn stream. Queue the insns on the entry edge, they will
4893 be committed after everything else is expanded. */
4894 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4901 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4903 if (GET_CODE (orig) == SYMBOL_REF
4904 || GET_CODE (orig) == LABEL_REF)
4906 rtx pic_ref, address;
4910 /* If this function doesn't have a pic register, create one now. */
4911 require_pic_register ();
4915 gcc_assert (can_create_pseudo_p ());
4916 reg = gen_reg_rtx (Pmode);
4922 address = gen_reg_rtx (Pmode);
4927 emit_insn (gen_pic_load_addr_32bit (address, orig));
4928 else /* TARGET_THUMB1 */
4929 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4931 /* VxWorks does not impose a fixed gap between segments; the run-time
4932 gap can be different from the object-file gap. We therefore can't
4933 use GOTOFF unless we are absolutely sure that the symbol is in the
4934 same segment as the GOT. Unfortunately, the flexibility of linker
4935 scripts means that we can't be sure of that in general, so assume
4936 that GOTOFF is never valid on VxWorks. */
4937 if ((GET_CODE (orig) == LABEL_REF
4938 || (GET_CODE (orig) == SYMBOL_REF &&
4939 SYMBOL_REF_LOCAL_P (orig)))
4941 && !TARGET_VXWORKS_RTP)
4942 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4945 pic_ref = gen_const_mem (Pmode,
4946 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4950 insn = emit_move_insn (reg, pic_ref);
4952 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4954 set_unique_reg_note (insn, REG_EQUAL, orig);
4958 else if (GET_CODE (orig) == CONST)
4962 if (GET_CODE (XEXP (orig, 0)) == PLUS
4963 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4966 /* Handle the case where we have: const (UNSPEC_TLS). */
4967 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4968 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4971 /* Handle the case where we have:
4972 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4974 if (GET_CODE (XEXP (orig, 0)) == PLUS
4975 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4976 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4978 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4984 gcc_assert (can_create_pseudo_p ());
4985 reg = gen_reg_rtx (Pmode);
4988 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4990 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4991 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4992 base == reg ? 0 : reg);
4994 if (GET_CODE (offset) == CONST_INT)
4996 /* The base register doesn't really matter, we only want to
4997 test the index for the appropriate mode. */
4998 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5000 gcc_assert (can_create_pseudo_p ());
5001 offset = force_reg (Pmode, offset);
5004 if (GET_CODE (offset) == CONST_INT)
5005 return plus_constant (base, INTVAL (offset));
5008 if (GET_MODE_SIZE (mode) > 4
5009 && (GET_MODE_CLASS (mode) == MODE_INT
5010 || TARGET_SOFT_FLOAT))
5012 emit_insn (gen_addsi3 (reg, base, offset));
5016 return gen_rtx_PLUS (Pmode, base, offset);
5023 /* Find a spare register to use during the prolog of a function. */
5026 thumb_find_work_register (unsigned long pushed_regs_mask)
5030 /* Check the argument registers first as these are call-used. The
5031 register allocation order means that sometimes r3 might be used
5032 but earlier argument registers might not, so check them all. */
5033 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5034 if (!df_regs_ever_live_p (reg))
5037 /* Before going on to check the call-saved registers we can try a couple
5038 more ways of deducing that r3 is available. The first is when we are
5039 pushing anonymous arguments onto the stack and we have less than 4
5040 registers worth of fixed arguments(*). In this case r3 will be part of
5041 the variable argument list and so we can be sure that it will be
5042 pushed right at the start of the function. Hence it will be available
5043 for the rest of the prologue.
5044 (*): ie crtl->args.pretend_args_size is greater than 0. */
5045 if (cfun->machine->uses_anonymous_args
5046 && crtl->args.pretend_args_size > 0)
5047 return LAST_ARG_REGNUM;
5049 /* The other case is when we have fixed arguments but less than 4 registers
5050 worth. In this case r3 might be used in the body of the function, but
5051 it is not being used to convey an argument into the function. In theory
5052 we could just check crtl->args.size to see how many bytes are
5053 being passed in argument registers, but it seems that it is unreliable.
5054 Sometimes it will have the value 0 when in fact arguments are being
5055 passed. (See testcase execute/20021111-1.c for an example). So we also
5056 check the args_info.nregs field as well. The problem with this field is
5057 that it makes no allowances for arguments that are passed to the
5058 function but which are not used. Hence we could miss an opportunity
5059 when a function has an unused argument in r3. But it is better to be
5060 safe than to be sorry. */
5061 if (! cfun->machine->uses_anonymous_args
5062 && crtl->args.size >= 0
5063 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5064 && crtl->args.info.nregs < 4)
5065 return LAST_ARG_REGNUM;
5067 /* Otherwise look for a call-saved register that is going to be pushed. */
5068 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5069 if (pushed_regs_mask & (1 << reg))
5074 /* Thumb-2 can use high regs. */
5075 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5076 if (pushed_regs_mask & (1 << reg))
5079 /* Something went wrong - thumb_compute_save_reg_mask()
5080 should have arranged for a suitable register to be pushed. */
5084 static GTY(()) int pic_labelno;
5086 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5090 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5092 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5094 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5097 gcc_assert (flag_pic);
5099 pic_reg = cfun->machine->pic_reg;
5100 if (TARGET_VXWORKS_RTP)
5102 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5103 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5104 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5106 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5108 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5109 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5113 /* We use an UNSPEC rather than a LABEL_REF because this label
5114 never appears in the code stream. */
5116 labelno = GEN_INT (pic_labelno++);
5117 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5118 l1 = gen_rtx_CONST (VOIDmode, l1);
5120 /* On the ARM the PC register contains 'dot + 8' at the time of the
5121 addition, on the Thumb it is 'dot + 4'. */
5122 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5123 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5125 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5129 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5131 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5133 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5135 else /* TARGET_THUMB1 */
5137 if (arm_pic_register != INVALID_REGNUM
5138 && REGNO (pic_reg) > LAST_LO_REGNUM)
5140 /* We will have pushed the pic register, so we should always be
5141 able to find a work register. */
5142 pic_tmp = gen_rtx_REG (SImode,
5143 thumb_find_work_register (saved_regs));
5144 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5145 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5148 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5149 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5153 /* Need to emit this whether or not we obey regdecls,
5154 since setjmp/longjmp can cause life info to screw up. */
5159 /* Return nonzero if X is valid as an ARM state addressing register. */
5161 arm_address_register_rtx_p (rtx x, int strict_p)
5165 if (GET_CODE (x) != REG)
5171 return ARM_REGNO_OK_FOR_BASE_P (regno);
5173 return (regno <= LAST_ARM_REGNUM
5174 || regno >= FIRST_PSEUDO_REGISTER
5175 || regno == FRAME_POINTER_REGNUM
5176 || regno == ARG_POINTER_REGNUM);
5179 /* Return TRUE if this rtx is the difference of a symbol and a label,
5180 and will reduce to a PC-relative relocation in the object file.
5181 Expressions like this can be left alone when generating PIC, rather
5182 than forced through the GOT. */
5184 pcrel_constant_p (rtx x)
5186 if (GET_CODE (x) == MINUS)
5187 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5192 /* Return nonzero if X is a valid ARM state address operand. */
5194 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5198 enum rtx_code code = GET_CODE (x);
5200 if (arm_address_register_rtx_p (x, strict_p))
5203 use_ldrd = (TARGET_LDRD
5205 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5207 if (code == POST_INC || code == PRE_DEC
5208 || ((code == PRE_INC || code == POST_DEC)
5209 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5210 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5212 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5213 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5214 && GET_CODE (XEXP (x, 1)) == PLUS
5215 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5217 rtx addend = XEXP (XEXP (x, 1), 1);
5219 /* Don't allow ldrd post increment by register because it's hard
5220 to fixup invalid register choices. */
5222 && GET_CODE (x) == POST_MODIFY
5223 && GET_CODE (addend) == REG)
5226 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5227 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5230 /* After reload constants split into minipools will have addresses
5231 from a LABEL_REF. */
5232 else if (reload_completed
5233 && (code == LABEL_REF
5235 && GET_CODE (XEXP (x, 0)) == PLUS
5236 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5240 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5243 else if (code == PLUS)
5245 rtx xop0 = XEXP (x, 0);
5246 rtx xop1 = XEXP (x, 1);
5248 return ((arm_address_register_rtx_p (xop0, strict_p)
5249 && GET_CODE(xop1) == CONST_INT
5250 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5251 || (arm_address_register_rtx_p (xop1, strict_p)
5252 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5256 /* Reload currently can't handle MINUS, so disable this for now */
5257 else if (GET_CODE (x) == MINUS)
5259 rtx xop0 = XEXP (x, 0);
5260 rtx xop1 = XEXP (x, 1);
5262 return (arm_address_register_rtx_p (xop0, strict_p)
5263 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5267 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5268 && code == SYMBOL_REF
5269 && CONSTANT_POOL_ADDRESS_P (x)
5271 && symbol_mentioned_p (get_pool_constant (x))
5272 && ! pcrel_constant_p (get_pool_constant (x))))
5278 /* Return nonzero if X is a valid Thumb-2 address operand. */
5280 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5283 enum rtx_code code = GET_CODE (x);
5285 if (arm_address_register_rtx_p (x, strict_p))
5288 use_ldrd = (TARGET_LDRD
5290 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5292 if (code == POST_INC || code == PRE_DEC
5293 || ((code == PRE_INC || code == POST_DEC)
5294 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5295 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5297 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5298 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5299 && GET_CODE (XEXP (x, 1)) == PLUS
5300 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5302 /* Thumb-2 only has autoincrement by constant. */
5303 rtx addend = XEXP (XEXP (x, 1), 1);
5304 HOST_WIDE_INT offset;
5306 if (GET_CODE (addend) != CONST_INT)
5309 offset = INTVAL(addend);
5310 if (GET_MODE_SIZE (mode) <= 4)
5311 return (offset > -256 && offset < 256);
5313 return (use_ldrd && offset > -1024 && offset < 1024
5314 && (offset & 3) == 0);
5317 /* After reload constants split into minipools will have addresses
5318 from a LABEL_REF. */
5319 else if (reload_completed
5320 && (code == LABEL_REF
5322 && GET_CODE (XEXP (x, 0)) == PLUS
5323 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5324 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5327 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5330 else if (code == PLUS)
5332 rtx xop0 = XEXP (x, 0);
5333 rtx xop1 = XEXP (x, 1);
5335 return ((arm_address_register_rtx_p (xop0, strict_p)
5336 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5337 || (arm_address_register_rtx_p (xop1, strict_p)
5338 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5341 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5342 && code == SYMBOL_REF
5343 && CONSTANT_POOL_ADDRESS_P (x)
5345 && symbol_mentioned_p (get_pool_constant (x))
5346 && ! pcrel_constant_p (get_pool_constant (x))))
5352 /* Return nonzero if INDEX is valid for an address index operand in
5355 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5358 HOST_WIDE_INT range;
5359 enum rtx_code code = GET_CODE (index);
5361 /* Standard coprocessor addressing modes. */
5362 if (TARGET_HARD_FLOAT
5363 && (TARGET_FPA || TARGET_MAVERICK)
5364 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5365 || (TARGET_MAVERICK && mode == DImode)))
5366 return (code == CONST_INT && INTVAL (index) < 1024
5367 && INTVAL (index) > -1024
5368 && (INTVAL (index) & 3) == 0);
5371 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5372 return (code == CONST_INT
5373 && INTVAL (index) < 1016
5374 && INTVAL (index) > -1024
5375 && (INTVAL (index) & 3) == 0);
5377 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5378 return (code == CONST_INT
5379 && INTVAL (index) < 1024
5380 && INTVAL (index) > -1024
5381 && (INTVAL (index) & 3) == 0);
5383 if (arm_address_register_rtx_p (index, strict_p)
5384 && (GET_MODE_SIZE (mode) <= 4))
5387 if (mode == DImode || mode == DFmode)
5389 if (code == CONST_INT)
5391 HOST_WIDE_INT val = INTVAL (index);
5394 return val > -256 && val < 256;
5396 return val > -4096 && val < 4092;
5399 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5402 if (GET_MODE_SIZE (mode) <= 4
5406 || (mode == QImode && outer == SIGN_EXTEND))))
5410 rtx xiop0 = XEXP (index, 0);
5411 rtx xiop1 = XEXP (index, 1);
5413 return ((arm_address_register_rtx_p (xiop0, strict_p)
5414 && power_of_two_operand (xiop1, SImode))
5415 || (arm_address_register_rtx_p (xiop1, strict_p)
5416 && power_of_two_operand (xiop0, SImode)));
5418 else if (code == LSHIFTRT || code == ASHIFTRT
5419 || code == ASHIFT || code == ROTATERT)
5421 rtx op = XEXP (index, 1);
5423 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5424 && GET_CODE (op) == CONST_INT
5426 && INTVAL (op) <= 31);
5430 /* For ARM v4 we may be doing a sign-extend operation during the
5436 || (outer == SIGN_EXTEND && mode == QImode))
5442 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5444 return (code == CONST_INT
5445 && INTVAL (index) < range
5446 && INTVAL (index) > -range);
5449 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5450 index operand. i.e. 1, 2, 4 or 8. */
5452 thumb2_index_mul_operand (rtx op)
5456 if (GET_CODE(op) != CONST_INT)
5460 return (val == 1 || val == 2 || val == 4 || val == 8);
5463 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5465 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5467 enum rtx_code code = GET_CODE (index);
5469 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5470 /* Standard coprocessor addressing modes. */
5471 if (TARGET_HARD_FLOAT
5472 && (TARGET_FPA || TARGET_MAVERICK)
5473 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5474 || (TARGET_MAVERICK && mode == DImode)))
5475 return (code == CONST_INT && INTVAL (index) < 1024
5476 && INTVAL (index) > -1024
5477 && (INTVAL (index) & 3) == 0);
5479 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5481 /* For DImode assume values will usually live in core regs
5482 and only allow LDRD addressing modes. */
5483 if (!TARGET_LDRD || mode != DImode)
5484 return (code == CONST_INT
5485 && INTVAL (index) < 1024
5486 && INTVAL (index) > -1024
5487 && (INTVAL (index) & 3) == 0);
5491 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5492 return (code == CONST_INT
5493 && INTVAL (index) < 1016
5494 && INTVAL (index) > -1024
5495 && (INTVAL (index) & 3) == 0);
5497 if (arm_address_register_rtx_p (index, strict_p)
5498 && (GET_MODE_SIZE (mode) <= 4))
5501 if (mode == DImode || mode == DFmode)
5503 if (code == CONST_INT)
5505 HOST_WIDE_INT val = INTVAL (index);
5506 /* ??? Can we assume ldrd for thumb2? */
5507 /* Thumb-2 ldrd only has reg+const addressing modes. */
5508 /* ldrd supports offsets of +-1020.
5509 However the ldr fallback does not. */
5510 return val > -256 && val < 256 && (val & 3) == 0;
5518 rtx xiop0 = XEXP (index, 0);
5519 rtx xiop1 = XEXP (index, 1);
5521 return ((arm_address_register_rtx_p (xiop0, strict_p)
5522 && thumb2_index_mul_operand (xiop1))
5523 || (arm_address_register_rtx_p (xiop1, strict_p)
5524 && thumb2_index_mul_operand (xiop0)));
5526 else if (code == ASHIFT)
5528 rtx op = XEXP (index, 1);
5530 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5531 && GET_CODE (op) == CONST_INT
5533 && INTVAL (op) <= 3);
5536 return (code == CONST_INT
5537 && INTVAL (index) < 4096
5538 && INTVAL (index) > -256);
5541 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5543 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5547 if (GET_CODE (x) != REG)
5553 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5555 return (regno <= LAST_LO_REGNUM
5556 || regno > LAST_VIRTUAL_REGISTER
5557 || regno == FRAME_POINTER_REGNUM
5558 || (GET_MODE_SIZE (mode) >= 4
5559 && (regno == STACK_POINTER_REGNUM
5560 || regno >= FIRST_PSEUDO_REGISTER
5561 || x == hard_frame_pointer_rtx
5562 || x == arg_pointer_rtx)));
5565 /* Return nonzero if x is a legitimate index register. This is the case
5566 for any base register that can access a QImode object. */
5568 thumb1_index_register_rtx_p (rtx x, int strict_p)
5570 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5573 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5575 The AP may be eliminated to either the SP or the FP, so we use the
5576 least common denominator, e.g. SImode, and offsets from 0 to 64.
5578 ??? Verify whether the above is the right approach.
5580 ??? Also, the FP may be eliminated to the SP, so perhaps that
5581 needs special handling also.
5583 ??? Look at how the mips16 port solves this problem. It probably uses
5584 better ways to solve some of these problems.
5586 Although it is not incorrect, we don't accept QImode and HImode
5587 addresses based on the frame pointer or arg pointer until the
5588 reload pass starts. This is so that eliminating such addresses
5589 into stack based ones won't produce impossible code. */
5591 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5593 /* ??? Not clear if this is right. Experiment. */
5594 if (GET_MODE_SIZE (mode) < 4
5595 && !(reload_in_progress || reload_completed)
5596 && (reg_mentioned_p (frame_pointer_rtx, x)
5597 || reg_mentioned_p (arg_pointer_rtx, x)
5598 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5599 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5600 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5601 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5604 /* Accept any base register. SP only in SImode or larger. */
5605 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5608 /* This is PC relative data before arm_reorg runs. */
5609 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5610 && GET_CODE (x) == SYMBOL_REF
5611 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5614 /* This is PC relative data after arm_reorg runs. */
5615 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5617 && (GET_CODE (x) == LABEL_REF
5618 || (GET_CODE (x) == CONST
5619 && GET_CODE (XEXP (x, 0)) == PLUS
5620 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5621 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5624 /* Post-inc indexing only supported for SImode and larger. */
5625 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5626 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5629 else if (GET_CODE (x) == PLUS)
5631 /* REG+REG address can be any two index registers. */
5632 /* We disallow FRAME+REG addressing since we know that FRAME
5633 will be replaced with STACK, and SP relative addressing only
5634 permits SP+OFFSET. */
5635 if (GET_MODE_SIZE (mode) <= 4
5636 && XEXP (x, 0) != frame_pointer_rtx
5637 && XEXP (x, 1) != frame_pointer_rtx
5638 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5639 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5642 /* REG+const has 5-7 bit offset for non-SP registers. */
5643 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5644 || XEXP (x, 0) == arg_pointer_rtx)
5645 && GET_CODE (XEXP (x, 1)) == CONST_INT
5646 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5649 /* REG+const has 10-bit offset for SP, but only SImode and
5650 larger is supported. */
5651 /* ??? Should probably check for DI/DFmode overflow here
5652 just like GO_IF_LEGITIMATE_OFFSET does. */
5653 else if (GET_CODE (XEXP (x, 0)) == REG
5654 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5655 && GET_MODE_SIZE (mode) >= 4
5656 && GET_CODE (XEXP (x, 1)) == CONST_INT
5657 && INTVAL (XEXP (x, 1)) >= 0
5658 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5659 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5662 else if (GET_CODE (XEXP (x, 0)) == REG
5663 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5664 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5665 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5666 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5667 && GET_MODE_SIZE (mode) >= 4
5668 && GET_CODE (XEXP (x, 1)) == CONST_INT
5669 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5673 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5674 && GET_MODE_SIZE (mode) == 4
5675 && GET_CODE (x) == SYMBOL_REF
5676 && CONSTANT_POOL_ADDRESS_P (x)
5678 && symbol_mentioned_p (get_pool_constant (x))
5679 && ! pcrel_constant_p (get_pool_constant (x))))
5685 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5686 instruction of mode MODE. */
5688 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5690 switch (GET_MODE_SIZE (mode))
5693 return val >= 0 && val < 32;
5696 return val >= 0 && val < 64 && (val & 1) == 0;
5700 && (val + GET_MODE_SIZE (mode)) <= 128
5706 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5709 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5710 else if (TARGET_THUMB2)
5711 return thumb2_legitimate_address_p (mode, x, strict_p);
5712 else /* if (TARGET_THUMB1) */
5713 return thumb1_legitimate_address_p (mode, x, strict_p);
5716 /* Build the SYMBOL_REF for __tls_get_addr. */
5718 static GTY(()) rtx tls_get_addr_libfunc;
5721 get_tls_get_addr (void)
5723 if (!tls_get_addr_libfunc)
5724 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5725 return tls_get_addr_libfunc;
5729 arm_load_tp (rtx target)
5732 target = gen_reg_rtx (SImode);
5736 /* Can return in any reg. */
5737 emit_insn (gen_load_tp_hard (target));
5741 /* Always returned in r0. Immediately copy the result into a pseudo,
5742 otherwise other uses of r0 (e.g. setting up function arguments) may
5743 clobber the value. */
5747 emit_insn (gen_load_tp_soft ());
5749 tmp = gen_rtx_REG (SImode, 0);
5750 emit_move_insn (target, tmp);
5756 load_tls_operand (rtx x, rtx reg)
5760 if (reg == NULL_RTX)
5761 reg = gen_reg_rtx (SImode);
5763 tmp = gen_rtx_CONST (SImode, x);
5765 emit_move_insn (reg, tmp);
5771 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5773 rtx insns, label, labelno, sum;
5777 labelno = GEN_INT (pic_labelno++);
5778 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5779 label = gen_rtx_CONST (VOIDmode, label);
5781 sum = gen_rtx_UNSPEC (Pmode,
5782 gen_rtvec (4, x, GEN_INT (reloc), label,
5783 GEN_INT (TARGET_ARM ? 8 : 4)),
5785 reg = load_tls_operand (sum, reg);
5788 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5789 else if (TARGET_THUMB2)
5790 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5791 else /* TARGET_THUMB1 */
5792 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5794 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5795 Pmode, 1, reg, Pmode);
5797 insns = get_insns ();
5804 legitimize_tls_address (rtx x, rtx reg)
5806 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5807 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5811 case TLS_MODEL_GLOBAL_DYNAMIC:
5812 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5813 dest = gen_reg_rtx (Pmode);
5814 emit_libcall_block (insns, dest, ret, x);
5817 case TLS_MODEL_LOCAL_DYNAMIC:
5818 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5820 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5821 share the LDM result with other LD model accesses. */
5822 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5824 dest = gen_reg_rtx (Pmode);
5825 emit_libcall_block (insns, dest, ret, eqv);
5827 /* Load the addend. */
5828 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5830 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5831 return gen_rtx_PLUS (Pmode, dest, addend);
5833 case TLS_MODEL_INITIAL_EXEC:
5834 labelno = GEN_INT (pic_labelno++);
5835 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5836 label = gen_rtx_CONST (VOIDmode, label);
5837 sum = gen_rtx_UNSPEC (Pmode,
5838 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5839 GEN_INT (TARGET_ARM ? 8 : 4)),
5841 reg = load_tls_operand (sum, reg);
5844 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5845 else if (TARGET_THUMB2)
5846 emit_insn (gen_tls_load_dot_plus_four (reg, reg, labelno));
5849 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5850 emit_move_insn (reg, gen_const_mem (SImode, reg));
5853 tp = arm_load_tp (NULL_RTX);
5855 return gen_rtx_PLUS (Pmode, tp, reg);
5857 case TLS_MODEL_LOCAL_EXEC:
5858 tp = arm_load_tp (NULL_RTX);
5860 reg = gen_rtx_UNSPEC (Pmode,
5861 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5863 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5865 return gen_rtx_PLUS (Pmode, tp, reg);
5872 /* Try machine-dependent ways of modifying an illegitimate address
5873 to be legitimate. If we find one, return the new, valid address. */
5875 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5879 /* TODO: legitimize_address for Thumb2. */
5882 return thumb_legitimize_address (x, orig_x, mode);
5885 if (arm_tls_symbol_p (x))
5886 return legitimize_tls_address (x, NULL_RTX);
5888 if (GET_CODE (x) == PLUS)
5890 rtx xop0 = XEXP (x, 0);
5891 rtx xop1 = XEXP (x, 1);
5893 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5894 xop0 = force_reg (SImode, xop0);
5896 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5897 xop1 = force_reg (SImode, xop1);
5899 if (ARM_BASE_REGISTER_RTX_P (xop0)
5900 && GET_CODE (xop1) == CONST_INT)
5902 HOST_WIDE_INT n, low_n;
5906 /* VFP addressing modes actually allow greater offsets, but for
5907 now we just stick with the lowest common denominator. */
5909 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5921 low_n = ((mode) == TImode ? 0
5922 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5926 base_reg = gen_reg_rtx (SImode);
5927 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5928 emit_move_insn (base_reg, val);
5929 x = plus_constant (base_reg, low_n);
5931 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5932 x = gen_rtx_PLUS (SImode, xop0, xop1);
5935 /* XXX We don't allow MINUS any more -- see comment in
5936 arm_legitimate_address_outer_p (). */
5937 else if (GET_CODE (x) == MINUS)
5939 rtx xop0 = XEXP (x, 0);
5940 rtx xop1 = XEXP (x, 1);
5942 if (CONSTANT_P (xop0))
5943 xop0 = force_reg (SImode, xop0);
5945 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5946 xop1 = force_reg (SImode, xop1);
5948 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5949 x = gen_rtx_MINUS (SImode, xop0, xop1);
5952 /* Make sure to take full advantage of the pre-indexed addressing mode
5953 with absolute addresses which often allows for the base register to
5954 be factorized for multiple adjacent memory references, and it might
5955 even allows for the mini pool to be avoided entirely. */
5956 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5959 HOST_WIDE_INT mask, base, index;
5962 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5963 use a 8-bit index. So let's use a 12-bit index for SImode only and
5964 hope that arm_gen_constant will enable ldrb to use more bits. */
5965 bits = (mode == SImode) ? 12 : 8;
5966 mask = (1 << bits) - 1;
5967 base = INTVAL (x) & ~mask;
5968 index = INTVAL (x) & mask;
5969 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5971 /* It'll most probably be more efficient to generate the base
5972 with more bits set and use a negative index instead. */
5976 base_reg = force_reg (SImode, GEN_INT (base));
5977 x = plus_constant (base_reg, index);
5982 /* We need to find and carefully transform any SYMBOL and LABEL
5983 references; so go back to the original address expression. */
5984 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5986 if (new_x != orig_x)
5994 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5995 to be legitimate. If we find one, return the new, valid address. */
5997 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5999 if (arm_tls_symbol_p (x))
6000 return legitimize_tls_address (x, NULL_RTX);
6002 if (GET_CODE (x) == PLUS
6003 && GET_CODE (XEXP (x, 1)) == CONST_INT
6004 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6005 || INTVAL (XEXP (x, 1)) < 0))
6007 rtx xop0 = XEXP (x, 0);
6008 rtx xop1 = XEXP (x, 1);
6009 HOST_WIDE_INT offset = INTVAL (xop1);
6011 /* Try and fold the offset into a biasing of the base register and
6012 then offsetting that. Don't do this when optimizing for space
6013 since it can cause too many CSEs. */
6014 if (optimize_size && offset >= 0
6015 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6017 HOST_WIDE_INT delta;
6020 delta = offset - (256 - GET_MODE_SIZE (mode));
6021 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6022 delta = 31 * GET_MODE_SIZE (mode);
6024 delta = offset & (~31 * GET_MODE_SIZE (mode));
6026 xop0 = force_operand (plus_constant (xop0, offset - delta),
6028 x = plus_constant (xop0, delta);
6030 else if (offset < 0 && offset > -256)
6031 /* Small negative offsets are best done with a subtract before the
6032 dereference, forcing these into a register normally takes two
6034 x = force_operand (x, NULL_RTX);
6037 /* For the remaining cases, force the constant into a register. */
6038 xop1 = force_reg (SImode, xop1);
6039 x = gen_rtx_PLUS (SImode, xop0, xop1);
6042 else if (GET_CODE (x) == PLUS
6043 && s_register_operand (XEXP (x, 1), SImode)
6044 && !s_register_operand (XEXP (x, 0), SImode))
6046 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6048 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6053 /* We need to find and carefully transform any SYMBOL and LABEL
6054 references; so go back to the original address expression. */
6055 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6057 if (new_x != orig_x)
6065 thumb_legitimize_reload_address (rtx *x_p,
6066 enum machine_mode mode,
6067 int opnum, int type,
6068 int ind_levels ATTRIBUTE_UNUSED)
6072 if (GET_CODE (x) == PLUS
6073 && GET_MODE_SIZE (mode) < 4
6074 && REG_P (XEXP (x, 0))
6075 && XEXP (x, 0) == stack_pointer_rtx
6076 && GET_CODE (XEXP (x, 1)) == CONST_INT
6077 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6082 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6083 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6087 /* If both registers are hi-regs, then it's better to reload the
6088 entire expression rather than each register individually. That
6089 only requires one reload register rather than two. */
6090 if (GET_CODE (x) == PLUS
6091 && REG_P (XEXP (x, 0))
6092 && REG_P (XEXP (x, 1))
6093 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6094 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6099 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6100 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6107 /* Test for various thread-local symbols. */
6109 /* Return TRUE if X is a thread-local symbol. */
6112 arm_tls_symbol_p (rtx x)
6114 if (! TARGET_HAVE_TLS)
6117 if (GET_CODE (x) != SYMBOL_REF)
6120 return SYMBOL_REF_TLS_MODEL (x) != 0;
6123 /* Helper for arm_tls_referenced_p. */
6126 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6128 if (GET_CODE (*x) == SYMBOL_REF)
6129 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6131 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6132 TLS offsets, not real symbol references. */
6133 if (GET_CODE (*x) == UNSPEC
6134 && XINT (*x, 1) == UNSPEC_TLS)
6140 /* Return TRUE if X contains any TLS symbol references. */
6143 arm_tls_referenced_p (rtx x)
6145 if (! TARGET_HAVE_TLS)
6148 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6151 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6154 arm_cannot_force_const_mem (rtx x)
6158 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6160 split_const (x, &base, &offset);
6161 if (GET_CODE (base) == SYMBOL_REF
6162 && !offset_within_block_p (base, INTVAL (offset)))
6165 return arm_tls_referenced_p (x);
6168 #define REG_OR_SUBREG_REG(X) \
6169 (GET_CODE (X) == REG \
6170 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6172 #define REG_OR_SUBREG_RTX(X) \
6173 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6175 #ifndef COSTS_N_INSNS
6176 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6179 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6181 enum machine_mode mode = GET_MODE (x);
6194 return COSTS_N_INSNS (1);
6197 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6200 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6207 return COSTS_N_INSNS (2) + cycles;
6209 return COSTS_N_INSNS (1) + 16;
6212 return (COSTS_N_INSNS (1)
6213 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6214 + GET_CODE (SET_DEST (x)) == MEM));
6219 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6221 if (thumb_shiftable_const (INTVAL (x)))
6222 return COSTS_N_INSNS (2);
6223 return COSTS_N_INSNS (3);
6225 else if ((outer == PLUS || outer == COMPARE)
6226 && INTVAL (x) < 256 && INTVAL (x) > -256)
6228 else if ((outer == IOR || outer == XOR || outer == AND)
6229 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6230 return COSTS_N_INSNS (1);
6231 else if (outer == AND)
6234 /* This duplicates the tests in the andsi3 expander. */
6235 for (i = 9; i <= 31; i++)
6236 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6237 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6238 return COSTS_N_INSNS (2);
6240 else if (outer == ASHIFT || outer == ASHIFTRT
6241 || outer == LSHIFTRT)
6243 return COSTS_N_INSNS (2);
6249 return COSTS_N_INSNS (3);
6267 /* XXX another guess. */
6268 /* Memory costs quite a lot for the first word, but subsequent words
6269 load at the equivalent of a single insn each. */
6270 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6271 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6276 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6281 /* XXX still guessing. */
6282 switch (GET_MODE (XEXP (x, 0)))
6285 return (1 + (mode == DImode ? 4 : 0)
6286 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6289 return (4 + (mode == DImode ? 4 : 0)
6290 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6293 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6305 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6307 enum machine_mode mode = GET_MODE (x);
6308 enum rtx_code subcode;
6310 enum rtx_code code = GET_CODE (x);
6317 /* Memory costs quite a lot for the first word, but subsequent words
6318 load at the equivalent of a single insn each. */
6319 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6326 if (TARGET_HARD_FLOAT && mode == SFmode)
6327 *total = COSTS_N_INSNS (2);
6328 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6329 *total = COSTS_N_INSNS (4);
6331 *total = COSTS_N_INSNS (20);
6335 if (GET_CODE (XEXP (x, 1)) == REG)
6336 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6337 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6338 *total = rtx_cost (XEXP (x, 1), code, speed);
6344 *total += COSTS_N_INSNS (4);
6349 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6350 *total += rtx_cost (XEXP (x, 0), code, speed);
6353 *total += COSTS_N_INSNS (3);
6357 *total += COSTS_N_INSNS (1);
6358 /* Increase the cost of complex shifts because they aren't any faster,
6359 and reduce dual issue opportunities. */
6360 if (arm_tune_cortex_a9
6361 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6369 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6371 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6372 *total = COSTS_N_INSNS (1);
6374 *total = COSTS_N_INSNS (20);
6377 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6378 /* Thumb2 does not have RSB, so all arguments must be
6379 registers (subtracting a constant is canonicalized as
6380 addition of the negated constant). */
6386 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6387 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6388 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6390 *total += rtx_cost (XEXP (x, 1), code, speed);
6394 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6395 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6397 *total += rtx_cost (XEXP (x, 0), code, speed);
6404 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6406 if (TARGET_HARD_FLOAT
6408 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6410 *total = COSTS_N_INSNS (1);
6411 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6412 && arm_const_double_rtx (XEXP (x, 0)))
6414 *total += rtx_cost (XEXP (x, 1), code, speed);
6418 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6419 && arm_const_double_rtx (XEXP (x, 1)))
6421 *total += rtx_cost (XEXP (x, 0), code, speed);
6427 *total = COSTS_N_INSNS (20);
6431 *total = COSTS_N_INSNS (1);
6432 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6433 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6435 *total += rtx_cost (XEXP (x, 1), code, speed);
6439 subcode = GET_CODE (XEXP (x, 1));
6440 if (subcode == ASHIFT || subcode == ASHIFTRT
6441 || subcode == LSHIFTRT
6442 || subcode == ROTATE || subcode == ROTATERT)
6444 *total += rtx_cost (XEXP (x, 0), code, speed);
6445 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6449 /* A shift as a part of RSB costs no more than RSB itself. */
6450 if (GET_CODE (XEXP (x, 0)) == MULT
6451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6453 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6454 *total += rtx_cost (XEXP (x, 1), code, speed);
6459 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6461 *total += rtx_cost (XEXP (x, 0), code, speed);
6462 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6466 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6467 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6469 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6470 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6471 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6472 *total += COSTS_N_INSNS (1);
6480 if (code == PLUS && arm_arch6 && mode == SImode
6481 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6482 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6484 *total = COSTS_N_INSNS (1);
6485 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6487 *total += rtx_cost (XEXP (x, 1), code, speed);
6491 /* MLA: All arguments must be registers. We filter out
6492 multiplication by a power of two, so that we fall down into
6494 if (GET_CODE (XEXP (x, 0)) == MULT
6495 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6497 /* The cost comes from the cost of the multiply. */
6501 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6503 if (TARGET_HARD_FLOAT
6505 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6507 *total = COSTS_N_INSNS (1);
6508 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6509 && arm_const_double_rtx (XEXP (x, 1)))
6511 *total += rtx_cost (XEXP (x, 0), code, speed);
6518 *total = COSTS_N_INSNS (20);
6522 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6523 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6525 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6526 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6527 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6528 *total += COSTS_N_INSNS (1);
6534 case AND: case XOR: case IOR:
6537 /* Normally the frame registers will be spilt into reg+const during
6538 reload, so it is a bad idea to combine them with other instructions,
6539 since then they might not be moved outside of loops. As a compromise
6540 we allow integration with ops that have a constant as their second
6542 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6543 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6544 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6545 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6546 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6551 *total += COSTS_N_INSNS (2);
6552 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6553 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6555 *total += rtx_cost (XEXP (x, 0), code, speed);
6562 *total += COSTS_N_INSNS (1);
6563 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6564 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6566 *total += rtx_cost (XEXP (x, 0), code, speed);
6569 subcode = GET_CODE (XEXP (x, 0));
6570 if (subcode == ASHIFT || subcode == ASHIFTRT
6571 || subcode == LSHIFTRT
6572 || subcode == ROTATE || subcode == ROTATERT)
6574 *total += rtx_cost (XEXP (x, 1), code, speed);
6575 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6580 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6582 *total += rtx_cost (XEXP (x, 1), code, speed);
6583 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6587 if (subcode == UMIN || subcode == UMAX
6588 || subcode == SMIN || subcode == SMAX)
6590 *total = COSTS_N_INSNS (3);
6597 /* This should have been handled by the CPU specific routines. */
6601 if (arm_arch3m && mode == SImode
6602 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6603 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6604 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6605 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6607 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6609 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6612 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6616 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6618 if (TARGET_HARD_FLOAT
6620 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6622 *total = COSTS_N_INSNS (1);
6625 *total = COSTS_N_INSNS (2);
6631 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6632 if (mode == SImode && code == NOT)
6634 subcode = GET_CODE (XEXP (x, 0));
6635 if (subcode == ASHIFT || subcode == ASHIFTRT
6636 || subcode == LSHIFTRT
6637 || subcode == ROTATE || subcode == ROTATERT
6639 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6641 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6642 /* Register shifts cost an extra cycle. */
6643 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6644 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6653 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6655 *total = COSTS_N_INSNS (4);
6659 operand = XEXP (x, 0);
6661 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6662 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6663 && GET_CODE (XEXP (operand, 0)) == REG
6664 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6665 *total += COSTS_N_INSNS (1);
6666 *total += (rtx_cost (XEXP (x, 1), code, speed)
6667 + rtx_cost (XEXP (x, 2), code, speed));
6671 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6673 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6679 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6680 && mode == SImode && XEXP (x, 1) == const0_rtx)
6682 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6688 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6689 && mode == SImode && XEXP (x, 1) == const0_rtx)
6691 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6711 /* SCC insns. In the case where the comparison has already been
6712 performed, then they cost 2 instructions. Otherwise they need
6713 an additional comparison before them. */
6714 *total = COSTS_N_INSNS (2);
6715 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6722 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6728 *total += COSTS_N_INSNS (1);
6729 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6730 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6732 *total += rtx_cost (XEXP (x, 0), code, speed);
6736 subcode = GET_CODE (XEXP (x, 0));
6737 if (subcode == ASHIFT || subcode == ASHIFTRT
6738 || subcode == LSHIFTRT
6739 || subcode == ROTATE || subcode == ROTATERT)
6741 *total += rtx_cost (XEXP (x, 1), code, speed);
6742 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6747 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6749 *total += rtx_cost (XEXP (x, 1), code, speed);
6750 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6760 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6761 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6762 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6763 *total += rtx_cost (XEXP (x, 1), code, speed);
6767 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6769 if (TARGET_HARD_FLOAT
6771 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6773 *total = COSTS_N_INSNS (1);
6776 *total = COSTS_N_INSNS (20);
6779 *total = COSTS_N_INSNS (1);
6781 *total += COSTS_N_INSNS (3);
6785 if (GET_MODE_CLASS (mode) == MODE_INT)
6789 *total += COSTS_N_INSNS (1);
6791 if (GET_MODE (XEXP (x, 0)) != SImode)
6795 if (GET_CODE (XEXP (x, 0)) != MEM)
6796 *total += COSTS_N_INSNS (1);
6798 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6799 *total += COSTS_N_INSNS (2);
6808 if (GET_MODE_CLASS (mode) == MODE_INT)
6811 *total += COSTS_N_INSNS (1);
6813 if (GET_MODE (XEXP (x, 0)) != SImode)
6817 if (GET_CODE (XEXP (x, 0)) != MEM)
6818 *total += COSTS_N_INSNS (1);
6820 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6821 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6828 switch (GET_MODE (XEXP (x, 0)))
6835 *total = COSTS_N_INSNS (1);
6845 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6849 if (const_ok_for_arm (INTVAL (x))
6850 || const_ok_for_arm (~INTVAL (x)))
6851 *total = COSTS_N_INSNS (1);
6853 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6854 INTVAL (x), NULL_RTX,
6861 *total = COSTS_N_INSNS (3);
6865 *total = COSTS_N_INSNS (1);
6869 *total = COSTS_N_INSNS (1);
6870 *total += rtx_cost (XEXP (x, 0), code, speed);
6874 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6875 && (mode == SFmode || !TARGET_VFP_SINGLE))
6876 *total = COSTS_N_INSNS (1);
6878 *total = COSTS_N_INSNS (4);
6882 *total = COSTS_N_INSNS (4);
6887 /* RTX costs when optimizing for size. */
6889 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6892 enum machine_mode mode = GET_MODE (x);
6895 /* XXX TBD. For now, use the standard costs. */
6896 *total = thumb1_rtx_costs (x, code, outer_code);
6900 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6904 /* A memory access costs 1 insn if the mode is small, or the address is
6905 a single register, otherwise it costs one insn per word. */
6906 if (REG_P (XEXP (x, 0)))
6907 *total = COSTS_N_INSNS (1);
6909 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6916 /* Needs a libcall, so it costs about this. */
6917 *total = COSTS_N_INSNS (2);
6921 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6923 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6931 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6933 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6936 else if (mode == SImode)
6938 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6939 /* Slightly disparage register shifts, but not by much. */
6940 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6941 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6945 /* Needs a libcall. */
6946 *total = COSTS_N_INSNS (2);
6950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6951 && (mode == SFmode || !TARGET_VFP_SINGLE))
6953 *total = COSTS_N_INSNS (1);
6959 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6960 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6962 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6963 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6964 || subcode1 == ROTATE || subcode1 == ROTATERT
6965 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6966 || subcode1 == ASHIFTRT)
6968 /* It's just the cost of the two operands. */
6973 *total = COSTS_N_INSNS (1);
6977 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6981 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6982 && (mode == SFmode || !TARGET_VFP_SINGLE))
6984 *total = COSTS_N_INSNS (1);
6988 /* A shift as a part of ADD costs nothing. */
6989 if (GET_CODE (XEXP (x, 0)) == MULT
6990 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6992 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6994 *total += rtx_cost (XEXP (x, 1), code, false);
6999 case AND: case XOR: case IOR:
7002 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7004 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7005 || subcode == LSHIFTRT || subcode == ASHIFTRT
7006 || (code == AND && subcode == NOT))
7008 /* It's just the cost of the two operands. */
7014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7023 && (mode == SFmode || !TARGET_VFP_SINGLE))
7025 *total = COSTS_N_INSNS (1);
7031 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7040 if (cc_register (XEXP (x, 0), VOIDmode))
7043 *total = COSTS_N_INSNS (1);
7047 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7048 && (mode == SFmode || !TARGET_VFP_SINGLE))
7049 *total = COSTS_N_INSNS (1);
7051 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7056 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7058 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7059 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7062 *total += COSTS_N_INSNS (1);
7067 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7069 switch (GET_MODE (XEXP (x, 0)))
7072 *total += COSTS_N_INSNS (1);
7076 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7082 *total += COSTS_N_INSNS (2);
7087 *total += COSTS_N_INSNS (1);
7092 if (const_ok_for_arm (INTVAL (x)))
7093 /* A multiplication by a constant requires another instruction
7094 to load the constant to a register. */
7095 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7097 else if (const_ok_for_arm (~INTVAL (x)))
7098 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7099 else if (const_ok_for_arm (-INTVAL (x)))
7101 if (outer_code == COMPARE || outer_code == PLUS
7102 || outer_code == MINUS)
7105 *total = COSTS_N_INSNS (1);
7108 *total = COSTS_N_INSNS (2);
7114 *total = COSTS_N_INSNS (2);
7118 *total = COSTS_N_INSNS (4);
7123 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7124 cost of these slightly. */
7125 *total = COSTS_N_INSNS (1) + 1;
7129 if (mode != VOIDmode)
7130 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7132 *total = COSTS_N_INSNS (4); /* How knows? */
7137 /* RTX costs when optimizing for size. */
7139 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7143 return arm_size_rtx_costs (x, (enum rtx_code) code,
7144 (enum rtx_code) outer_code, total);
7146 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7147 (enum rtx_code) outer_code,
7151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7152 supported on any "slowmul" cores, so it can be ignored. */
7155 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7156 int *total, bool speed)
7158 enum machine_mode mode = GET_MODE (x);
7162 *total = thumb1_rtx_costs (x, code, outer_code);
7169 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7172 *total = COSTS_N_INSNS (20);
7176 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7178 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7179 & (unsigned HOST_WIDE_INT) 0xffffffff);
7180 int cost, const_ok = const_ok_for_arm (i);
7181 int j, booth_unit_size;
7183 /* Tune as appropriate. */
7184 cost = const_ok ? 4 : 8;
7185 booth_unit_size = 2;
7186 for (j = 0; i && j < 32; j += booth_unit_size)
7188 i >>= booth_unit_size;
7192 *total = COSTS_N_INSNS (cost);
7193 *total += rtx_cost (XEXP (x, 0), code, speed);
7197 *total = COSTS_N_INSNS (20);
7201 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7206 /* RTX cost for cores with a fast multiply unit (M variants). */
7209 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7210 int *total, bool speed)
7212 enum machine_mode mode = GET_MODE (x);
7216 *total = thumb1_rtx_costs (x, code, outer_code);
7220 /* ??? should thumb2 use different costs? */
7224 /* There is no point basing this on the tuning, since it is always the
7225 fast variant if it exists at all. */
7227 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7228 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7229 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7231 *total = COSTS_N_INSNS(2);
7238 *total = COSTS_N_INSNS (5);
7242 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7244 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7245 & (unsigned HOST_WIDE_INT) 0xffffffff);
7246 int cost, const_ok = const_ok_for_arm (i);
7247 int j, booth_unit_size;
7249 /* Tune as appropriate. */
7250 cost = const_ok ? 4 : 8;
7251 booth_unit_size = 8;
7252 for (j = 0; i && j < 32; j += booth_unit_size)
7254 i >>= booth_unit_size;
7258 *total = COSTS_N_INSNS(cost);
7264 *total = COSTS_N_INSNS (4);
7268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7270 if (TARGET_HARD_FLOAT
7272 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7274 *total = COSTS_N_INSNS (1);
7279 /* Requires a lib call */
7280 *total = COSTS_N_INSNS (20);
7284 return arm_rtx_costs_1 (x, outer_code, total, speed);
7289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7290 so it can be ignored. */
7293 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7295 enum machine_mode mode = GET_MODE (x);
7299 *total = thumb1_rtx_costs (x, code, outer_code);
7306 if (GET_CODE (XEXP (x, 0)) != MULT)
7307 return arm_rtx_costs_1 (x, outer_code, total, speed);
7309 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7310 will stall until the multiplication is complete. */
7311 *total = COSTS_N_INSNS (3);
7315 /* There is no point basing this on the tuning, since it is always the
7316 fast variant if it exists at all. */
7318 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7319 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7320 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7322 *total = COSTS_N_INSNS (2);
7329 *total = COSTS_N_INSNS (5);
7333 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7335 /* If operand 1 is a constant we can more accurately
7336 calculate the cost of the multiply. The multiplier can
7337 retire 15 bits on the first cycle and a further 12 on the
7338 second. We do, of course, have to load the constant into
7339 a register first. */
7340 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7341 /* There's a general overhead of one cycle. */
7343 unsigned HOST_WIDE_INT masked_const;
7348 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7350 masked_const = i & 0xffff8000;
7351 if (masked_const != 0)
7354 masked_const = i & 0xf8000000;
7355 if (masked_const != 0)
7358 *total = COSTS_N_INSNS (cost);
7364 *total = COSTS_N_INSNS (3);
7368 /* Requires a lib call */
7369 *total = COSTS_N_INSNS (20);
7373 return arm_rtx_costs_1 (x, outer_code, total, speed);
7378 /* RTX costs for 9e (and later) cores. */
7381 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7382 int *total, bool speed)
7384 enum machine_mode mode = GET_MODE (x);
7391 *total = COSTS_N_INSNS (3);
7395 *total = thumb1_rtx_costs (x, code, outer_code);
7403 /* There is no point basing this on the tuning, since it is always the
7404 fast variant if it exists at all. */
7406 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7407 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7408 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7410 *total = COSTS_N_INSNS (2);
7417 *total = COSTS_N_INSNS (5);
7423 *total = COSTS_N_INSNS (2);
7427 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7429 if (TARGET_HARD_FLOAT
7431 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7433 *total = COSTS_N_INSNS (1);
7438 *total = COSTS_N_INSNS (20);
7442 return arm_rtx_costs_1 (x, outer_code, total, speed);
7445 /* All address computations that can be done are free, but rtx cost returns
7446 the same for practically all of them. So we weight the different types
7447 of address here in the order (most pref first):
7448 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7450 arm_arm_address_cost (rtx x)
7452 enum rtx_code c = GET_CODE (x);
7454 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7456 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7461 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7464 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7474 arm_thumb_address_cost (rtx x)
7476 enum rtx_code c = GET_CODE (x);
7481 && GET_CODE (XEXP (x, 0)) == REG
7482 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7489 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7491 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7495 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7499 /* Some true dependencies can have a higher cost depending
7500 on precisely how certain input operands are used. */
7502 && REG_NOTE_KIND (link) == 0
7503 && recog_memoized (insn) >= 0
7504 && recog_memoized (dep) >= 0)
7506 int shift_opnum = get_attr_shift (insn);
7507 enum attr_type attr_type = get_attr_type (dep);
7509 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7510 operand for INSN. If we have a shifted input operand and the
7511 instruction we depend on is another ALU instruction, then we may
7512 have to account for an additional stall. */
7513 if (shift_opnum != 0
7514 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7516 rtx shifted_operand;
7519 /* Get the shifted operand. */
7520 extract_insn (insn);
7521 shifted_operand = recog_data.operand[shift_opnum];
7523 /* Iterate over all the operands in DEP. If we write an operand
7524 that overlaps with SHIFTED_OPERAND, then we have increase the
7525 cost of this dependency. */
7527 preprocess_constraints ();
7528 for (opno = 0; opno < recog_data.n_operands; opno++)
7530 /* We can ignore strict inputs. */
7531 if (recog_data.operand_type[opno] == OP_IN)
7534 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7541 /* XXX This is not strictly true for the FPA. */
7542 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7543 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7546 /* Call insns don't incur a stall, even if they follow a load. */
7547 if (REG_NOTE_KIND (link) == 0
7548 && GET_CODE (insn) == CALL_INSN)
7551 if ((i_pat = single_set (insn)) != NULL
7552 && GET_CODE (SET_SRC (i_pat)) == MEM
7553 && (d_pat = single_set (dep)) != NULL
7554 && GET_CODE (SET_DEST (d_pat)) == MEM)
7556 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7557 /* This is a load after a store, there is no conflict if the load reads
7558 from a cached area. Assume that loads from the stack, and from the
7559 constant pool are cached, and that others will miss. This is a
7562 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7563 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7564 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7565 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7572 static int fp_consts_inited = 0;
7574 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7575 static const char * const strings_fp[8] =
7578 "4", "5", "0.5", "10"
7581 static REAL_VALUE_TYPE values_fp[8];
7584 init_fp_table (void)
7590 fp_consts_inited = 1;
7592 fp_consts_inited = 8;
7594 for (i = 0; i < fp_consts_inited; i++)
7596 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7601 /* Return TRUE if rtx X is a valid immediate FP constant. */
7603 arm_const_double_rtx (rtx x)
7608 if (!fp_consts_inited)
7611 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7612 if (REAL_VALUE_MINUS_ZERO (r))
7615 for (i = 0; i < fp_consts_inited; i++)
7616 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7622 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7624 neg_const_double_rtx_ok_for_fpa (rtx x)
7629 if (!fp_consts_inited)
7632 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7633 r = REAL_VALUE_NEGATE (r);
7634 if (REAL_VALUE_MINUS_ZERO (r))
7637 for (i = 0; i < 8; i++)
7638 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7645 /* VFPv3 has a fairly wide range of representable immediates, formed from
7646 "quarter-precision" floating-point values. These can be evaluated using this
7647 formula (with ^ for exponentiation):
7651 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7652 16 <= n <= 31 and 0 <= r <= 7.
7654 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7656 - A (most-significant) is the sign bit.
7657 - BCD are the exponent (encoded as r XOR 3).
7658 - EFGH are the mantissa (encoded as n - 16).
7661 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7662 fconst[sd] instruction, or -1 if X isn't suitable. */
7664 vfp3_const_double_index (rtx x)
7666 REAL_VALUE_TYPE r, m;
7668 unsigned HOST_WIDE_INT mantissa, mant_hi;
7669 unsigned HOST_WIDE_INT mask;
7670 HOST_WIDE_INT m1, m2;
7671 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7673 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7678 /* We can't represent these things, so detect them first. */
7679 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7682 /* Extract sign, exponent and mantissa. */
7683 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7684 r = REAL_VALUE_ABS (r);
7685 exponent = REAL_EXP (&r);
7686 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7687 highest (sign) bit, with a fixed binary point at bit point_pos.
7688 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7689 bits for the mantissa, this may fail (low bits would be lost). */
7690 real_ldexp (&m, &r, point_pos - exponent);
7691 REAL_VALUE_TO_INT (&m1, &m2, m);
7695 /* If there are bits set in the low part of the mantissa, we can't
7696 represent this value. */
7700 /* Now make it so that mantissa contains the most-significant bits, and move
7701 the point_pos to indicate that the least-significant bits have been
7703 point_pos -= HOST_BITS_PER_WIDE_INT;
7706 /* We can permit four significant bits of mantissa only, plus a high bit
7707 which is always 1. */
7708 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7709 if ((mantissa & mask) != 0)
7712 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7713 mantissa >>= point_pos - 5;
7715 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7716 floating-point immediate zero with Neon using an integer-zero load, but
7717 that case is handled elsewhere.) */
7721 gcc_assert (mantissa >= 16 && mantissa <= 31);
7723 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7724 normalized significands are in the range [1, 2). (Our mantissa is shifted
7725 left 4 places at this point relative to normalized IEEE754 values). GCC
7726 internally uses [0.5, 1) (see real.c), so the exponent returned from
7727 REAL_EXP must be altered. */
7728 exponent = 5 - exponent;
7730 if (exponent < 0 || exponent > 7)
7733 /* Sign, mantissa and exponent are now in the correct form to plug into the
7734 formula described in the comment above. */
7735 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7738 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7740 vfp3_const_double_rtx (rtx x)
7745 return vfp3_const_double_index (x) != -1;
7748 /* Recognize immediates which can be used in various Neon instructions. Legal
7749 immediates are described by the following table (for VMVN variants, the
7750 bitwise inverse of the constant shown is recognized. In either case, VMOV
7751 is output and the correct instruction to use for a given constant is chosen
7752 by the assembler). The constant shown is replicated across all elements of
7753 the destination vector.
7755 insn elems variant constant (binary)
7756 ---- ----- ------- -----------------
7757 vmov i32 0 00000000 00000000 00000000 abcdefgh
7758 vmov i32 1 00000000 00000000 abcdefgh 00000000
7759 vmov i32 2 00000000 abcdefgh 00000000 00000000
7760 vmov i32 3 abcdefgh 00000000 00000000 00000000
7761 vmov i16 4 00000000 abcdefgh
7762 vmov i16 5 abcdefgh 00000000
7763 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7764 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7765 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7766 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7767 vmvn i16 10 00000000 abcdefgh
7768 vmvn i16 11 abcdefgh 00000000
7769 vmov i32 12 00000000 00000000 abcdefgh 11111111
7770 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7771 vmov i32 14 00000000 abcdefgh 11111111 11111111
7772 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7774 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7775 eeeeeeee ffffffff gggggggg hhhhhhhh
7776 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7778 For case 18, B = !b. Representable values are exactly those accepted by
7779 vfp3_const_double_index, but are output as floating-point numbers rather
7782 Variants 0-5 (inclusive) may also be used as immediates for the second
7783 operand of VORR/VBIC instructions.
7785 The INVERSE argument causes the bitwise inverse of the given operand to be
7786 recognized instead (used for recognizing legal immediates for the VAND/VORN
7787 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7788 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7789 output, rather than the real insns vbic/vorr).
7791 INVERSE makes no difference to the recognition of float vectors.
7793 The return value is the variant of immediate as shown in the above table, or
7794 -1 if the given value doesn't match any of the listed patterns.
7797 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7798 rtx *modconst, int *elementwidth)
7800 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7802 for (i = 0; i < idx; i += (STRIDE)) \
7807 immtype = (CLASS); \
7808 elsize = (ELSIZE); \
7812 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7813 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7814 unsigned char bytes[16];
7815 int immtype = -1, matches;
7816 unsigned int invmask = inverse ? 0xff : 0;
7818 /* Vectors of float constants. */
7819 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7821 rtx el0 = CONST_VECTOR_ELT (op, 0);
7824 if (!vfp3_const_double_rtx (el0))
7827 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7829 for (i = 1; i < n_elts; i++)
7831 rtx elt = CONST_VECTOR_ELT (op, i);
7834 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7836 if (!REAL_VALUES_EQUAL (r0, re))
7841 *modconst = CONST_VECTOR_ELT (op, 0);
7849 /* Splat vector constant out into a byte vector. */
7850 for (i = 0; i < n_elts; i++)
7852 rtx el = CONST_VECTOR_ELT (op, i);
7853 unsigned HOST_WIDE_INT elpart;
7854 unsigned int part, parts;
7856 if (GET_CODE (el) == CONST_INT)
7858 elpart = INTVAL (el);
7861 else if (GET_CODE (el) == CONST_DOUBLE)
7863 elpart = CONST_DOUBLE_LOW (el);
7869 for (part = 0; part < parts; part++)
7872 for (byte = 0; byte < innersize; byte++)
7874 bytes[idx++] = (elpart & 0xff) ^ invmask;
7875 elpart >>= BITS_PER_UNIT;
7877 if (GET_CODE (el) == CONST_DOUBLE)
7878 elpart = CONST_DOUBLE_HIGH (el);
7883 gcc_assert (idx == GET_MODE_SIZE (mode));
7887 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7888 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7890 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7891 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7893 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7894 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7896 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7897 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7899 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7901 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7903 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7904 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7906 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7907 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7909 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7910 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7912 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7913 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7915 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7917 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7919 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7920 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7922 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7923 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7925 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7926 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7928 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7929 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7931 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7933 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7934 && bytes[i] == bytes[(i + 8) % idx]);
7942 *elementwidth = elsize;
7946 unsigned HOST_WIDE_INT imm = 0;
7948 /* Un-invert bytes of recognized vector, if necessary. */
7950 for (i = 0; i < idx; i++)
7951 bytes[i] ^= invmask;
7955 /* FIXME: Broken on 32-bit H_W_I hosts. */
7956 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7958 for (i = 0; i < 8; i++)
7959 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7960 << (i * BITS_PER_UNIT);
7962 *modconst = GEN_INT (imm);
7966 unsigned HOST_WIDE_INT imm = 0;
7968 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7969 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7971 *modconst = GEN_INT (imm);
7979 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7980 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7981 float elements), and a modified constant (whatever should be output for a
7982 VMOV) in *MODCONST. */
7985 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7986 rtx *modconst, int *elementwidth)
7990 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7996 *modconst = tmpconst;
7999 *elementwidth = tmpwidth;
8004 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8005 the immediate is valid, write a constant suitable for using as an operand
8006 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8007 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8010 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8011 rtx *modconst, int *elementwidth)
8015 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8017 if (retval < 0 || retval > 5)
8021 *modconst = tmpconst;
8024 *elementwidth = tmpwidth;
8029 /* Return a string suitable for output of Neon immediate logic operation
8033 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8034 int inverse, int quad)
8036 int width, is_valid;
8037 static char templ[40];
8039 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8041 gcc_assert (is_valid != 0);
8044 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8046 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8051 /* Output a sequence of pairwise operations to implement a reduction.
8052 NOTE: We do "too much work" here, because pairwise operations work on two
8053 registers-worth of operands in one go. Unfortunately we can't exploit those
8054 extra calculations to do the full operation in fewer steps, I don't think.
8055 Although all vector elements of the result but the first are ignored, we
8056 actually calculate the same result in each of the elements. An alternative
8057 such as initially loading a vector with zero to use as each of the second
8058 operands would use up an additional register and take an extra instruction,
8059 for no particular gain. */
8062 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8063 rtx (*reduc) (rtx, rtx, rtx))
8065 enum machine_mode inner = GET_MODE_INNER (mode);
8066 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8069 for (i = parts / 2; i >= 1; i /= 2)
8071 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8072 emit_insn (reduc (dest, tmpsum, tmpsum));
8077 /* If VALS is a vector constant that can be loaded into a register
8078 using VDUP, generate instructions to do so and return an RTX to
8079 assign to the register. Otherwise return NULL_RTX. */
8082 neon_vdup_constant (rtx vals)
8084 enum machine_mode mode = GET_MODE (vals);
8085 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8086 int n_elts = GET_MODE_NUNITS (mode);
8087 bool all_same = true;
8091 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8094 for (i = 0; i < n_elts; ++i)
8096 x = XVECEXP (vals, 0, i);
8097 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8102 /* The elements are not all the same. We could handle repeating
8103 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8104 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8108 /* We can load this constant by using VDUP and a constant in a
8109 single ARM register. This will be cheaper than a vector
8112 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8113 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8117 /* Generate code to load VALS, which is a PARALLEL containing only
8118 constants (for vec_init) or CONST_VECTOR, efficiently into a
8119 register. Returns an RTX to copy into the register, or NULL_RTX
8120 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8123 neon_make_constant (rtx vals)
8125 enum machine_mode mode = GET_MODE (vals);
8127 rtx const_vec = NULL_RTX;
8128 int n_elts = GET_MODE_NUNITS (mode);
8132 if (GET_CODE (vals) == CONST_VECTOR)
8134 else if (GET_CODE (vals) == PARALLEL)
8136 /* A CONST_VECTOR must contain only CONST_INTs and
8137 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8138 Only store valid constants in a CONST_VECTOR. */
8139 for (i = 0; i < n_elts; ++i)
8141 rtx x = XVECEXP (vals, 0, i);
8142 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8145 if (n_const == n_elts)
8146 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8151 if (const_vec != NULL
8152 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8153 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8155 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8156 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8157 pipeline cycle; creating the constant takes one or two ARM
8160 else if (const_vec != NULL_RTX)
8161 /* Load from constant pool. On Cortex-A8 this takes two cycles
8162 (for either double or quad vectors). We can not take advantage
8163 of single-cycle VLD1 because we need a PC-relative addressing
8167 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8168 We can not construct an initializer. */
8172 /* Initialize vector TARGET to VALS. */
8175 neon_expand_vector_init (rtx target, rtx vals)
8177 enum machine_mode mode = GET_MODE (target);
8178 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8179 int n_elts = GET_MODE_NUNITS (mode);
8180 int n_var = 0, one_var = -1;
8181 bool all_same = true;
8185 for (i = 0; i < n_elts; ++i)
8187 x = XVECEXP (vals, 0, i);
8188 if (!CONSTANT_P (x))
8189 ++n_var, one_var = i;
8191 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8197 rtx constant = neon_make_constant (vals);
8198 if (constant != NULL_RTX)
8200 emit_move_insn (target, constant);
8205 /* Splat a single non-constant element if we can. */
8206 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8208 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8209 emit_insn (gen_rtx_SET (VOIDmode, target,
8210 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8215 /* One field is non-constant. Load constant then overwrite varying
8216 field. This is more efficient than using the stack. */
8219 rtx copy = copy_rtx (vals);
8222 /* Load constant part of vector, substitute neighboring value for
8224 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8225 neon_expand_vector_init (target, copy);
8227 /* Insert variable. */
8228 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8229 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8230 emit_insn (gen_rtx_SET (VOIDmode, target,
8231 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8235 /* Construct the vector in memory one field at a time
8236 and load the whole vector. */
8237 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8238 for (i = 0; i < n_elts; i++)
8239 emit_move_insn (adjust_address_nv (mem, inner_mode,
8240 i * GET_MODE_SIZE (inner_mode)),
8241 XVECEXP (vals, 0, i));
8242 emit_move_insn (target, mem);
8245 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8246 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8247 reported source locations are bogus. */
8250 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8255 gcc_assert (GET_CODE (operand) == CONST_INT);
8257 lane = INTVAL (operand);
8259 if (lane < low || lane >= high)
8263 /* Bounds-check lanes. */
8266 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8268 bounds_check (operand, low, high, "lane out of range");
8271 /* Bounds-check constants. */
8274 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8276 bounds_check (operand, low, high, "constant out of range");
8280 neon_element_bits (enum machine_mode mode)
8283 return GET_MODE_BITSIZE (mode);
8285 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8289 /* Predicates for `match_operand' and `match_operator'. */
8291 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8293 cirrus_memory_offset (rtx op)
8295 /* Reject eliminable registers. */
8296 if (! (reload_in_progress || reload_completed)
8297 && ( reg_mentioned_p (frame_pointer_rtx, op)
8298 || reg_mentioned_p (arg_pointer_rtx, op)
8299 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8300 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8301 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8302 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8305 if (GET_CODE (op) == MEM)
8311 /* Match: (mem (reg)). */
8312 if (GET_CODE (ind) == REG)
8318 if (GET_CODE (ind) == PLUS
8319 && GET_CODE (XEXP (ind, 0)) == REG
8320 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8321 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8328 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8329 WB is true if full writeback address modes are allowed and is false
8330 if limited writeback address modes (POST_INC and PRE_DEC) are
8334 arm_coproc_mem_operand (rtx op, bool wb)
8338 /* Reject eliminable registers. */
8339 if (! (reload_in_progress || reload_completed)
8340 && ( reg_mentioned_p (frame_pointer_rtx, op)
8341 || reg_mentioned_p (arg_pointer_rtx, op)
8342 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8343 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8344 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8345 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8348 /* Constants are converted into offsets from labels. */
8349 if (GET_CODE (op) != MEM)
8354 if (reload_completed
8355 && (GET_CODE (ind) == LABEL_REF
8356 || (GET_CODE (ind) == CONST
8357 && GET_CODE (XEXP (ind, 0)) == PLUS
8358 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8359 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8362 /* Match: (mem (reg)). */
8363 if (GET_CODE (ind) == REG)
8364 return arm_address_register_rtx_p (ind, 0);
8366 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8367 acceptable in any case (subject to verification by
8368 arm_address_register_rtx_p). We need WB to be true to accept
8369 PRE_INC and POST_DEC. */
8370 if (GET_CODE (ind) == POST_INC
8371 || GET_CODE (ind) == PRE_DEC
8373 && (GET_CODE (ind) == PRE_INC
8374 || GET_CODE (ind) == POST_DEC)))
8375 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8378 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8379 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8380 && GET_CODE (XEXP (ind, 1)) == PLUS
8381 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8382 ind = XEXP (ind, 1);
8387 if (GET_CODE (ind) == PLUS
8388 && GET_CODE (XEXP (ind, 0)) == REG
8389 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8390 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8391 && INTVAL (XEXP (ind, 1)) > -1024
8392 && INTVAL (XEXP (ind, 1)) < 1024
8393 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8399 /* Return TRUE if OP is a memory operand which we can load or store a vector
8400 to/from. TYPE is one of the following values:
8401 0 - Vector load/stor (vldr)
8402 1 - Core registers (ldm)
8403 2 - Element/structure loads (vld1)
8406 neon_vector_mem_operand (rtx op, int type)
8410 /* Reject eliminable registers. */
8411 if (! (reload_in_progress || reload_completed)
8412 && ( reg_mentioned_p (frame_pointer_rtx, op)
8413 || reg_mentioned_p (arg_pointer_rtx, op)
8414 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8415 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8416 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8417 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8420 /* Constants are converted into offsets from labels. */
8421 if (GET_CODE (op) != MEM)
8426 if (reload_completed
8427 && (GET_CODE (ind) == LABEL_REF
8428 || (GET_CODE (ind) == CONST
8429 && GET_CODE (XEXP (ind, 0)) == PLUS
8430 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8431 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8434 /* Match: (mem (reg)). */
8435 if (GET_CODE (ind) == REG)
8436 return arm_address_register_rtx_p (ind, 0);
8438 /* Allow post-increment with Neon registers. */
8439 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8440 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8442 /* FIXME: vld1 allows register post-modify. */
8448 && GET_CODE (ind) == PLUS
8449 && GET_CODE (XEXP (ind, 0)) == REG
8450 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8451 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8452 && INTVAL (XEXP (ind, 1)) > -1024
8453 && INTVAL (XEXP (ind, 1)) < 1016
8454 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8460 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8463 neon_struct_mem_operand (rtx op)
8467 /* Reject eliminable registers. */
8468 if (! (reload_in_progress || reload_completed)
8469 && ( reg_mentioned_p (frame_pointer_rtx, op)
8470 || reg_mentioned_p (arg_pointer_rtx, op)
8471 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8472 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8473 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8474 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8477 /* Constants are converted into offsets from labels. */
8478 if (GET_CODE (op) != MEM)
8483 if (reload_completed
8484 && (GET_CODE (ind) == LABEL_REF
8485 || (GET_CODE (ind) == CONST
8486 && GET_CODE (XEXP (ind, 0)) == PLUS
8487 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8488 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8491 /* Match: (mem (reg)). */
8492 if (GET_CODE (ind) == REG)
8493 return arm_address_register_rtx_p (ind, 0);
8498 /* Return true if X is a register that will be eliminated later on. */
8500 arm_eliminable_register (rtx x)
8502 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8503 || REGNO (x) == ARG_POINTER_REGNUM
8504 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8505 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8508 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8509 coprocessor registers. Otherwise return NO_REGS. */
8512 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8516 if (!TARGET_NEON_FP16)
8517 return GENERAL_REGS;
8518 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8520 return GENERAL_REGS;
8524 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8525 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8526 && neon_vector_mem_operand (x, 0))
8529 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8532 return GENERAL_REGS;
8535 /* Values which must be returned in the most-significant end of the return
8539 arm_return_in_msb (const_tree valtype)
8541 return (TARGET_AAPCS_BASED
8543 && (AGGREGATE_TYPE_P (valtype)
8544 || TREE_CODE (valtype) == COMPLEX_TYPE));
8547 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8548 Use by the Cirrus Maverick code which has to workaround
8549 a hardware bug triggered by such instructions. */
8551 arm_memory_load_p (rtx insn)
8553 rtx body, lhs, rhs;;
8555 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8558 body = PATTERN (insn);
8560 if (GET_CODE (body) != SET)
8563 lhs = XEXP (body, 0);
8564 rhs = XEXP (body, 1);
8566 lhs = REG_OR_SUBREG_RTX (lhs);
8568 /* If the destination is not a general purpose
8569 register we do not have to worry. */
8570 if (GET_CODE (lhs) != REG
8571 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8574 /* As well as loads from memory we also have to react
8575 to loads of invalid constants which will be turned
8576 into loads from the minipool. */
8577 return (GET_CODE (rhs) == MEM
8578 || GET_CODE (rhs) == SYMBOL_REF
8579 || note_invalid_constants (insn, -1, false));
8582 /* Return TRUE if INSN is a Cirrus instruction. */
8584 arm_cirrus_insn_p (rtx insn)
8586 enum attr_cirrus attr;
8588 /* get_attr cannot accept USE or CLOBBER. */
8590 || GET_CODE (insn) != INSN
8591 || GET_CODE (PATTERN (insn)) == USE
8592 || GET_CODE (PATTERN (insn)) == CLOBBER)
8595 attr = get_attr_cirrus (insn);
8597 return attr != CIRRUS_NOT;
8600 /* Cirrus reorg for invalid instruction combinations. */
8602 cirrus_reorg (rtx first)
8604 enum attr_cirrus attr;
8605 rtx body = PATTERN (first);
8609 /* Any branch must be followed by 2 non Cirrus instructions. */
8610 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8613 t = next_nonnote_insn (first);
8615 if (arm_cirrus_insn_p (t))
8618 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8622 emit_insn_after (gen_nop (), first);
8627 /* (float (blah)) is in parallel with a clobber. */
8628 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8629 body = XVECEXP (body, 0, 0);
8631 if (GET_CODE (body) == SET)
8633 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8635 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8636 be followed by a non Cirrus insn. */
8637 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8639 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8640 emit_insn_after (gen_nop (), first);
8644 else if (arm_memory_load_p (first))
8646 unsigned int arm_regno;
8648 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8649 ldr/cfmv64hr combination where the Rd field is the same
8650 in both instructions must be split with a non Cirrus
8657 /* Get Arm register number for ldr insn. */
8658 if (GET_CODE (lhs) == REG)
8659 arm_regno = REGNO (lhs);
8662 gcc_assert (GET_CODE (rhs) == REG);
8663 arm_regno = REGNO (rhs);
8667 first = next_nonnote_insn (first);
8669 if (! arm_cirrus_insn_p (first))
8672 body = PATTERN (first);
8674 /* (float (blah)) is in parallel with a clobber. */
8675 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8676 body = XVECEXP (body, 0, 0);
8678 if (GET_CODE (body) == FLOAT)
8679 body = XEXP (body, 0);
8681 if (get_attr_cirrus (first) == CIRRUS_MOVE
8682 && GET_CODE (XEXP (body, 1)) == REG
8683 && arm_regno == REGNO (XEXP (body, 1)))
8684 emit_insn_after (gen_nop (), first);
8690 /* get_attr cannot accept USE or CLOBBER. */
8692 || GET_CODE (first) != INSN
8693 || GET_CODE (PATTERN (first)) == USE
8694 || GET_CODE (PATTERN (first)) == CLOBBER)
8697 attr = get_attr_cirrus (first);
8699 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8700 must be followed by a non-coprocessor instruction. */
8701 if (attr == CIRRUS_COMPARE)
8705 t = next_nonnote_insn (first);
8707 if (arm_cirrus_insn_p (t))
8710 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8714 emit_insn_after (gen_nop (), first);
8720 /* Return TRUE if X references a SYMBOL_REF. */
8722 symbol_mentioned_p (rtx x)
8727 if (GET_CODE (x) == SYMBOL_REF)
8730 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8731 are constant offsets, not symbols. */
8732 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8735 fmt = GET_RTX_FORMAT (GET_CODE (x));
8737 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8743 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8744 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8747 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8754 /* Return TRUE if X references a LABEL_REF. */
8756 label_mentioned_p (rtx x)
8761 if (GET_CODE (x) == LABEL_REF)
8764 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8765 instruction, but they are constant offsets, not symbols. */
8766 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8769 fmt = GET_RTX_FORMAT (GET_CODE (x));
8770 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8776 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8777 if (label_mentioned_p (XVECEXP (x, i, j)))
8780 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8788 tls_mentioned_p (rtx x)
8790 switch (GET_CODE (x))
8793 return tls_mentioned_p (XEXP (x, 0));
8796 if (XINT (x, 1) == UNSPEC_TLS)
8804 /* Must not copy a SET whose source operand is PC-relative. */
8807 arm_cannot_copy_insn_p (rtx insn)
8809 rtx pat = PATTERN (insn);
8811 if (GET_CODE (pat) == SET)
8813 rtx rhs = SET_SRC (pat);
8815 if (GET_CODE (rhs) == UNSPEC
8816 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8819 if (GET_CODE (rhs) == MEM
8820 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8821 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8831 enum rtx_code code = GET_CODE (x);
8848 /* Return 1 if memory locations are adjacent. */
8850 adjacent_mem_locations (rtx a, rtx b)
8852 /* We don't guarantee to preserve the order of these memory refs. */
8853 if (volatile_refs_p (a) || volatile_refs_p (b))
8856 if ((GET_CODE (XEXP (a, 0)) == REG
8857 || (GET_CODE (XEXP (a, 0)) == PLUS
8858 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8859 && (GET_CODE (XEXP (b, 0)) == REG
8860 || (GET_CODE (XEXP (b, 0)) == PLUS
8861 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8863 HOST_WIDE_INT val0 = 0, val1 = 0;
8867 if (GET_CODE (XEXP (a, 0)) == PLUS)
8869 reg0 = XEXP (XEXP (a, 0), 0);
8870 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8875 if (GET_CODE (XEXP (b, 0)) == PLUS)
8877 reg1 = XEXP (XEXP (b, 0), 0);
8878 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8883 /* Don't accept any offset that will require multiple
8884 instructions to handle, since this would cause the
8885 arith_adjacentmem pattern to output an overlong sequence. */
8886 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8889 /* Don't allow an eliminable register: register elimination can make
8890 the offset too large. */
8891 if (arm_eliminable_register (reg0))
8894 val_diff = val1 - val0;
8898 /* If the target has load delay slots, then there's no benefit
8899 to using an ldm instruction unless the offset is zero and
8900 we are optimizing for size. */
8901 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8902 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8903 && (val_diff == 4 || val_diff == -4));
8906 return ((REGNO (reg0) == REGNO (reg1))
8907 && (val_diff == 4 || val_diff == -4));
8914 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8915 HOST_WIDE_INT *load_offset)
8917 int unsorted_regs[4];
8918 HOST_WIDE_INT unsorted_offsets[4];
8923 /* Can only handle 2, 3, or 4 insns at present,
8924 though could be easily extended if required. */
8925 gcc_assert (nops >= 2 && nops <= 4);
8927 memset (order, 0, 4 * sizeof (int));
8929 /* Loop over the operands and check that the memory references are
8930 suitable (i.e. immediate offsets from the same base register). At
8931 the same time, extract the target register, and the memory
8933 for (i = 0; i < nops; i++)
8938 /* Convert a subreg of a mem into the mem itself. */
8939 if (GET_CODE (operands[nops + i]) == SUBREG)
8940 operands[nops + i] = alter_subreg (operands + (nops + i));
8942 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8944 /* Don't reorder volatile memory references; it doesn't seem worth
8945 looking for the case where the order is ok anyway. */
8946 if (MEM_VOLATILE_P (operands[nops + i]))
8949 offset = const0_rtx;
8951 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8952 || (GET_CODE (reg) == SUBREG
8953 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8954 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8955 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8957 || (GET_CODE (reg) == SUBREG
8958 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8959 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8964 base_reg = REGNO (reg);
8965 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8966 ? REGNO (operands[i])
8967 : REGNO (SUBREG_REG (operands[i])));
8972 if (base_reg != (int) REGNO (reg))
8973 /* Not addressed from the same base register. */
8976 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8977 ? REGNO (operands[i])
8978 : REGNO (SUBREG_REG (operands[i])));
8979 if (unsorted_regs[i] < unsorted_regs[order[0]])
8983 /* If it isn't an integer register, or if it overwrites the
8984 base register but isn't the last insn in the list, then
8985 we can't do this. */
8986 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8987 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8990 unsorted_offsets[i] = INTVAL (offset);
8993 /* Not a suitable memory address. */
8997 /* All the useful information has now been extracted from the
8998 operands into unsorted_regs and unsorted_offsets; additionally,
8999 order[0] has been set to the lowest numbered register in the
9000 list. Sort the registers into order, and check that the memory
9001 offsets are ascending and adjacent. */
9003 for (i = 1; i < nops; i++)
9007 order[i] = order[i - 1];
9008 for (j = 0; j < nops; j++)
9009 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9010 && (order[i] == order[i - 1]
9011 || unsorted_regs[j] < unsorted_regs[order[i]]))
9014 /* Have we found a suitable register? if not, one must be used more
9016 if (order[i] == order[i - 1])
9019 /* Is the memory address adjacent and ascending? */
9020 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9028 for (i = 0; i < nops; i++)
9029 regs[i] = unsorted_regs[order[i]];
9031 *load_offset = unsorted_offsets[order[0]];
9034 if (unsorted_offsets[order[0]] == 0)
9035 return 1; /* ldmia */
9037 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9038 return 2; /* ldmib */
9040 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9041 return 3; /* ldmda */
9043 if (unsorted_offsets[order[nops - 1]] == -4)
9044 return 4; /* ldmdb */
9046 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9047 if the offset isn't small enough. The reason 2 ldrs are faster
9048 is because these ARMs are able to do more than one cache access
9049 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9050 whilst the ARM8 has a double bandwidth cache. This means that
9051 these cores can do both an instruction fetch and a data fetch in
9052 a single cycle, so the trick of calculating the address into a
9053 scratch register (one of the result regs) and then doing a load
9054 multiple actually becomes slower (and no smaller in code size).
9055 That is the transformation
9057 ldr rd1, [rbase + offset]
9058 ldr rd2, [rbase + offset + 4]
9062 add rd1, rbase, offset
9063 ldmia rd1, {rd1, rd2}
9065 produces worse code -- '3 cycles + any stalls on rd2' instead of
9066 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9067 access per cycle, the first sequence could never complete in less
9068 than 6 cycles, whereas the ldm sequence would only take 5 and
9069 would make better use of sequential accesses if not hitting the
9072 We cheat here and test 'arm_ld_sched' which we currently know to
9073 only be true for the ARM8, ARM9 and StrongARM. If this ever
9074 changes, then the test below needs to be reworked. */
9075 if (nops == 2 && arm_ld_sched)
9078 /* Can't do it without setting up the offset, only do this if it takes
9079 no more than one insn. */
9080 return (const_ok_for_arm (unsorted_offsets[order[0]])
9081 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
9085 emit_ldm_seq (rtx *operands, int nops)
9089 HOST_WIDE_INT offset;
9093 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9096 strcpy (buf, "ldm%(ia%)\t");
9100 strcpy (buf, "ldm%(ib%)\t");
9104 strcpy (buf, "ldm%(da%)\t");
9108 strcpy (buf, "ldm%(db%)\t");
9113 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9114 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9117 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9118 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9120 output_asm_insn (buf, operands);
9122 strcpy (buf, "ldm%(ia%)\t");
9129 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9130 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9132 for (i = 1; i < nops; i++)
9133 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9134 reg_names[regs[i]]);
9136 strcat (buf, "}\t%@ phole ldm");
9138 output_asm_insn (buf, operands);
9143 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9144 HOST_WIDE_INT * load_offset)
9146 int unsorted_regs[4];
9147 HOST_WIDE_INT unsorted_offsets[4];
9152 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9153 extended if required. */
9154 gcc_assert (nops >= 2 && nops <= 4);
9156 memset (order, 0, 4 * sizeof (int));
9158 /* Loop over the operands and check that the memory references are
9159 suitable (i.e. immediate offsets from the same base register). At
9160 the same time, extract the target register, and the memory
9162 for (i = 0; i < nops; i++)
9167 /* Convert a subreg of a mem into the mem itself. */
9168 if (GET_CODE (operands[nops + i]) == SUBREG)
9169 operands[nops + i] = alter_subreg (operands + (nops + i));
9171 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9173 /* Don't reorder volatile memory references; it doesn't seem worth
9174 looking for the case where the order is ok anyway. */
9175 if (MEM_VOLATILE_P (operands[nops + i]))
9178 offset = const0_rtx;
9180 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9181 || (GET_CODE (reg) == SUBREG
9182 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9183 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9184 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9186 || (GET_CODE (reg) == SUBREG
9187 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9188 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9193 base_reg = REGNO (reg);
9194 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9195 ? REGNO (operands[i])
9196 : REGNO (SUBREG_REG (operands[i])));
9201 if (base_reg != (int) REGNO (reg))
9202 /* Not addressed from the same base register. */
9205 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9206 ? REGNO (operands[i])
9207 : REGNO (SUBREG_REG (operands[i])));
9208 if (unsorted_regs[i] < unsorted_regs[order[0]])
9212 /* If it isn't an integer register, then we can't do this. */
9213 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9216 unsorted_offsets[i] = INTVAL (offset);
9219 /* Not a suitable memory address. */
9223 /* All the useful information has now been extracted from the
9224 operands into unsorted_regs and unsorted_offsets; additionally,
9225 order[0] has been set to the lowest numbered register in the
9226 list. Sort the registers into order, and check that the memory
9227 offsets are ascending and adjacent. */
9229 for (i = 1; i < nops; i++)
9233 order[i] = order[i - 1];
9234 for (j = 0; j < nops; j++)
9235 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9236 && (order[i] == order[i - 1]
9237 || unsorted_regs[j] < unsorted_regs[order[i]]))
9240 /* Have we found a suitable register? if not, one must be used more
9242 if (order[i] == order[i - 1])
9245 /* Is the memory address adjacent and ascending? */
9246 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9254 for (i = 0; i < nops; i++)
9255 regs[i] = unsorted_regs[order[i]];
9257 *load_offset = unsorted_offsets[order[0]];
9260 if (unsorted_offsets[order[0]] == 0)
9261 return 1; /* stmia */
9263 if (unsorted_offsets[order[0]] == 4)
9264 return 2; /* stmib */
9266 if (unsorted_offsets[order[nops - 1]] == 0)
9267 return 3; /* stmda */
9269 if (unsorted_offsets[order[nops - 1]] == -4)
9270 return 4; /* stmdb */
9276 emit_stm_seq (rtx *operands, int nops)
9280 HOST_WIDE_INT offset;
9284 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9287 strcpy (buf, "stm%(ia%)\t");
9291 strcpy (buf, "stm%(ib%)\t");
9295 strcpy (buf, "stm%(da%)\t");
9299 strcpy (buf, "stm%(db%)\t");
9306 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9307 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9309 for (i = 1; i < nops; i++)
9310 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9311 reg_names[regs[i]]);
9313 strcat (buf, "}\t%@ phole stm");
9315 output_asm_insn (buf, operands);
9319 /* Routines for use in generating RTL. */
9322 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9323 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9325 HOST_WIDE_INT offset = *offsetp;
9328 int sign = up ? 1 : -1;
9331 /* XScale has load-store double instructions, but they have stricter
9332 alignment requirements than load-store multiple, so we cannot
9335 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9336 the pipeline until completion.
9344 An ldr instruction takes 1-3 cycles, but does not block the
9353 Best case ldr will always win. However, the more ldr instructions
9354 we issue, the less likely we are to be able to schedule them well.
9355 Using ldr instructions also increases code size.
9357 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9358 for counts of 3 or 4 regs. */
9359 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9365 for (i = 0; i < count; i++)
9367 addr = plus_constant (from, i * 4 * sign);
9368 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9369 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9375 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9385 result = gen_rtx_PARALLEL (VOIDmode,
9386 rtvec_alloc (count + (write_back ? 1 : 0)));
9389 XVECEXP (result, 0, 0)
9390 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9395 for (j = 0; i < count; i++, j++)
9397 addr = plus_constant (from, j * 4 * sign);
9398 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9399 XVECEXP (result, 0, i)
9400 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9411 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9412 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9414 HOST_WIDE_INT offset = *offsetp;
9417 int sign = up ? 1 : -1;
9420 /* See arm_gen_load_multiple for discussion of
9421 the pros/cons of ldm/stm usage for XScale. */
9422 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9428 for (i = 0; i < count; i++)
9430 addr = plus_constant (to, i * 4 * sign);
9431 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9432 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9438 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9448 result = gen_rtx_PARALLEL (VOIDmode,
9449 rtvec_alloc (count + (write_back ? 1 : 0)));
9452 XVECEXP (result, 0, 0)
9453 = gen_rtx_SET (VOIDmode, to,
9454 plus_constant (to, count * 4 * sign));
9459 for (j = 0; i < count; i++, j++)
9461 addr = plus_constant (to, j * 4 * sign);
9462 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9463 XVECEXP (result, 0, i)
9464 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9475 arm_gen_movmemqi (rtx *operands)
9477 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9478 HOST_WIDE_INT srcoffset, dstoffset;
9480 rtx src, dst, srcbase, dstbase;
9481 rtx part_bytes_reg = NULL;
9484 if (GET_CODE (operands[2]) != CONST_INT
9485 || GET_CODE (operands[3]) != CONST_INT
9486 || INTVAL (operands[2]) > 64
9487 || INTVAL (operands[3]) & 3)
9490 dstbase = operands[0];
9491 srcbase = operands[1];
9493 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9494 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9496 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9497 out_words_to_go = INTVAL (operands[2]) / 4;
9498 last_bytes = INTVAL (operands[2]) & 3;
9499 dstoffset = srcoffset = 0;
9501 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9502 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9504 for (i = 0; in_words_to_go >= 2; i+=4)
9506 if (in_words_to_go > 4)
9507 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9508 srcbase, &srcoffset));
9510 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9511 FALSE, srcbase, &srcoffset));
9513 if (out_words_to_go)
9515 if (out_words_to_go > 4)
9516 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9517 dstbase, &dstoffset));
9518 else if (out_words_to_go != 1)
9519 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9523 dstbase, &dstoffset));
9526 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9527 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9528 if (last_bytes != 0)
9530 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9536 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9537 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9540 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9541 if (out_words_to_go)
9545 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9546 sreg = copy_to_reg (mem);
9548 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9549 emit_move_insn (mem, sreg);
9552 gcc_assert (!in_words_to_go); /* Sanity check */
9557 gcc_assert (in_words_to_go > 0);
9559 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9560 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9563 gcc_assert (!last_bytes || part_bytes_reg);
9565 if (BYTES_BIG_ENDIAN && last_bytes)
9567 rtx tmp = gen_reg_rtx (SImode);
9569 /* The bytes we want are in the top end of the word. */
9570 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9571 GEN_INT (8 * (4 - last_bytes))));
9572 part_bytes_reg = tmp;
9576 mem = adjust_automodify_address (dstbase, QImode,
9577 plus_constant (dst, last_bytes - 1),
9578 dstoffset + last_bytes - 1);
9579 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9583 tmp = gen_reg_rtx (SImode);
9584 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9585 part_bytes_reg = tmp;
9594 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9595 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9599 rtx tmp = gen_reg_rtx (SImode);
9600 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9601 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9602 part_bytes_reg = tmp;
9609 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9610 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9617 /* Select a dominance comparison mode if possible for a test of the general
9618 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9619 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9620 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9621 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9622 In all cases OP will be either EQ or NE, but we don't need to know which
9623 here. If we are unable to support a dominance comparison we return
9624 CC mode. This will then fail to match for the RTL expressions that
9625 generate this call. */
9627 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9629 enum rtx_code cond1, cond2;
9632 /* Currently we will probably get the wrong result if the individual
9633 comparisons are not simple. This also ensures that it is safe to
9634 reverse a comparison if necessary. */
9635 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9637 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9641 /* The if_then_else variant of this tests the second condition if the
9642 first passes, but is true if the first fails. Reverse the first
9643 condition to get a true "inclusive-or" expression. */
9644 if (cond_or == DOM_CC_NX_OR_Y)
9645 cond1 = reverse_condition (cond1);
9647 /* If the comparisons are not equal, and one doesn't dominate the other,
9648 then we can't do this. */
9650 && !comparison_dominates_p (cond1, cond2)
9651 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9656 enum rtx_code temp = cond1;
9664 if (cond_or == DOM_CC_X_AND_Y)
9669 case EQ: return CC_DEQmode;
9670 case LE: return CC_DLEmode;
9671 case LEU: return CC_DLEUmode;
9672 case GE: return CC_DGEmode;
9673 case GEU: return CC_DGEUmode;
9674 default: gcc_unreachable ();
9678 if (cond_or == DOM_CC_X_AND_Y)
9694 if (cond_or == DOM_CC_X_AND_Y)
9710 if (cond_or == DOM_CC_X_AND_Y)
9726 if (cond_or == DOM_CC_X_AND_Y)
9741 /* The remaining cases only occur when both comparisons are the
9744 gcc_assert (cond1 == cond2);
9748 gcc_assert (cond1 == cond2);
9752 gcc_assert (cond1 == cond2);
9756 gcc_assert (cond1 == cond2);
9760 gcc_assert (cond1 == cond2);
9769 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9771 /* All floating point compares return CCFP if it is an equality
9772 comparison, and CCFPE otherwise. */
9773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9793 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9802 /* A compare with a shifted operand. Because of canonicalization, the
9803 comparison will have to be swapped when we emit the assembler. */
9804 if (GET_MODE (y) == SImode
9805 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9806 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9807 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9808 || GET_CODE (x) == ROTATERT))
9811 /* This operation is performed swapped, but since we only rely on the Z
9812 flag we don't need an additional mode. */
9813 if (GET_MODE (y) == SImode
9814 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9815 && GET_CODE (x) == NEG
9816 && (op == EQ || op == NE))
9819 /* This is a special case that is used by combine to allow a
9820 comparison of a shifted byte load to be split into a zero-extend
9821 followed by a comparison of the shifted integer (only valid for
9822 equalities and unsigned inequalities). */
9823 if (GET_MODE (x) == SImode
9824 && GET_CODE (x) == ASHIFT
9825 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9826 && GET_CODE (XEXP (x, 0)) == SUBREG
9827 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9828 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9829 && (op == EQ || op == NE
9830 || op == GEU || op == GTU || op == LTU || op == LEU)
9831 && GET_CODE (y) == CONST_INT)
9834 /* A construct for a conditional compare, if the false arm contains
9835 0, then both conditions must be true, otherwise either condition
9836 must be true. Not all conditions are possible, so CCmode is
9837 returned if it can't be done. */
9838 if (GET_CODE (x) == IF_THEN_ELSE
9839 && (XEXP (x, 2) == const0_rtx
9840 || XEXP (x, 2) == const1_rtx)
9841 && COMPARISON_P (XEXP (x, 0))
9842 && COMPARISON_P (XEXP (x, 1)))
9843 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9844 INTVAL (XEXP (x, 2)));
9846 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9847 if (GET_CODE (x) == AND
9848 && COMPARISON_P (XEXP (x, 0))
9849 && COMPARISON_P (XEXP (x, 1)))
9850 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9853 if (GET_CODE (x) == IOR
9854 && COMPARISON_P (XEXP (x, 0))
9855 && COMPARISON_P (XEXP (x, 1)))
9856 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9859 /* An operation (on Thumb) where we want to test for a single bit.
9860 This is done by shifting that bit up into the top bit of a
9861 scratch register; we can then branch on the sign bit. */
9863 && GET_MODE (x) == SImode
9864 && (op == EQ || op == NE)
9865 && GET_CODE (x) == ZERO_EXTRACT
9866 && XEXP (x, 1) == const1_rtx)
9869 /* An operation that sets the condition codes as a side-effect, the
9870 V flag is not set correctly, so we can only use comparisons where
9871 this doesn't matter. (For LT and GE we can use "mi" and "pl"
9873 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
9874 if (GET_MODE (x) == SImode
9876 && (op == EQ || op == NE || op == LT || op == GE)
9877 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9878 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9879 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9880 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9881 || GET_CODE (x) == LSHIFTRT
9882 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9883 || GET_CODE (x) == ROTATERT
9884 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9887 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9890 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9891 && GET_CODE (x) == PLUS
9892 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9898 /* X and Y are two things to compare using CODE. Emit the compare insn and
9899 return the rtx for register 0 in the proper mode. FP means this is a
9900 floating point compare: I don't think that it is needed on the arm. */
9902 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9904 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9905 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9907 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9912 /* Generate a sequence of insns that will generate the correct return
9913 address mask depending on the physical architecture that the program
9916 arm_gen_return_addr_mask (void)
9918 rtx reg = gen_reg_rtx (Pmode);
9920 emit_insn (gen_return_addr_mask (reg));
9925 arm_reload_in_hi (rtx *operands)
9927 rtx ref = operands[1];
9929 HOST_WIDE_INT offset = 0;
9931 if (GET_CODE (ref) == SUBREG)
9933 offset = SUBREG_BYTE (ref);
9934 ref = SUBREG_REG (ref);
9937 if (GET_CODE (ref) == REG)
9939 /* We have a pseudo which has been spilt onto the stack; there
9940 are two cases here: the first where there is a simple
9941 stack-slot replacement and a second where the stack-slot is
9942 out of range, or is used as a subreg. */
9943 if (reg_equiv_mem[REGNO (ref)])
9945 ref = reg_equiv_mem[REGNO (ref)];
9946 base = find_replacement (&XEXP (ref, 0));
9949 /* The slot is out of range, or was dressed up in a SUBREG. */
9950 base = reg_equiv_address[REGNO (ref)];
9953 base = find_replacement (&XEXP (ref, 0));
9955 /* Handle the case where the address is too complex to be offset by 1. */
9956 if (GET_CODE (base) == MINUS
9957 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9959 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9961 emit_set_insn (base_plus, base);
9964 else if (GET_CODE (base) == PLUS)
9966 /* The addend must be CONST_INT, or we would have dealt with it above. */
9967 HOST_WIDE_INT hi, lo;
9969 offset += INTVAL (XEXP (base, 1));
9970 base = XEXP (base, 0);
9972 /* Rework the address into a legal sequence of insns. */
9973 /* Valid range for lo is -4095 -> 4095 */
9976 : -((-offset) & 0xfff));
9978 /* Corner case, if lo is the max offset then we would be out of range
9979 once we have added the additional 1 below, so bump the msb into the
9980 pre-loading insn(s). */
9984 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9985 ^ (HOST_WIDE_INT) 0x80000000)
9986 - (HOST_WIDE_INT) 0x80000000);
9988 gcc_assert (hi + lo == offset);
9992 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9994 /* Get the base address; addsi3 knows how to handle constants
9995 that require more than one insn. */
9996 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10002 /* Operands[2] may overlap operands[0] (though it won't overlap
10003 operands[1]), that's why we asked for a DImode reg -- so we can
10004 use the bit that does not overlap. */
10005 if (REGNO (operands[2]) == REGNO (operands[0]))
10006 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10008 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10010 emit_insn (gen_zero_extendqisi2 (scratch,
10011 gen_rtx_MEM (QImode,
10012 plus_constant (base,
10014 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10015 gen_rtx_MEM (QImode,
10016 plus_constant (base,
10018 if (!BYTES_BIG_ENDIAN)
10019 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10020 gen_rtx_IOR (SImode,
10023 gen_rtx_SUBREG (SImode, operands[0], 0),
10027 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10028 gen_rtx_IOR (SImode,
10029 gen_rtx_ASHIFT (SImode, scratch,
10031 gen_rtx_SUBREG (SImode, operands[0], 0)));
10034 /* Handle storing a half-word to memory during reload by synthesizing as two
10035 byte stores. Take care not to clobber the input values until after we
10036 have moved them somewhere safe. This code assumes that if the DImode
10037 scratch in operands[2] overlaps either the input value or output address
10038 in some way, then that value must die in this insn (we absolutely need
10039 two scratch registers for some corner cases). */
10041 arm_reload_out_hi (rtx *operands)
10043 rtx ref = operands[0];
10044 rtx outval = operands[1];
10046 HOST_WIDE_INT offset = 0;
10048 if (GET_CODE (ref) == SUBREG)
10050 offset = SUBREG_BYTE (ref);
10051 ref = SUBREG_REG (ref);
10054 if (GET_CODE (ref) == REG)
10056 /* We have a pseudo which has been spilt onto the stack; there
10057 are two cases here: the first where there is a simple
10058 stack-slot replacement and a second where the stack-slot is
10059 out of range, or is used as a subreg. */
10060 if (reg_equiv_mem[REGNO (ref)])
10062 ref = reg_equiv_mem[REGNO (ref)];
10063 base = find_replacement (&XEXP (ref, 0));
10066 /* The slot is out of range, or was dressed up in a SUBREG. */
10067 base = reg_equiv_address[REGNO (ref)];
10070 base = find_replacement (&XEXP (ref, 0));
10072 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10074 /* Handle the case where the address is too complex to be offset by 1. */
10075 if (GET_CODE (base) == MINUS
10076 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10078 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10080 /* Be careful not to destroy OUTVAL. */
10081 if (reg_overlap_mentioned_p (base_plus, outval))
10083 /* Updating base_plus might destroy outval, see if we can
10084 swap the scratch and base_plus. */
10085 if (!reg_overlap_mentioned_p (scratch, outval))
10088 scratch = base_plus;
10093 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10095 /* Be conservative and copy OUTVAL into the scratch now,
10096 this should only be necessary if outval is a subreg
10097 of something larger than a word. */
10098 /* XXX Might this clobber base? I can't see how it can,
10099 since scratch is known to overlap with OUTVAL, and
10100 must be wider than a word. */
10101 emit_insn (gen_movhi (scratch_hi, outval));
10102 outval = scratch_hi;
10106 emit_set_insn (base_plus, base);
10109 else if (GET_CODE (base) == PLUS)
10111 /* The addend must be CONST_INT, or we would have dealt with it above. */
10112 HOST_WIDE_INT hi, lo;
10114 offset += INTVAL (XEXP (base, 1));
10115 base = XEXP (base, 0);
10117 /* Rework the address into a legal sequence of insns. */
10118 /* Valid range for lo is -4095 -> 4095 */
10121 : -((-offset) & 0xfff));
10123 /* Corner case, if lo is the max offset then we would be out of range
10124 once we have added the additional 1 below, so bump the msb into the
10125 pre-loading insn(s). */
10129 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10130 ^ (HOST_WIDE_INT) 0x80000000)
10131 - (HOST_WIDE_INT) 0x80000000);
10133 gcc_assert (hi + lo == offset);
10137 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10139 /* Be careful not to destroy OUTVAL. */
10140 if (reg_overlap_mentioned_p (base_plus, outval))
10142 /* Updating base_plus might destroy outval, see if we
10143 can swap the scratch and base_plus. */
10144 if (!reg_overlap_mentioned_p (scratch, outval))
10147 scratch = base_plus;
10152 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10154 /* Be conservative and copy outval into scratch now,
10155 this should only be necessary if outval is a
10156 subreg of something larger than a word. */
10157 /* XXX Might this clobber base? I can't see how it
10158 can, since scratch is known to overlap with
10160 emit_insn (gen_movhi (scratch_hi, outval));
10161 outval = scratch_hi;
10165 /* Get the base address; addsi3 knows how to handle constants
10166 that require more than one insn. */
10167 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10173 if (BYTES_BIG_ENDIAN)
10175 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10176 plus_constant (base, offset + 1)),
10177 gen_lowpart (QImode, outval)));
10178 emit_insn (gen_lshrsi3 (scratch,
10179 gen_rtx_SUBREG (SImode, outval, 0),
10181 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10182 gen_lowpart (QImode, scratch)));
10186 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10187 gen_lowpart (QImode, outval)));
10188 emit_insn (gen_lshrsi3 (scratch,
10189 gen_rtx_SUBREG (SImode, outval, 0),
10191 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10192 plus_constant (base, offset + 1)),
10193 gen_lowpart (QImode, scratch)));
10197 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10198 (padded to the size of a word) should be passed in a register. */
10201 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10203 if (TARGET_AAPCS_BASED)
10204 return must_pass_in_stack_var_size (mode, type);
10206 return must_pass_in_stack_var_size_or_pad (mode, type);
10210 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10211 Return true if an argument passed on the stack should be padded upwards,
10212 i.e. if the least-significant byte has useful data.
10213 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10214 aggregate types are placed in the lowest memory address. */
10217 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10219 if (!TARGET_AAPCS_BASED)
10220 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10222 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10229 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10230 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10231 byte of the register has useful data, and return the opposite if the
10232 most significant byte does.
10233 For AAPCS, small aggregates and small complex types are always padded
10237 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10238 tree type, int first ATTRIBUTE_UNUSED)
10240 if (TARGET_AAPCS_BASED
10241 && BYTES_BIG_ENDIAN
10242 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10243 && int_size_in_bytes (type) <= 4)
10246 /* Otherwise, use default padding. */
10247 return !BYTES_BIG_ENDIAN;
10251 /* Print a symbolic form of X to the debug file, F. */
10253 arm_print_value (FILE *f, rtx x)
10255 switch (GET_CODE (x))
10258 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10262 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10270 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10272 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10273 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10281 fprintf (f, "\"%s\"", XSTR (x, 0));
10285 fprintf (f, "`%s'", XSTR (x, 0));
10289 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10293 arm_print_value (f, XEXP (x, 0));
10297 arm_print_value (f, XEXP (x, 0));
10299 arm_print_value (f, XEXP (x, 1));
10307 fprintf (f, "????");
10312 /* Routines for manipulation of the constant pool. */
10314 /* Arm instructions cannot load a large constant directly into a
10315 register; they have to come from a pc relative load. The constant
10316 must therefore be placed in the addressable range of the pc
10317 relative load. Depending on the precise pc relative load
10318 instruction the range is somewhere between 256 bytes and 4k. This
10319 means that we often have to dump a constant inside a function, and
10320 generate code to branch around it.
10322 It is important to minimize this, since the branches will slow
10323 things down and make the code larger.
10325 Normally we can hide the table after an existing unconditional
10326 branch so that there is no interruption of the flow, but in the
10327 worst case the code looks like this:
10345 We fix this by performing a scan after scheduling, which notices
10346 which instructions need to have their operands fetched from the
10347 constant table and builds the table.
10349 The algorithm starts by building a table of all the constants that
10350 need fixing up and all the natural barriers in the function (places
10351 where a constant table can be dropped without breaking the flow).
10352 For each fixup we note how far the pc-relative replacement will be
10353 able to reach and the offset of the instruction into the function.
10355 Having built the table we then group the fixes together to form
10356 tables that are as large as possible (subject to addressing
10357 constraints) and emit each table of constants after the last
10358 barrier that is within range of all the instructions in the group.
10359 If a group does not contain a barrier, then we forcibly create one
10360 by inserting a jump instruction into the flow. Once the table has
10361 been inserted, the insns are then modified to reference the
10362 relevant entry in the pool.
10364 Possible enhancements to the algorithm (not implemented) are:
10366 1) For some processors and object formats, there may be benefit in
10367 aligning the pools to the start of cache lines; this alignment
10368 would need to be taken into account when calculating addressability
10371 /* These typedefs are located at the start of this file, so that
10372 they can be used in the prototypes there. This comment is to
10373 remind readers of that fact so that the following structures
10374 can be understood more easily.
10376 typedef struct minipool_node Mnode;
10377 typedef struct minipool_fixup Mfix; */
10379 struct minipool_node
10381 /* Doubly linked chain of entries. */
10384 /* The maximum offset into the code that this entry can be placed. While
10385 pushing fixes for forward references, all entries are sorted in order
10386 of increasing max_address. */
10387 HOST_WIDE_INT max_address;
10388 /* Similarly for an entry inserted for a backwards ref. */
10389 HOST_WIDE_INT min_address;
10390 /* The number of fixes referencing this entry. This can become zero
10391 if we "unpush" an entry. In this case we ignore the entry when we
10392 come to emit the code. */
10394 /* The offset from the start of the minipool. */
10395 HOST_WIDE_INT offset;
10396 /* The value in table. */
10398 /* The mode of value. */
10399 enum machine_mode mode;
10400 /* The size of the value. With iWMMXt enabled
10401 sizes > 4 also imply an alignment of 8-bytes. */
10405 struct minipool_fixup
10409 HOST_WIDE_INT address;
10411 enum machine_mode mode;
10415 HOST_WIDE_INT forwards;
10416 HOST_WIDE_INT backwards;
10419 /* Fixes less than a word need padding out to a word boundary. */
10420 #define MINIPOOL_FIX_SIZE(mode) \
10421 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10423 static Mnode * minipool_vector_head;
10424 static Mnode * minipool_vector_tail;
10425 static rtx minipool_vector_label;
10426 static int minipool_pad;
10428 /* The linked list of all minipool fixes required for this function. */
10429 Mfix * minipool_fix_head;
10430 Mfix * minipool_fix_tail;
10431 /* The fix entry for the current minipool, once it has been placed. */
10432 Mfix * minipool_barrier;
10434 /* Determines if INSN is the start of a jump table. Returns the end
10435 of the TABLE or NULL_RTX. */
10437 is_jump_table (rtx insn)
10441 if (GET_CODE (insn) == JUMP_INSN
10442 && JUMP_LABEL (insn) != NULL
10443 && ((table = next_real_insn (JUMP_LABEL (insn)))
10444 == next_real_insn (insn))
10446 && GET_CODE (table) == JUMP_INSN
10447 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10448 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10454 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10455 #define JUMP_TABLES_IN_TEXT_SECTION 0
10458 static HOST_WIDE_INT
10459 get_jump_table_size (rtx insn)
10461 /* ADDR_VECs only take room if read-only data does into the text
10463 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10465 rtx body = PATTERN (insn);
10466 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10467 HOST_WIDE_INT size;
10468 HOST_WIDE_INT modesize;
10470 modesize = GET_MODE_SIZE (GET_MODE (body));
10471 size = modesize * XVECLEN (body, elt);
10475 /* Round up size of TBB table to a halfword boundary. */
10476 size = (size + 1) & ~(HOST_WIDE_INT)1;
10479 /* No padding necessary for TBH. */
10482 /* Add two bytes for alignment on Thumb. */
10487 gcc_unreachable ();
10495 /* Move a minipool fix MP from its current location to before MAX_MP.
10496 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10497 constraints may need updating. */
10499 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10500 HOST_WIDE_INT max_address)
10502 /* The code below assumes these are different. */
10503 gcc_assert (mp != max_mp);
10505 if (max_mp == NULL)
10507 if (max_address < mp->max_address)
10508 mp->max_address = max_address;
10512 if (max_address > max_mp->max_address - mp->fix_size)
10513 mp->max_address = max_mp->max_address - mp->fix_size;
10515 mp->max_address = max_address;
10517 /* Unlink MP from its current position. Since max_mp is non-null,
10518 mp->prev must be non-null. */
10519 mp->prev->next = mp->next;
10520 if (mp->next != NULL)
10521 mp->next->prev = mp->prev;
10523 minipool_vector_tail = mp->prev;
10525 /* Re-insert it before MAX_MP. */
10527 mp->prev = max_mp->prev;
10530 if (mp->prev != NULL)
10531 mp->prev->next = mp;
10533 minipool_vector_head = mp;
10536 /* Save the new entry. */
10539 /* Scan over the preceding entries and adjust their addresses as
10541 while (mp->prev != NULL
10542 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10544 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10551 /* Add a constant to the minipool for a forward reference. Returns the
10552 node added or NULL if the constant will not fit in this pool. */
10554 add_minipool_forward_ref (Mfix *fix)
10556 /* If set, max_mp is the first pool_entry that has a lower
10557 constraint than the one we are trying to add. */
10558 Mnode * max_mp = NULL;
10559 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10562 /* If the minipool starts before the end of FIX->INSN then this FIX
10563 can not be placed into the current pool. Furthermore, adding the
10564 new constant pool entry may cause the pool to start FIX_SIZE bytes
10566 if (minipool_vector_head &&
10567 (fix->address + get_attr_length (fix->insn)
10568 >= minipool_vector_head->max_address - fix->fix_size))
10571 /* Scan the pool to see if a constant with the same value has
10572 already been added. While we are doing this, also note the
10573 location where we must insert the constant if it doesn't already
10575 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10577 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10578 && fix->mode == mp->mode
10579 && (GET_CODE (fix->value) != CODE_LABEL
10580 || (CODE_LABEL_NUMBER (fix->value)
10581 == CODE_LABEL_NUMBER (mp->value)))
10582 && rtx_equal_p (fix->value, mp->value))
10584 /* More than one fix references this entry. */
10586 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10589 /* Note the insertion point if necessary. */
10591 && mp->max_address > max_address)
10594 /* If we are inserting an 8-bytes aligned quantity and
10595 we have not already found an insertion point, then
10596 make sure that all such 8-byte aligned quantities are
10597 placed at the start of the pool. */
10598 if (ARM_DOUBLEWORD_ALIGN
10600 && fix->fix_size >= 8
10601 && mp->fix_size < 8)
10604 max_address = mp->max_address;
10608 /* The value is not currently in the minipool, so we need to create
10609 a new entry for it. If MAX_MP is NULL, the entry will be put on
10610 the end of the list since the placement is less constrained than
10611 any existing entry. Otherwise, we insert the new fix before
10612 MAX_MP and, if necessary, adjust the constraints on the other
10615 mp->fix_size = fix->fix_size;
10616 mp->mode = fix->mode;
10617 mp->value = fix->value;
10619 /* Not yet required for a backwards ref. */
10620 mp->min_address = -65536;
10622 if (max_mp == NULL)
10624 mp->max_address = max_address;
10626 mp->prev = minipool_vector_tail;
10628 if (mp->prev == NULL)
10630 minipool_vector_head = mp;
10631 minipool_vector_label = gen_label_rtx ();
10634 mp->prev->next = mp;
10636 minipool_vector_tail = mp;
10640 if (max_address > max_mp->max_address - mp->fix_size)
10641 mp->max_address = max_mp->max_address - mp->fix_size;
10643 mp->max_address = max_address;
10646 mp->prev = max_mp->prev;
10648 if (mp->prev != NULL)
10649 mp->prev->next = mp;
10651 minipool_vector_head = mp;
10654 /* Save the new entry. */
10657 /* Scan over the preceding entries and adjust their addresses as
10659 while (mp->prev != NULL
10660 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10662 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10670 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10671 HOST_WIDE_INT min_address)
10673 HOST_WIDE_INT offset;
10675 /* The code below assumes these are different. */
10676 gcc_assert (mp != min_mp);
10678 if (min_mp == NULL)
10680 if (min_address > mp->min_address)
10681 mp->min_address = min_address;
10685 /* We will adjust this below if it is too loose. */
10686 mp->min_address = min_address;
10688 /* Unlink MP from its current position. Since min_mp is non-null,
10689 mp->next must be non-null. */
10690 mp->next->prev = mp->prev;
10691 if (mp->prev != NULL)
10692 mp->prev->next = mp->next;
10694 minipool_vector_head = mp->next;
10696 /* Reinsert it after MIN_MP. */
10698 mp->next = min_mp->next;
10700 if (mp->next != NULL)
10701 mp->next->prev = mp;
10703 minipool_vector_tail = mp;
10709 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10711 mp->offset = offset;
10712 if (mp->refcount > 0)
10713 offset += mp->fix_size;
10715 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10716 mp->next->min_address = mp->min_address + mp->fix_size;
10722 /* Add a constant to the minipool for a backward reference. Returns the
10723 node added or NULL if the constant will not fit in this pool.
10725 Note that the code for insertion for a backwards reference can be
10726 somewhat confusing because the calculated offsets for each fix do
10727 not take into account the size of the pool (which is still under
10730 add_minipool_backward_ref (Mfix *fix)
10732 /* If set, min_mp is the last pool_entry that has a lower constraint
10733 than the one we are trying to add. */
10734 Mnode *min_mp = NULL;
10735 /* This can be negative, since it is only a constraint. */
10736 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10739 /* If we can't reach the current pool from this insn, or if we can't
10740 insert this entry at the end of the pool without pushing other
10741 fixes out of range, then we don't try. This ensures that we
10742 can't fail later on. */
10743 if (min_address >= minipool_barrier->address
10744 || (minipool_vector_tail->min_address + fix->fix_size
10745 >= minipool_barrier->address))
10748 /* Scan the pool to see if a constant with the same value has
10749 already been added. While we are doing this, also note the
10750 location where we must insert the constant if it doesn't already
10752 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10754 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10755 && fix->mode == mp->mode
10756 && (GET_CODE (fix->value) != CODE_LABEL
10757 || (CODE_LABEL_NUMBER (fix->value)
10758 == CODE_LABEL_NUMBER (mp->value)))
10759 && rtx_equal_p (fix->value, mp->value)
10760 /* Check that there is enough slack to move this entry to the
10761 end of the table (this is conservative). */
10762 && (mp->max_address
10763 > (minipool_barrier->address
10764 + minipool_vector_tail->offset
10765 + minipool_vector_tail->fix_size)))
10768 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10771 if (min_mp != NULL)
10772 mp->min_address += fix->fix_size;
10775 /* Note the insertion point if necessary. */
10776 if (mp->min_address < min_address)
10778 /* For now, we do not allow the insertion of 8-byte alignment
10779 requiring nodes anywhere but at the start of the pool. */
10780 if (ARM_DOUBLEWORD_ALIGN
10781 && fix->fix_size >= 8 && mp->fix_size < 8)
10786 else if (mp->max_address
10787 < minipool_barrier->address + mp->offset + fix->fix_size)
10789 /* Inserting before this entry would push the fix beyond
10790 its maximum address (which can happen if we have
10791 re-located a forwards fix); force the new fix to come
10793 if (ARM_DOUBLEWORD_ALIGN
10794 && fix->fix_size >= 8 && mp->fix_size < 8)
10799 min_address = mp->min_address + fix->fix_size;
10802 /* Do not insert a non-8-byte aligned quantity before 8-byte
10803 aligned quantities. */
10804 else if (ARM_DOUBLEWORD_ALIGN
10805 && fix->fix_size < 8
10806 && mp->fix_size >= 8)
10809 min_address = mp->min_address + fix->fix_size;
10814 /* We need to create a new entry. */
10816 mp->fix_size = fix->fix_size;
10817 mp->mode = fix->mode;
10818 mp->value = fix->value;
10820 mp->max_address = minipool_barrier->address + 65536;
10822 mp->min_address = min_address;
10824 if (min_mp == NULL)
10827 mp->next = minipool_vector_head;
10829 if (mp->next == NULL)
10831 minipool_vector_tail = mp;
10832 minipool_vector_label = gen_label_rtx ();
10835 mp->next->prev = mp;
10837 minipool_vector_head = mp;
10841 mp->next = min_mp->next;
10845 if (mp->next != NULL)
10846 mp->next->prev = mp;
10848 minipool_vector_tail = mp;
10851 /* Save the new entry. */
10859 /* Scan over the following entries and adjust their offsets. */
10860 while (mp->next != NULL)
10862 if (mp->next->min_address < mp->min_address + mp->fix_size)
10863 mp->next->min_address = mp->min_address + mp->fix_size;
10866 mp->next->offset = mp->offset + mp->fix_size;
10868 mp->next->offset = mp->offset;
10877 assign_minipool_offsets (Mfix *barrier)
10879 HOST_WIDE_INT offset = 0;
10882 minipool_barrier = barrier;
10884 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10886 mp->offset = offset;
10888 if (mp->refcount > 0)
10889 offset += mp->fix_size;
10893 /* Output the literal table */
10895 dump_minipool (rtx scan)
10901 if (ARM_DOUBLEWORD_ALIGN)
10902 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10903 if (mp->refcount > 0 && mp->fix_size >= 8)
10910 fprintf (dump_file,
10911 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10912 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10914 scan = emit_label_after (gen_label_rtx (), scan);
10915 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10916 scan = emit_label_after (minipool_vector_label, scan);
10918 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10920 if (mp->refcount > 0)
10924 fprintf (dump_file,
10925 ";; Offset %u, min %ld, max %ld ",
10926 (unsigned) mp->offset, (unsigned long) mp->min_address,
10927 (unsigned long) mp->max_address);
10928 arm_print_value (dump_file, mp->value);
10929 fputc ('\n', dump_file);
10932 switch (mp->fix_size)
10934 #ifdef HAVE_consttable_1
10936 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10940 #ifdef HAVE_consttable_2
10942 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10946 #ifdef HAVE_consttable_4
10948 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10952 #ifdef HAVE_consttable_8
10954 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10958 #ifdef HAVE_consttable_16
10960 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10965 gcc_unreachable ();
10973 minipool_vector_head = minipool_vector_tail = NULL;
10974 scan = emit_insn_after (gen_consttable_end (), scan);
10975 scan = emit_barrier_after (scan);
10978 /* Return the cost of forcibly inserting a barrier after INSN. */
10980 arm_barrier_cost (rtx insn)
10982 /* Basing the location of the pool on the loop depth is preferable,
10983 but at the moment, the basic block information seems to be
10984 corrupt by this stage of the compilation. */
10985 int base_cost = 50;
10986 rtx next = next_nonnote_insn (insn);
10988 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10991 switch (GET_CODE (insn))
10994 /* It will always be better to place the table before the label, rather
11003 return base_cost - 10;
11006 return base_cost + 10;
11010 /* Find the best place in the insn stream in the range
11011 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11012 Create the barrier by inserting a jump and add a new fix entry for
11015 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11017 HOST_WIDE_INT count = 0;
11019 rtx from = fix->insn;
11020 /* The instruction after which we will insert the jump. */
11021 rtx selected = NULL;
11023 /* The address at which the jump instruction will be placed. */
11024 HOST_WIDE_INT selected_address;
11026 HOST_WIDE_INT max_count = max_address - fix->address;
11027 rtx label = gen_label_rtx ();
11029 selected_cost = arm_barrier_cost (from);
11030 selected_address = fix->address;
11032 while (from && count < max_count)
11037 /* This code shouldn't have been called if there was a natural barrier
11039 gcc_assert (GET_CODE (from) != BARRIER);
11041 /* Count the length of this insn. */
11042 count += get_attr_length (from);
11044 /* If there is a jump table, add its length. */
11045 tmp = is_jump_table (from);
11048 count += get_jump_table_size (tmp);
11050 /* Jump tables aren't in a basic block, so base the cost on
11051 the dispatch insn. If we select this location, we will
11052 still put the pool after the table. */
11053 new_cost = arm_barrier_cost (from);
11055 if (count < max_count
11056 && (!selected || new_cost <= selected_cost))
11059 selected_cost = new_cost;
11060 selected_address = fix->address + count;
11063 /* Continue after the dispatch table. */
11064 from = NEXT_INSN (tmp);
11068 new_cost = arm_barrier_cost (from);
11070 if (count < max_count
11071 && (!selected || new_cost <= selected_cost))
11074 selected_cost = new_cost;
11075 selected_address = fix->address + count;
11078 from = NEXT_INSN (from);
11081 /* Make sure that we found a place to insert the jump. */
11082 gcc_assert (selected);
11084 /* Create a new JUMP_INSN that branches around a barrier. */
11085 from = emit_jump_insn_after (gen_jump (label), selected);
11086 JUMP_LABEL (from) = label;
11087 barrier = emit_barrier_after (from);
11088 emit_label_after (label, barrier);
11090 /* Create a minipool barrier entry for the new barrier. */
11091 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11092 new_fix->insn = barrier;
11093 new_fix->address = selected_address;
11094 new_fix->next = fix->next;
11095 fix->next = new_fix;
11100 /* Record that there is a natural barrier in the insn stream at
11103 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11105 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11108 fix->address = address;
11111 if (minipool_fix_head != NULL)
11112 minipool_fix_tail->next = fix;
11114 minipool_fix_head = fix;
11116 minipool_fix_tail = fix;
11119 /* Record INSN, which will need fixing up to load a value from the
11120 minipool. ADDRESS is the offset of the insn since the start of the
11121 function; LOC is a pointer to the part of the insn which requires
11122 fixing; VALUE is the constant that must be loaded, which is of type
11125 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11126 enum machine_mode mode, rtx value)
11128 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11131 fix->address = address;
11134 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11135 fix->value = value;
11136 fix->forwards = get_attr_pool_range (insn);
11137 fix->backwards = get_attr_neg_pool_range (insn);
11138 fix->minipool = NULL;
11140 /* If an insn doesn't have a range defined for it, then it isn't
11141 expecting to be reworked by this code. Better to stop now than
11142 to generate duff assembly code. */
11143 gcc_assert (fix->forwards || fix->backwards);
11145 /* If an entry requires 8-byte alignment then assume all constant pools
11146 require 4 bytes of padding. Trying to do this later on a per-pool
11147 basis is awkward because existing pool entries have to be modified. */
11148 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11153 fprintf (dump_file,
11154 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11155 GET_MODE_NAME (mode),
11156 INSN_UID (insn), (unsigned long) address,
11157 -1 * (long)fix->backwards, (long)fix->forwards);
11158 arm_print_value (dump_file, fix->value);
11159 fprintf (dump_file, "\n");
11162 /* Add it to the chain of fixes. */
11165 if (minipool_fix_head != NULL)
11166 minipool_fix_tail->next = fix;
11168 minipool_fix_head = fix;
11170 minipool_fix_tail = fix;
11173 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11174 Returns the number of insns needed, or 99 if we don't know how to
11177 arm_const_double_inline_cost (rtx val)
11179 rtx lowpart, highpart;
11180 enum machine_mode mode;
11182 mode = GET_MODE (val);
11184 if (mode == VOIDmode)
11187 gcc_assert (GET_MODE_SIZE (mode) == 8);
11189 lowpart = gen_lowpart (SImode, val);
11190 highpart = gen_highpart_mode (SImode, mode, val);
11192 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11193 gcc_assert (GET_CODE (highpart) == CONST_INT);
11195 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11196 NULL_RTX, NULL_RTX, 0, 0)
11197 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11198 NULL_RTX, NULL_RTX, 0, 0));
11201 /* Return true if it is worthwhile to split a 64-bit constant into two
11202 32-bit operations. This is the case if optimizing for size, or
11203 if we have load delay slots, or if one 32-bit part can be done with
11204 a single data operation. */
11206 arm_const_double_by_parts (rtx val)
11208 enum machine_mode mode = GET_MODE (val);
11211 if (optimize_size || arm_ld_sched)
11214 if (mode == VOIDmode)
11217 part = gen_highpart_mode (SImode, mode, val);
11219 gcc_assert (GET_CODE (part) == CONST_INT);
11221 if (const_ok_for_arm (INTVAL (part))
11222 || const_ok_for_arm (~INTVAL (part)))
11225 part = gen_lowpart (SImode, val);
11227 gcc_assert (GET_CODE (part) == CONST_INT);
11229 if (const_ok_for_arm (INTVAL (part))
11230 || const_ok_for_arm (~INTVAL (part)))
11236 /* Scan INSN and note any of its operands that need fixing.
11237 If DO_PUSHES is false we do not actually push any of the fixups
11238 needed. The function returns TRUE if any fixups were needed/pushed.
11239 This is used by arm_memory_load_p() which needs to know about loads
11240 of constants that will be converted into minipool loads. */
11242 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11244 bool result = false;
11247 extract_insn (insn);
11249 if (!constrain_operands (1))
11250 fatal_insn_not_found (insn);
11252 if (recog_data.n_alternatives == 0)
11255 /* Fill in recog_op_alt with information about the constraints of
11257 preprocess_constraints ();
11259 for (opno = 0; opno < recog_data.n_operands; opno++)
11261 /* Things we need to fix can only occur in inputs. */
11262 if (recog_data.operand_type[opno] != OP_IN)
11265 /* If this alternative is a memory reference, then any mention
11266 of constants in this alternative is really to fool reload
11267 into allowing us to accept one there. We need to fix them up
11268 now so that we output the right code. */
11269 if (recog_op_alt[opno][which_alternative].memory_ok)
11271 rtx op = recog_data.operand[opno];
11273 if (CONSTANT_P (op))
11276 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11277 recog_data.operand_mode[opno], op);
11280 else if (GET_CODE (op) == MEM
11281 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11282 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11286 rtx cop = avoid_constant_pool_reference (op);
11288 /* Casting the address of something to a mode narrower
11289 than a word can cause avoid_constant_pool_reference()
11290 to return the pool reference itself. That's no good to
11291 us here. Lets just hope that we can use the
11292 constant pool value directly. */
11294 cop = get_pool_constant (XEXP (op, 0));
11296 push_minipool_fix (insn, address,
11297 recog_data.operand_loc[opno],
11298 recog_data.operand_mode[opno], cop);
11309 /* Gcc puts the pool in the wrong place for ARM, since we can only
11310 load addresses a limited distance around the pc. We do some
11311 special munging to move the constant pool values to the correct
11312 point in the code. */
11317 HOST_WIDE_INT address = 0;
11320 minipool_fix_head = minipool_fix_tail = NULL;
11322 /* The first insn must always be a note, or the code below won't
11323 scan it properly. */
11324 insn = get_insns ();
11325 gcc_assert (GET_CODE (insn) == NOTE);
11328 /* Scan all the insns and record the operands that will need fixing. */
11329 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11331 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11332 && (arm_cirrus_insn_p (insn)
11333 || GET_CODE (insn) == JUMP_INSN
11334 || arm_memory_load_p (insn)))
11335 cirrus_reorg (insn);
11337 if (GET_CODE (insn) == BARRIER)
11338 push_minipool_barrier (insn, address);
11339 else if (INSN_P (insn))
11343 note_invalid_constants (insn, address, true);
11344 address += get_attr_length (insn);
11346 /* If the insn is a vector jump, add the size of the table
11347 and skip the table. */
11348 if ((table = is_jump_table (insn)) != NULL)
11350 address += get_jump_table_size (table);
11356 fix = minipool_fix_head;
11358 /* Now scan the fixups and perform the required changes. */
11363 Mfix * last_added_fix;
11364 Mfix * last_barrier = NULL;
11367 /* Skip any further barriers before the next fix. */
11368 while (fix && GET_CODE (fix->insn) == BARRIER)
11371 /* No more fixes. */
11375 last_added_fix = NULL;
11377 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11379 if (GET_CODE (ftmp->insn) == BARRIER)
11381 if (ftmp->address >= minipool_vector_head->max_address)
11384 last_barrier = ftmp;
11386 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11389 last_added_fix = ftmp; /* Keep track of the last fix added. */
11392 /* If we found a barrier, drop back to that; any fixes that we
11393 could have reached but come after the barrier will now go in
11394 the next mini-pool. */
11395 if (last_barrier != NULL)
11397 /* Reduce the refcount for those fixes that won't go into this
11399 for (fdel = last_barrier->next;
11400 fdel && fdel != ftmp;
11403 fdel->minipool->refcount--;
11404 fdel->minipool = NULL;
11407 ftmp = last_barrier;
11411 /* ftmp is first fix that we can't fit into this pool and
11412 there no natural barriers that we could use. Insert a
11413 new barrier in the code somewhere between the previous
11414 fix and this one, and arrange to jump around it. */
11415 HOST_WIDE_INT max_address;
11417 /* The last item on the list of fixes must be a barrier, so
11418 we can never run off the end of the list of fixes without
11419 last_barrier being set. */
11422 max_address = minipool_vector_head->max_address;
11423 /* Check that there isn't another fix that is in range that
11424 we couldn't fit into this pool because the pool was
11425 already too large: we need to put the pool before such an
11426 instruction. The pool itself may come just after the
11427 fix because create_fix_barrier also allows space for a
11428 jump instruction. */
11429 if (ftmp->address < max_address)
11430 max_address = ftmp->address + 1;
11432 last_barrier = create_fix_barrier (last_added_fix, max_address);
11435 assign_minipool_offsets (last_barrier);
11439 if (GET_CODE (ftmp->insn) != BARRIER
11440 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11447 /* Scan over the fixes we have identified for this pool, fixing them
11448 up and adding the constants to the pool itself. */
11449 for (this_fix = fix; this_fix && ftmp != this_fix;
11450 this_fix = this_fix->next)
11451 if (GET_CODE (this_fix->insn) != BARRIER)
11454 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11455 minipool_vector_label),
11456 this_fix->minipool->offset);
11457 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11460 dump_minipool (last_barrier->insn);
11464 /* From now on we must synthesize any constants that we can't handle
11465 directly. This can happen if the RTL gets split during final
11466 instruction generation. */
11467 after_arm_reorg = 1;
11469 /* Free the minipool memory. */
11470 obstack_free (&minipool_obstack, minipool_startobj);
11473 /* Routines to output assembly language. */
11475 /* If the rtx is the correct value then return the string of the number.
11476 In this way we can ensure that valid double constants are generated even
11477 when cross compiling. */
11479 fp_immediate_constant (rtx x)
11484 if (!fp_consts_inited)
11487 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11488 for (i = 0; i < 8; i++)
11489 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11490 return strings_fp[i];
11492 gcc_unreachable ();
11495 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11496 static const char *
11497 fp_const_from_val (REAL_VALUE_TYPE *r)
11501 if (!fp_consts_inited)
11504 for (i = 0; i < 8; i++)
11505 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11506 return strings_fp[i];
11508 gcc_unreachable ();
11511 /* Output the operands of a LDM/STM instruction to STREAM.
11512 MASK is the ARM register set mask of which only bits 0-15 are important.
11513 REG is the base register, either the frame pointer or the stack pointer,
11514 INSTR is the possibly suffixed load or store instruction.
11515 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11518 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11519 unsigned long mask, int rfe)
11522 bool not_first = FALSE;
11524 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11525 fputc ('\t', stream);
11526 asm_fprintf (stream, instr, reg);
11527 fputc ('{', stream);
11529 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11530 if (mask & (1 << i))
11533 fprintf (stream, ", ");
11535 asm_fprintf (stream, "%r", i);
11540 fprintf (stream, "}^\n");
11542 fprintf (stream, "}\n");
11546 /* Output a FLDMD instruction to STREAM.
11547 BASE if the register containing the address.
11548 REG and COUNT specify the register range.
11549 Extra registers may be added to avoid hardware bugs.
11551 We output FLDMD even for ARMv5 VFP implementations. Although
11552 FLDMD is technically not supported until ARMv6, it is believed
11553 that all VFP implementations support its use in this context. */
11556 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11560 /* Workaround ARM10 VFPr1 bug. */
11561 if (count == 2 && !arm_arch6)
11568 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11569 load into multiple parts if we have to handle more than 16 registers. */
11572 vfp_output_fldmd (stream, base, reg, 16);
11573 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11577 fputc ('\t', stream);
11578 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11580 for (i = reg; i < reg + count; i++)
11583 fputs (", ", stream);
11584 asm_fprintf (stream, "d%d", i);
11586 fputs ("}\n", stream);
11591 /* Output the assembly for a store multiple. */
11594 vfp_output_fstmd (rtx * operands)
11601 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11602 p = strlen (pattern);
11604 gcc_assert (GET_CODE (operands[1]) == REG);
11606 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11607 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11609 p += sprintf (&pattern[p], ", d%d", base + i);
11611 strcpy (&pattern[p], "}");
11613 output_asm_insn (pattern, operands);
11618 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11619 number of bytes pushed. */
11622 vfp_emit_fstmd (int base_reg, int count)
11629 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11630 register pairs are stored by a store multiple insn. We avoid this
11631 by pushing an extra pair. */
11632 if (count == 2 && !arm_arch6)
11634 if (base_reg == LAST_VFP_REGNUM - 3)
11639 /* FSTMD may not store more than 16 doubleword registers at once. Split
11640 larger stores into multiple parts (up to a maximum of two, in
11645 /* NOTE: base_reg is an internal register number, so each D register
11647 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11648 saved += vfp_emit_fstmd (base_reg, 16);
11652 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11653 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11655 reg = gen_rtx_REG (DFmode, base_reg);
11658 XVECEXP (par, 0, 0)
11659 = gen_rtx_SET (VOIDmode,
11662 gen_rtx_PRE_MODIFY (Pmode,
11665 (stack_pointer_rtx,
11668 gen_rtx_UNSPEC (BLKmode,
11669 gen_rtvec (1, reg),
11670 UNSPEC_PUSH_MULT));
11672 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11673 plus_constant (stack_pointer_rtx, -(count * 8)));
11674 RTX_FRAME_RELATED_P (tmp) = 1;
11675 XVECEXP (dwarf, 0, 0) = tmp;
11677 tmp = gen_rtx_SET (VOIDmode,
11678 gen_frame_mem (DFmode, stack_pointer_rtx),
11680 RTX_FRAME_RELATED_P (tmp) = 1;
11681 XVECEXP (dwarf, 0, 1) = tmp;
11683 for (i = 1; i < count; i++)
11685 reg = gen_rtx_REG (DFmode, base_reg);
11687 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11689 tmp = gen_rtx_SET (VOIDmode,
11690 gen_frame_mem (DFmode,
11691 plus_constant (stack_pointer_rtx,
11694 RTX_FRAME_RELATED_P (tmp) = 1;
11695 XVECEXP (dwarf, 0, i + 1) = tmp;
11698 par = emit_insn (par);
11699 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11700 RTX_FRAME_RELATED_P (par) = 1;
11705 /* Emit a call instruction with pattern PAT. ADDR is the address of
11706 the call target. */
11709 arm_emit_call_insn (rtx pat, rtx addr)
11713 insn = emit_call_insn (pat);
11715 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11716 If the call might use such an entry, add a use of the PIC register
11717 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11718 if (TARGET_VXWORKS_RTP
11720 && GET_CODE (addr) == SYMBOL_REF
11721 && (SYMBOL_REF_DECL (addr)
11722 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11723 : !SYMBOL_REF_LOCAL_P (addr)))
11725 require_pic_register ();
11726 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11730 /* Output a 'call' insn. */
11732 output_call (rtx *operands)
11734 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11736 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11737 if (REGNO (operands[0]) == LR_REGNUM)
11739 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11740 output_asm_insn ("mov%?\t%0, %|lr", operands);
11743 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11745 if (TARGET_INTERWORK || arm_arch4t)
11746 output_asm_insn ("bx%?\t%0", operands);
11748 output_asm_insn ("mov%?\t%|pc, %0", operands);
11753 /* Output a 'call' insn that is a reference in memory. This is
11754 disabled for ARMv5 and we prefer a blx instead because otherwise
11755 there's a significant performance overhead. */
11757 output_call_mem (rtx *operands)
11759 gcc_assert (!arm_arch5);
11760 if (TARGET_INTERWORK)
11762 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11763 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11764 output_asm_insn ("bx%?\t%|ip", operands);
11766 else if (regno_use_in (LR_REGNUM, operands[0]))
11768 /* LR is used in the memory address. We load the address in the
11769 first instruction. It's safe to use IP as the target of the
11770 load since the call will kill it anyway. */
11771 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11772 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11774 output_asm_insn ("bx%?\t%|ip", operands);
11776 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11780 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11781 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11788 /* Output a move from arm registers to an fpa registers.
11789 OPERANDS[0] is an fpa register.
11790 OPERANDS[1] is the first registers of an arm register pair. */
11792 output_mov_long_double_fpa_from_arm (rtx *operands)
11794 int arm_reg0 = REGNO (operands[1]);
11797 gcc_assert (arm_reg0 != IP_REGNUM);
11799 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11800 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11801 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11803 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11804 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11809 /* Output a move from an fpa register to arm registers.
11810 OPERANDS[0] is the first registers of an arm register pair.
11811 OPERANDS[1] is an fpa register. */
11813 output_mov_long_double_arm_from_fpa (rtx *operands)
11815 int arm_reg0 = REGNO (operands[0]);
11818 gcc_assert (arm_reg0 != IP_REGNUM);
11820 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11821 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11822 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11824 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11825 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11829 /* Output a move from arm registers to arm registers of a long double
11830 OPERANDS[0] is the destination.
11831 OPERANDS[1] is the source. */
11833 output_mov_long_double_arm_from_arm (rtx *operands)
11835 /* We have to be careful here because the two might overlap. */
11836 int dest_start = REGNO (operands[0]);
11837 int src_start = REGNO (operands[1]);
11841 if (dest_start < src_start)
11843 for (i = 0; i < 3; i++)
11845 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11846 ops[1] = gen_rtx_REG (SImode, src_start + i);
11847 output_asm_insn ("mov%?\t%0, %1", ops);
11852 for (i = 2; i >= 0; i--)
11854 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11855 ops[1] = gen_rtx_REG (SImode, src_start + i);
11856 output_asm_insn ("mov%?\t%0, %1", ops);
11864 arm_emit_movpair (rtx dest, rtx src)
11866 /* If the src is an immediate, simplify it. */
11867 if (CONST_INT_P (src))
11869 HOST_WIDE_INT val = INTVAL (src);
11870 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11871 if ((val >> 16) & 0x0000ffff)
11872 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11874 GEN_INT ((val >> 16) & 0x0000ffff));
11877 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11878 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
11881 /* Output a move from arm registers to an fpa registers.
11882 OPERANDS[0] is an fpa register.
11883 OPERANDS[1] is the first registers of an arm register pair. */
11885 output_mov_double_fpa_from_arm (rtx *operands)
11887 int arm_reg0 = REGNO (operands[1]);
11890 gcc_assert (arm_reg0 != IP_REGNUM);
11892 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11893 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11894 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11895 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11899 /* Output a move from an fpa register to arm registers.
11900 OPERANDS[0] is the first registers of an arm register pair.
11901 OPERANDS[1] is an fpa register. */
11903 output_mov_double_arm_from_fpa (rtx *operands)
11905 int arm_reg0 = REGNO (operands[0]);
11908 gcc_assert (arm_reg0 != IP_REGNUM);
11910 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11911 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11912 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11913 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11917 /* Output a move between double words.
11918 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11919 or MEM<-REG and all MEMs must be offsettable addresses. */
11921 output_move_double (rtx *operands)
11923 enum rtx_code code0 = GET_CODE (operands[0]);
11924 enum rtx_code code1 = GET_CODE (operands[1]);
11929 unsigned int reg0 = REGNO (operands[0]);
11931 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11933 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11935 switch (GET_CODE (XEXP (operands[1], 0)))
11939 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11940 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11942 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11946 gcc_assert (TARGET_LDRD);
11947 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11952 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11954 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11959 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11961 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11965 gcc_assert (TARGET_LDRD);
11966 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11971 /* Autoicrement addressing modes should never have overlapping
11972 base and destination registers, and overlapping index registers
11973 are already prohibited, so this doesn't need to worry about
11975 otherops[0] = operands[0];
11976 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11977 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11979 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11981 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11983 /* Registers overlap so split out the increment. */
11984 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11985 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11989 /* Use a single insn if we can.
11990 FIXME: IWMMXT allows offsets larger than ldrd can
11991 handle, fix these up with a pair of ldr. */
11993 || GET_CODE (otherops[2]) != CONST_INT
11994 || (INTVAL (otherops[2]) > -256
11995 && INTVAL (otherops[2]) < 256))
11996 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11999 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12000 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12006 /* Use a single insn if we can.
12007 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12008 fix these up with a pair of ldr. */
12010 || GET_CODE (otherops[2]) != CONST_INT
12011 || (INTVAL (otherops[2]) > -256
12012 && INTVAL (otherops[2]) < 256))
12013 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12016 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12017 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12024 /* We might be able to use ldrd %0, %1 here. However the range is
12025 different to ldr/adr, and it is broken on some ARMv7-M
12026 implementations. */
12027 /* Use the second register of the pair to avoid problematic
12029 otherops[1] = operands[1];
12030 output_asm_insn ("adr%?\t%0, %1", otherops);
12031 operands[1] = otherops[0];
12033 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12035 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12038 /* ??? This needs checking for thumb2. */
12040 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12041 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12043 otherops[0] = operands[0];
12044 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12045 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12047 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12049 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12051 switch ((int) INTVAL (otherops[2]))
12054 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12059 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12064 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12068 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12069 operands[1] = otherops[0];
12071 && (GET_CODE (otherops[2]) == REG
12073 || (GET_CODE (otherops[2]) == CONST_INT
12074 && INTVAL (otherops[2]) > -256
12075 && INTVAL (otherops[2]) < 256)))
12077 if (reg_overlap_mentioned_p (operands[0],
12081 /* Swap base and index registers over to
12082 avoid a conflict. */
12084 otherops[1] = otherops[2];
12087 /* If both registers conflict, it will usually
12088 have been fixed by a splitter. */
12089 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12090 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12092 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12093 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12097 otherops[0] = operands[0];
12098 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12103 if (GET_CODE (otherops[2]) == CONST_INT)
12105 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12106 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12108 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12111 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12114 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12117 return "ldr%(d%)\t%0, [%1]";
12119 return "ldm%(ia%)\t%1, %M0";
12123 otherops[1] = adjust_address (operands[1], SImode, 4);
12124 /* Take care of overlapping base/data reg. */
12125 if (reg_mentioned_p (operands[0], operands[1]))
12127 output_asm_insn ("ldr%?\t%0, %1", otherops);
12128 output_asm_insn ("ldr%?\t%0, %1", operands);
12132 output_asm_insn ("ldr%?\t%0, %1", operands);
12133 output_asm_insn ("ldr%?\t%0, %1", otherops);
12140 /* Constraints should ensure this. */
12141 gcc_assert (code0 == MEM && code1 == REG);
12142 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12144 switch (GET_CODE (XEXP (operands[0], 0)))
12148 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12150 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12154 gcc_assert (TARGET_LDRD);
12155 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12160 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12162 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12167 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12169 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12173 gcc_assert (TARGET_LDRD);
12174 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12179 otherops[0] = operands[1];
12180 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12181 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12183 /* IWMMXT allows offsets larger than ldrd can handle,
12184 fix these up with a pair of ldr. */
12186 && GET_CODE (otherops[2]) == CONST_INT
12187 && (INTVAL(otherops[2]) <= -256
12188 || INTVAL(otherops[2]) >= 256))
12190 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12192 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12193 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12197 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12198 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12201 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12202 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12204 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12208 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12209 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12211 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12214 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12220 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12226 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12231 && (GET_CODE (otherops[2]) == REG
12233 || (GET_CODE (otherops[2]) == CONST_INT
12234 && INTVAL (otherops[2]) > -256
12235 && INTVAL (otherops[2]) < 256)))
12237 otherops[0] = operands[1];
12238 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12239 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12245 otherops[0] = adjust_address (operands[0], SImode, 4);
12246 otherops[1] = operands[1];
12247 output_asm_insn ("str%?\t%1, %0", operands);
12248 output_asm_insn ("str%?\t%H1, %0", otherops);
12255 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12256 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12259 output_move_quad (rtx *operands)
12261 if (REG_P (operands[0]))
12263 /* Load, or reg->reg move. */
12265 if (MEM_P (operands[1]))
12267 switch (GET_CODE (XEXP (operands[1], 0)))
12270 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12275 output_asm_insn ("adr%?\t%0, %1", operands);
12276 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12280 gcc_unreachable ();
12288 gcc_assert (REG_P (operands[1]));
12290 dest = REGNO (operands[0]);
12291 src = REGNO (operands[1]);
12293 /* This seems pretty dumb, but hopefully GCC won't try to do it
12296 for (i = 0; i < 4; i++)
12298 ops[0] = gen_rtx_REG (SImode, dest + i);
12299 ops[1] = gen_rtx_REG (SImode, src + i);
12300 output_asm_insn ("mov%?\t%0, %1", ops);
12303 for (i = 3; i >= 0; i--)
12305 ops[0] = gen_rtx_REG (SImode, dest + i);
12306 ops[1] = gen_rtx_REG (SImode, src + i);
12307 output_asm_insn ("mov%?\t%0, %1", ops);
12313 gcc_assert (MEM_P (operands[0]));
12314 gcc_assert (REG_P (operands[1]));
12315 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12317 switch (GET_CODE (XEXP (operands[0], 0)))
12320 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12324 gcc_unreachable ();
12331 /* Output a VFP load or store instruction. */
12334 output_move_vfp (rtx *operands)
12336 rtx reg, mem, addr, ops[2];
12337 int load = REG_P (operands[0]);
12338 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12339 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12342 enum machine_mode mode;
12344 reg = operands[!load];
12345 mem = operands[load];
12347 mode = GET_MODE (reg);
12349 gcc_assert (REG_P (reg));
12350 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12351 gcc_assert (mode == SFmode
12355 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12356 gcc_assert (MEM_P (mem));
12358 addr = XEXP (mem, 0);
12360 switch (GET_CODE (addr))
12363 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12364 ops[0] = XEXP (addr, 0);
12369 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12370 ops[0] = XEXP (addr, 0);
12375 templ = "f%s%c%%?\t%%%s0, %%1%s";
12381 sprintf (buff, templ,
12382 load ? "ld" : "st",
12385 integer_p ? "\t%@ int" : "");
12386 output_asm_insn (buff, ops);
12391 /* Output a Neon quad-word load or store, or a load or store for
12392 larger structure modes.
12394 WARNING: The ordering of elements is weird in big-endian mode,
12395 because we use VSTM, as required by the EABI. GCC RTL defines
12396 element ordering based on in-memory order. This can be differ
12397 from the architectural ordering of elements within a NEON register.
12398 The intrinsics defined in arm_neon.h use the NEON register element
12399 ordering, not the GCC RTL element ordering.
12401 For example, the in-memory ordering of a big-endian a quadword
12402 vector with 16-bit elements when stored from register pair {d0,d1}
12403 will be (lowest address first, d0[N] is NEON register element N):
12405 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12407 When necessary, quadword registers (dN, dN+1) are moved to ARM
12408 registers from rN in the order:
12410 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12412 So that STM/LDM can be used on vectors in ARM registers, and the
12413 same memory layout will result as if VSTM/VLDM were used. */
12416 output_move_neon (rtx *operands)
12418 rtx reg, mem, addr, ops[2];
12419 int regno, load = REG_P (operands[0]);
12422 enum machine_mode mode;
12424 reg = operands[!load];
12425 mem = operands[load];
12427 mode = GET_MODE (reg);
12429 gcc_assert (REG_P (reg));
12430 regno = REGNO (reg);
12431 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12432 || NEON_REGNO_OK_FOR_QUAD (regno));
12433 gcc_assert (VALID_NEON_DREG_MODE (mode)
12434 || VALID_NEON_QREG_MODE (mode)
12435 || VALID_NEON_STRUCT_MODE (mode));
12436 gcc_assert (MEM_P (mem));
12438 addr = XEXP (mem, 0);
12440 /* Strip off const from addresses like (const (plus (...))). */
12441 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12442 addr = XEXP (addr, 0);
12444 switch (GET_CODE (addr))
12447 templ = "v%smia%%?\t%%0!, %%h1";
12448 ops[0] = XEXP (addr, 0);
12453 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12454 templ = "v%smdb%%?\t%%0!, %%h1";
12455 ops[0] = XEXP (addr, 0);
12460 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12461 gcc_unreachable ();
12466 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12469 for (i = 0; i < nregs; i++)
12471 /* We're only using DImode here because it's a convenient size. */
12472 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12473 ops[1] = adjust_address (mem, DImode, 8 * i);
12474 if (reg_overlap_mentioned_p (ops[0], mem))
12476 gcc_assert (overlap == -1);
12481 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12482 output_asm_insn (buff, ops);
12487 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12488 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12489 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12490 output_asm_insn (buff, ops);
12497 templ = "v%smia%%?\t%%m0, %%h1";
12502 sprintf (buff, templ, load ? "ld" : "st");
12503 output_asm_insn (buff, ops);
12508 /* Output an ADD r, s, #n where n may be too big for one instruction.
12509 If adding zero to one register, output nothing. */
12511 output_add_immediate (rtx *operands)
12513 HOST_WIDE_INT n = INTVAL (operands[2]);
12515 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12518 output_multi_immediate (operands,
12519 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12522 output_multi_immediate (operands,
12523 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12530 /* Output a multiple immediate operation.
12531 OPERANDS is the vector of operands referred to in the output patterns.
12532 INSTR1 is the output pattern to use for the first constant.
12533 INSTR2 is the output pattern to use for subsequent constants.
12534 IMMED_OP is the index of the constant slot in OPERANDS.
12535 N is the constant value. */
12536 static const char *
12537 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12538 int immed_op, HOST_WIDE_INT n)
12540 #if HOST_BITS_PER_WIDE_INT > 32
12546 /* Quick and easy output. */
12547 operands[immed_op] = const0_rtx;
12548 output_asm_insn (instr1, operands);
12553 const char * instr = instr1;
12555 /* Note that n is never zero here (which would give no output). */
12556 for (i = 0; i < 32; i += 2)
12560 operands[immed_op] = GEN_INT (n & (255 << i));
12561 output_asm_insn (instr, operands);
12571 /* Return the name of a shifter operation. */
12572 static const char *
12573 arm_shift_nmem(enum rtx_code code)
12578 return ARM_LSL_NAME;
12594 /* Return the appropriate ARM instruction for the operation code.
12595 The returned result should not be overwritten. OP is the rtx of the
12596 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12599 arithmetic_instr (rtx op, int shift_first_arg)
12601 switch (GET_CODE (op))
12607 return shift_first_arg ? "rsb" : "sub";
12622 return arm_shift_nmem(GET_CODE(op));
12625 gcc_unreachable ();
12629 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12630 for the operation code. The returned result should not be overwritten.
12631 OP is the rtx code of the shift.
12632 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12634 static const char *
12635 shift_op (rtx op, HOST_WIDE_INT *amountp)
12638 enum rtx_code code = GET_CODE (op);
12640 switch (GET_CODE (XEXP (op, 1)))
12648 *amountp = INTVAL (XEXP (op, 1));
12652 gcc_unreachable ();
12658 gcc_assert (*amountp != -1);
12659 *amountp = 32 - *amountp;
12662 /* Fall through. */
12668 mnem = arm_shift_nmem(code);
12672 /* We never have to worry about the amount being other than a
12673 power of 2, since this case can never be reloaded from a reg. */
12674 gcc_assert (*amountp != -1);
12675 *amountp = int_log2 (*amountp);
12676 return ARM_LSL_NAME;
12679 gcc_unreachable ();
12682 if (*amountp != -1)
12684 /* This is not 100% correct, but follows from the desire to merge
12685 multiplication by a power of 2 with the recognizer for a
12686 shift. >=32 is not a valid shift for "lsl", so we must try and
12687 output a shift that produces the correct arithmetical result.
12688 Using lsr #32 is identical except for the fact that the carry bit
12689 is not set correctly if we set the flags; but we never use the
12690 carry bit from such an operation, so we can ignore that. */
12691 if (code == ROTATERT)
12692 /* Rotate is just modulo 32. */
12694 else if (*amountp != (*amountp & 31))
12696 if (code == ASHIFT)
12701 /* Shifts of 0 are no-ops. */
12709 /* Obtain the shift from the POWER of two. */
12711 static HOST_WIDE_INT
12712 int_log2 (HOST_WIDE_INT power)
12714 HOST_WIDE_INT shift = 0;
12716 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12718 gcc_assert (shift <= 31);
12725 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12726 because /bin/as is horribly restrictive. The judgement about
12727 whether or not each character is 'printable' (and can be output as
12728 is) or not (and must be printed with an octal escape) must be made
12729 with reference to the *host* character set -- the situation is
12730 similar to that discussed in the comments above pp_c_char in
12731 c-pretty-print.c. */
12733 #define MAX_ASCII_LEN 51
12736 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12739 int len_so_far = 0;
12741 fputs ("\t.ascii\t\"", stream);
12743 for (i = 0; i < len; i++)
12747 if (len_so_far >= MAX_ASCII_LEN)
12749 fputs ("\"\n\t.ascii\t\"", stream);
12755 if (c == '\\' || c == '\"')
12757 putc ('\\', stream);
12765 fprintf (stream, "\\%03o", c);
12770 fputs ("\"\n", stream);
12773 /* Compute the register save mask for registers 0 through 12
12774 inclusive. This code is used by arm_compute_save_reg_mask. */
12776 static unsigned long
12777 arm_compute_save_reg0_reg12_mask (void)
12779 unsigned long func_type = arm_current_func_type ();
12780 unsigned long save_reg_mask = 0;
12783 if (IS_INTERRUPT (func_type))
12785 unsigned int max_reg;
12786 /* Interrupt functions must not corrupt any registers,
12787 even call clobbered ones. If this is a leaf function
12788 we can just examine the registers used by the RTL, but
12789 otherwise we have to assume that whatever function is
12790 called might clobber anything, and so we have to save
12791 all the call-clobbered registers as well. */
12792 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12793 /* FIQ handlers have registers r8 - r12 banked, so
12794 we only need to check r0 - r7, Normal ISRs only
12795 bank r14 and r15, so we must check up to r12.
12796 r13 is the stack pointer which is always preserved,
12797 so we do not need to consider it here. */
12802 for (reg = 0; reg <= max_reg; reg++)
12803 if (df_regs_ever_live_p (reg)
12804 || (! current_function_is_leaf && call_used_regs[reg]))
12805 save_reg_mask |= (1 << reg);
12807 /* Also save the pic base register if necessary. */
12809 && !TARGET_SINGLE_PIC_BASE
12810 && arm_pic_register != INVALID_REGNUM
12811 && crtl->uses_pic_offset_table)
12812 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12814 else if (IS_VOLATILE(func_type))
12816 /* For noreturn functions we historically omitted register saves
12817 altogether. However this really messes up debugging. As a
12818 compromise save just the frame pointers. Combined with the link
12819 register saved elsewhere this should be sufficient to get
12821 if (frame_pointer_needed)
12822 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12823 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12824 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12825 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12826 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12830 /* In the normal case we only need to save those registers
12831 which are call saved and which are used by this function. */
12832 for (reg = 0; reg <= 11; reg++)
12833 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12834 save_reg_mask |= (1 << reg);
12836 /* Handle the frame pointer as a special case. */
12837 if (frame_pointer_needed)
12838 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12840 /* If we aren't loading the PIC register,
12841 don't stack it even though it may be live. */
12843 && !TARGET_SINGLE_PIC_BASE
12844 && arm_pic_register != INVALID_REGNUM
12845 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12846 || crtl->uses_pic_offset_table))
12847 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12849 /* The prologue will copy SP into R0, so save it. */
12850 if (IS_STACKALIGN (func_type))
12851 save_reg_mask |= 1;
12854 /* Save registers so the exception handler can modify them. */
12855 if (crtl->calls_eh_return)
12861 reg = EH_RETURN_DATA_REGNO (i);
12862 if (reg == INVALID_REGNUM)
12864 save_reg_mask |= 1 << reg;
12868 return save_reg_mask;
12872 /* Compute the number of bytes used to store the static chain register on the
12873 stack, above the stack frame. We need to know this accurately to get the
12874 alignment of the rest of the stack frame correct. */
12876 static int arm_compute_static_chain_stack_bytes (void)
12878 unsigned long func_type = arm_current_func_type ();
12879 int static_chain_stack_bytes = 0;
12881 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
12882 IS_NESTED (func_type) &&
12883 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12884 static_chain_stack_bytes = 4;
12886 return static_chain_stack_bytes;
12890 /* Compute a bit mask of which registers need to be
12891 saved on the stack for the current function.
12892 This is used by arm_get_frame_offsets, which may add extra registers. */
12894 static unsigned long
12895 arm_compute_save_reg_mask (void)
12897 unsigned int save_reg_mask = 0;
12898 unsigned long func_type = arm_current_func_type ();
12901 if (IS_NAKED (func_type))
12902 /* This should never really happen. */
12905 /* If we are creating a stack frame, then we must save the frame pointer,
12906 IP (which will hold the old stack pointer), LR and the PC. */
12907 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12909 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12912 | (1 << PC_REGNUM);
12914 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12916 /* Decide if we need to save the link register.
12917 Interrupt routines have their own banked link register,
12918 so they never need to save it.
12919 Otherwise if we do not use the link register we do not need to save
12920 it. If we are pushing other registers onto the stack however, we
12921 can save an instruction in the epilogue by pushing the link register
12922 now and then popping it back into the PC. This incurs extra memory
12923 accesses though, so we only do it when optimizing for size, and only
12924 if we know that we will not need a fancy return sequence. */
12925 if (df_regs_ever_live_p (LR_REGNUM)
12928 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12929 && !crtl->calls_eh_return))
12930 save_reg_mask |= 1 << LR_REGNUM;
12932 if (cfun->machine->lr_save_eliminated)
12933 save_reg_mask &= ~ (1 << LR_REGNUM);
12935 if (TARGET_REALLY_IWMMXT
12936 && ((bit_count (save_reg_mask)
12937 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12938 arm_compute_static_chain_stack_bytes())
12941 /* The total number of registers that are going to be pushed
12942 onto the stack is odd. We need to ensure that the stack
12943 is 64-bit aligned before we start to save iWMMXt registers,
12944 and also before we start to create locals. (A local variable
12945 might be a double or long long which we will load/store using
12946 an iWMMXt instruction). Therefore we need to push another
12947 ARM register, so that the stack will be 64-bit aligned. We
12948 try to avoid using the arg registers (r0 -r3) as they might be
12949 used to pass values in a tail call. */
12950 for (reg = 4; reg <= 12; reg++)
12951 if ((save_reg_mask & (1 << reg)) == 0)
12955 save_reg_mask |= (1 << reg);
12958 cfun->machine->sibcall_blocked = 1;
12959 save_reg_mask |= (1 << 3);
12963 /* We may need to push an additional register for use initializing the
12964 PIC base register. */
12965 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12966 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12968 reg = thumb_find_work_register (1 << 4);
12969 if (!call_used_regs[reg])
12970 save_reg_mask |= (1 << reg);
12973 return save_reg_mask;
12977 /* Compute a bit mask of which registers need to be
12978 saved on the stack for the current function. */
12979 static unsigned long
12980 thumb1_compute_save_reg_mask (void)
12982 unsigned long mask;
12986 for (reg = 0; reg < 12; reg ++)
12987 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12991 && !TARGET_SINGLE_PIC_BASE
12992 && arm_pic_register != INVALID_REGNUM
12993 && crtl->uses_pic_offset_table)
12994 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12996 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12997 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12998 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13000 /* LR will also be pushed if any lo regs are pushed. */
13001 if (mask & 0xff || thumb_force_lr_save ())
13002 mask |= (1 << LR_REGNUM);
13004 /* Make sure we have a low work register if we need one.
13005 We will need one if we are going to push a high register,
13006 but we are not currently intending to push a low register. */
13007 if ((mask & 0xff) == 0
13008 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13010 /* Use thumb_find_work_register to choose which register
13011 we will use. If the register is live then we will
13012 have to push it. Use LAST_LO_REGNUM as our fallback
13013 choice for the register to select. */
13014 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13015 /* Make sure the register returned by thumb_find_work_register is
13016 not part of the return value. */
13017 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13018 reg = LAST_LO_REGNUM;
13020 if (! call_used_regs[reg])
13024 /* The 504 below is 8 bytes less than 512 because there are two possible
13025 alignment words. We can't tell here if they will be present or not so we
13026 have to play it safe and assume that they are. */
13027 if ((CALLER_INTERWORKING_SLOT_SIZE +
13028 ROUND_UP_WORD (get_frame_size ()) +
13029 crtl->outgoing_args_size) >= 504)
13031 /* This is the same as the code in thumb1_expand_prologue() which
13032 determines which register to use for stack decrement. */
13033 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13034 if (mask & (1 << reg))
13037 if (reg > LAST_LO_REGNUM)
13039 /* Make sure we have a register available for stack decrement. */
13040 mask |= 1 << LAST_LO_REGNUM;
13048 /* Return the number of bytes required to save VFP registers. */
13050 arm_get_vfp_saved_size (void)
13052 unsigned int regno;
13057 /* Space for saved VFP registers. */
13058 if (TARGET_HARD_FLOAT && TARGET_VFP)
13061 for (regno = FIRST_VFP_REGNUM;
13062 regno < LAST_VFP_REGNUM;
13065 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13066 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13070 /* Workaround ARM10 VFPr1 bug. */
13071 if (count == 2 && !arm_arch6)
13073 saved += count * 8;
13082 if (count == 2 && !arm_arch6)
13084 saved += count * 8;
13091 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13092 everything bar the final return instruction. */
13094 output_return_instruction (rtx operand, int really_return, int reverse)
13096 char conditional[10];
13099 unsigned long live_regs_mask;
13100 unsigned long func_type;
13101 arm_stack_offsets *offsets;
13103 func_type = arm_current_func_type ();
13105 if (IS_NAKED (func_type))
13108 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13110 /* If this function was declared non-returning, and we have
13111 found a tail call, then we have to trust that the called
13112 function won't return. */
13117 /* Otherwise, trap an attempted return by aborting. */
13119 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13121 assemble_external_libcall (ops[1]);
13122 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13128 gcc_assert (!cfun->calls_alloca || really_return);
13130 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13132 cfun->machine->return_used_this_function = 1;
13134 offsets = arm_get_frame_offsets ();
13135 live_regs_mask = offsets->saved_regs_mask;
13137 if (live_regs_mask)
13139 const char * return_reg;
13141 /* If we do not have any special requirements for function exit
13142 (e.g. interworking) then we can load the return address
13143 directly into the PC. Otherwise we must load it into LR. */
13145 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13146 return_reg = reg_names[PC_REGNUM];
13148 return_reg = reg_names[LR_REGNUM];
13150 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13152 /* There are three possible reasons for the IP register
13153 being saved. 1) a stack frame was created, in which case
13154 IP contains the old stack pointer, or 2) an ISR routine
13155 corrupted it, or 3) it was saved to align the stack on
13156 iWMMXt. In case 1, restore IP into SP, otherwise just
13158 if (frame_pointer_needed)
13160 live_regs_mask &= ~ (1 << IP_REGNUM);
13161 live_regs_mask |= (1 << SP_REGNUM);
13164 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13167 /* On some ARM architectures it is faster to use LDR rather than
13168 LDM to load a single register. On other architectures, the
13169 cost is the same. In 26 bit mode, or for exception handlers,
13170 we have to use LDM to load the PC so that the CPSR is also
13172 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13173 if (live_regs_mask == (1U << reg))
13176 if (reg <= LAST_ARM_REGNUM
13177 && (reg != LR_REGNUM
13179 || ! IS_INTERRUPT (func_type)))
13181 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13182 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13189 /* Generate the load multiple instruction to restore the
13190 registers. Note we can get here, even if
13191 frame_pointer_needed is true, but only if sp already
13192 points to the base of the saved core registers. */
13193 if (live_regs_mask & (1 << SP_REGNUM))
13195 unsigned HOST_WIDE_INT stack_adjust;
13197 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13198 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13200 if (stack_adjust && arm_arch5 && TARGET_ARM)
13201 if (TARGET_UNIFIED_ASM)
13202 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13204 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13207 /* If we can't use ldmib (SA110 bug),
13208 then try to pop r3 instead. */
13210 live_regs_mask |= 1 << 3;
13212 if (TARGET_UNIFIED_ASM)
13213 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13215 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13219 if (TARGET_UNIFIED_ASM)
13220 sprintf (instr, "pop%s\t{", conditional);
13222 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13224 p = instr + strlen (instr);
13226 for (reg = 0; reg <= SP_REGNUM; reg++)
13227 if (live_regs_mask & (1 << reg))
13229 int l = strlen (reg_names[reg]);
13235 memcpy (p, ", ", 2);
13239 memcpy (p, "%|", 2);
13240 memcpy (p + 2, reg_names[reg], l);
13244 if (live_regs_mask & (1 << LR_REGNUM))
13246 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13247 /* If returning from an interrupt, restore the CPSR. */
13248 if (IS_INTERRUPT (func_type))
13255 output_asm_insn (instr, & operand);
13257 /* See if we need to generate an extra instruction to
13258 perform the actual function return. */
13260 && func_type != ARM_FT_INTERWORKED
13261 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13263 /* The return has already been handled
13264 by loading the LR into the PC. */
13271 switch ((int) ARM_FUNC_TYPE (func_type))
13275 /* ??? This is wrong for unified assembly syntax. */
13276 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13279 case ARM_FT_INTERWORKED:
13280 sprintf (instr, "bx%s\t%%|lr", conditional);
13283 case ARM_FT_EXCEPTION:
13284 /* ??? This is wrong for unified assembly syntax. */
13285 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13289 /* Use bx if it's available. */
13290 if (arm_arch5 || arm_arch4t)
13291 sprintf (instr, "bx%s\t%%|lr", conditional);
13293 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13297 output_asm_insn (instr, & operand);
13303 /* Write the function name into the code section, directly preceding
13304 the function prologue.
13306 Code will be output similar to this:
13308 .ascii "arm_poke_function_name", 0
13311 .word 0xff000000 + (t1 - t0)
13312 arm_poke_function_name
13314 stmfd sp!, {fp, ip, lr, pc}
13317 When performing a stack backtrace, code can inspect the value
13318 of 'pc' stored at 'fp' + 0. If the trace function then looks
13319 at location pc - 12 and the top 8 bits are set, then we know
13320 that there is a function name embedded immediately preceding this
13321 location and has length ((pc[-3]) & 0xff000000).
13323 We assume that pc is declared as a pointer to an unsigned long.
13325 It is of no benefit to output the function name if we are assembling
13326 a leaf function. These function types will not contain a stack
13327 backtrace structure, therefore it is not possible to determine the
13330 arm_poke_function_name (FILE *stream, const char *name)
13332 unsigned long alignlength;
13333 unsigned long length;
13336 length = strlen (name) + 1;
13337 alignlength = ROUND_UP_WORD (length);
13339 ASM_OUTPUT_ASCII (stream, name, length);
13340 ASM_OUTPUT_ALIGN (stream, 2);
13341 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13342 assemble_aligned_integer (UNITS_PER_WORD, x);
13345 /* Place some comments into the assembler stream
13346 describing the current function. */
13348 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13350 unsigned long func_type;
13354 thumb1_output_function_prologue (f, frame_size);
13358 /* Sanity check. */
13359 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13361 func_type = arm_current_func_type ();
13363 switch ((int) ARM_FUNC_TYPE (func_type))
13366 case ARM_FT_NORMAL:
13368 case ARM_FT_INTERWORKED:
13369 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13372 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13375 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13377 case ARM_FT_EXCEPTION:
13378 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13382 if (IS_NAKED (func_type))
13383 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13385 if (IS_VOLATILE (func_type))
13386 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13388 if (IS_NESTED (func_type))
13389 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13390 if (IS_STACKALIGN (func_type))
13391 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13393 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13395 crtl->args.pretend_args_size, frame_size);
13397 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13398 frame_pointer_needed,
13399 cfun->machine->uses_anonymous_args);
13401 if (cfun->machine->lr_save_eliminated)
13402 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13404 if (crtl->calls_eh_return)
13405 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13410 arm_output_epilogue (rtx sibling)
13413 unsigned long saved_regs_mask;
13414 unsigned long func_type;
13415 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13416 frame that is $fp + 4 for a non-variadic function. */
13417 int floats_offset = 0;
13419 FILE * f = asm_out_file;
13420 unsigned int lrm_count = 0;
13421 int really_return = (sibling == NULL);
13423 arm_stack_offsets *offsets;
13425 /* If we have already generated the return instruction
13426 then it is futile to generate anything else. */
13427 if (use_return_insn (FALSE, sibling) &&
13428 (cfun->machine->return_used_this_function != 0))
13431 func_type = arm_current_func_type ();
13433 if (IS_NAKED (func_type))
13434 /* Naked functions don't have epilogues. */
13437 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13441 /* A volatile function should never return. Call abort. */
13442 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13443 assemble_external_libcall (op);
13444 output_asm_insn ("bl\t%a0", &op);
13449 /* If we are throwing an exception, then we really must be doing a
13450 return, so we can't tail-call. */
13451 gcc_assert (!crtl->calls_eh_return || really_return);
13453 offsets = arm_get_frame_offsets ();
13454 saved_regs_mask = offsets->saved_regs_mask;
13457 lrm_count = bit_count (saved_regs_mask);
13459 floats_offset = offsets->saved_args;
13460 /* Compute how far away the floats will be. */
13461 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13462 if (saved_regs_mask & (1 << reg))
13463 floats_offset += 4;
13465 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13467 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13468 int vfp_offset = offsets->frame;
13470 if (TARGET_FPA_EMU2)
13472 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13473 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13475 floats_offset += 12;
13476 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13477 reg, FP_REGNUM, floats_offset - vfp_offset);
13482 start_reg = LAST_FPA_REGNUM;
13484 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13486 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13488 floats_offset += 12;
13490 /* We can't unstack more than four registers at once. */
13491 if (start_reg - reg == 3)
13493 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13494 reg, FP_REGNUM, floats_offset - vfp_offset);
13495 start_reg = reg - 1;
13500 if (reg != start_reg)
13501 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13502 reg + 1, start_reg - reg,
13503 FP_REGNUM, floats_offset - vfp_offset);
13504 start_reg = reg - 1;
13508 /* Just in case the last register checked also needs unstacking. */
13509 if (reg != start_reg)
13510 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13511 reg + 1, start_reg - reg,
13512 FP_REGNUM, floats_offset - vfp_offset);
13515 if (TARGET_HARD_FLOAT && TARGET_VFP)
13519 /* The fldmd insns do not have base+offset addressing
13520 modes, so we use IP to hold the address. */
13521 saved_size = arm_get_vfp_saved_size ();
13523 if (saved_size > 0)
13525 floats_offset += saved_size;
13526 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13527 FP_REGNUM, floats_offset - vfp_offset);
13529 start_reg = FIRST_VFP_REGNUM;
13530 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13532 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13533 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13535 if (start_reg != reg)
13536 vfp_output_fldmd (f, IP_REGNUM,
13537 (start_reg - FIRST_VFP_REGNUM) / 2,
13538 (reg - start_reg) / 2);
13539 start_reg = reg + 2;
13542 if (start_reg != reg)
13543 vfp_output_fldmd (f, IP_REGNUM,
13544 (start_reg - FIRST_VFP_REGNUM) / 2,
13545 (reg - start_reg) / 2);
13550 /* The frame pointer is guaranteed to be non-double-word aligned.
13551 This is because it is set to (old_stack_pointer - 4) and the
13552 old_stack_pointer was double word aligned. Thus the offset to
13553 the iWMMXt registers to be loaded must also be non-double-word
13554 sized, so that the resultant address *is* double-word aligned.
13555 We can ignore floats_offset since that was already included in
13556 the live_regs_mask. */
13557 lrm_count += (lrm_count % 2 ? 2 : 1);
13559 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13560 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13562 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13563 reg, FP_REGNUM, lrm_count * 4);
13568 /* saved_regs_mask should contain the IP, which at the time of stack
13569 frame generation actually contains the old stack pointer. So a
13570 quick way to unwind the stack is just pop the IP register directly
13571 into the stack pointer. */
13572 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13573 saved_regs_mask &= ~ (1 << IP_REGNUM);
13574 saved_regs_mask |= (1 << SP_REGNUM);
13576 /* There are two registers left in saved_regs_mask - LR and PC. We
13577 only need to restore the LR register (the return address), but to
13578 save time we can load it directly into the PC, unless we need a
13579 special function exit sequence, or we are not really returning. */
13581 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13582 && !crtl->calls_eh_return)
13583 /* Delete the LR from the register mask, so that the LR on
13584 the stack is loaded into the PC in the register mask. */
13585 saved_regs_mask &= ~ (1 << LR_REGNUM);
13587 saved_regs_mask &= ~ (1 << PC_REGNUM);
13589 /* We must use SP as the base register, because SP is one of the
13590 registers being restored. If an interrupt or page fault
13591 happens in the ldm instruction, the SP might or might not
13592 have been restored. That would be bad, as then SP will no
13593 longer indicate the safe area of stack, and we can get stack
13594 corruption. Using SP as the base register means that it will
13595 be reset correctly to the original value, should an interrupt
13596 occur. If the stack pointer already points at the right
13597 place, then omit the subtraction. */
13598 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13599 || cfun->calls_alloca)
13600 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13601 4 * bit_count (saved_regs_mask));
13602 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13604 if (IS_INTERRUPT (func_type))
13605 /* Interrupt handlers will have pushed the
13606 IP onto the stack, so restore it now. */
13607 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13611 /* This branch is executed for ARM mode (non-apcs frames) and
13612 Thumb-2 mode. Frame layout is essentially the same for those
13613 cases, except that in ARM mode frame pointer points to the
13614 first saved register, while in Thumb-2 mode the frame pointer points
13615 to the last saved register.
13617 It is possible to make frame pointer point to last saved
13618 register in both cases, and remove some conditionals below.
13619 That means that fp setup in prologue would be just "mov fp, sp"
13620 and sp restore in epilogue would be just "mov sp, fp", whereas
13621 now we have to use add/sub in those cases. However, the value
13622 of that would be marginal, as both mov and add/sub are 32-bit
13623 in ARM mode, and it would require extra conditionals
13624 in arm_expand_prologue to distingish ARM-apcs-frame case
13625 (where frame pointer is required to point at first register)
13626 and ARM-non-apcs-frame. Therefore, such change is postponed
13627 until real need arise. */
13628 unsigned HOST_WIDE_INT amount;
13630 /* Restore stack pointer if necessary. */
13631 if (TARGET_ARM && frame_pointer_needed)
13633 operands[0] = stack_pointer_rtx;
13634 operands[1] = hard_frame_pointer_rtx;
13636 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13637 output_add_immediate (operands);
13641 if (frame_pointer_needed)
13643 /* For Thumb-2 restore sp from the frame pointer.
13644 Operand restrictions mean we have to incrememnt FP, then copy
13646 amount = offsets->locals_base - offsets->saved_regs;
13647 operands[0] = hard_frame_pointer_rtx;
13651 unsigned long count;
13652 operands[0] = stack_pointer_rtx;
13653 amount = offsets->outgoing_args - offsets->saved_regs;
13654 /* pop call clobbered registers if it avoids a
13655 separate stack adjustment. */
13656 count = offsets->saved_regs - offsets->saved_args;
13659 && !crtl->calls_eh_return
13660 && bit_count(saved_regs_mask) * 4 == count
13661 && !IS_INTERRUPT (func_type)
13662 && !crtl->tail_call_emit)
13664 unsigned long mask;
13665 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13667 mask &= ~saved_regs_mask;
13669 while (bit_count (mask) * 4 > amount)
13671 while ((mask & (1 << reg)) == 0)
13673 mask &= ~(1 << reg);
13675 if (bit_count (mask) * 4 == amount) {
13677 saved_regs_mask |= mask;
13684 operands[1] = operands[0];
13685 operands[2] = GEN_INT (amount);
13686 output_add_immediate (operands);
13688 if (frame_pointer_needed)
13689 asm_fprintf (f, "\tmov\t%r, %r\n",
13690 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13693 if (TARGET_FPA_EMU2)
13695 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13696 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13697 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13702 start_reg = FIRST_FPA_REGNUM;
13704 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13706 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13708 if (reg - start_reg == 3)
13710 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13711 start_reg, SP_REGNUM);
13712 start_reg = reg + 1;
13717 if (reg != start_reg)
13718 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13719 start_reg, reg - start_reg,
13722 start_reg = reg + 1;
13726 /* Just in case the last register checked also needs unstacking. */
13727 if (reg != start_reg)
13728 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13729 start_reg, reg - start_reg, SP_REGNUM);
13732 if (TARGET_HARD_FLOAT && TARGET_VFP)
13734 start_reg = FIRST_VFP_REGNUM;
13735 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13737 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13738 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13740 if (start_reg != reg)
13741 vfp_output_fldmd (f, SP_REGNUM,
13742 (start_reg - FIRST_VFP_REGNUM) / 2,
13743 (reg - start_reg) / 2);
13744 start_reg = reg + 2;
13747 if (start_reg != reg)
13748 vfp_output_fldmd (f, SP_REGNUM,
13749 (start_reg - FIRST_VFP_REGNUM) / 2,
13750 (reg - start_reg) / 2);
13753 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13754 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13755 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13757 /* If we can, restore the LR into the PC. */
13758 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13759 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13760 && !IS_STACKALIGN (func_type)
13762 && crtl->args.pretend_args_size == 0
13763 && saved_regs_mask & (1 << LR_REGNUM)
13764 && !crtl->calls_eh_return)
13766 saved_regs_mask &= ~ (1 << LR_REGNUM);
13767 saved_regs_mask |= (1 << PC_REGNUM);
13768 rfe = IS_INTERRUPT (func_type);
13773 /* Load the registers off the stack. If we only have one register
13774 to load use the LDR instruction - it is faster. For Thumb-2
13775 always use pop and the assembler will pick the best instruction.*/
13776 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13777 && !IS_INTERRUPT(func_type))
13779 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13781 else if (saved_regs_mask)
13783 if (saved_regs_mask & (1 << SP_REGNUM))
13784 /* Note - write back to the stack register is not enabled
13785 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13786 in the list of registers and if we add writeback the
13787 instruction becomes UNPREDICTABLE. */
13788 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13790 else if (TARGET_ARM)
13791 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13794 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13797 if (crtl->args.pretend_args_size)
13799 /* Unwind the pre-pushed regs. */
13800 operands[0] = operands[1] = stack_pointer_rtx;
13801 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13802 output_add_immediate (operands);
13806 /* We may have already restored PC directly from the stack. */
13807 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13810 /* Stack adjustment for exception handler. */
13811 if (crtl->calls_eh_return)
13812 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13813 ARM_EH_STACKADJ_REGNUM);
13815 /* Generate the return instruction. */
13816 switch ((int) ARM_FUNC_TYPE (func_type))
13820 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13823 case ARM_FT_EXCEPTION:
13824 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13827 case ARM_FT_INTERWORKED:
13828 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13832 if (IS_STACKALIGN (func_type))
13834 /* See comment in arm_expand_prologue. */
13835 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13837 if (arm_arch5 || arm_arch4t)
13838 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13840 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13848 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13849 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13851 arm_stack_offsets *offsets;
13857 /* Emit any call-via-reg trampolines that are needed for v4t support
13858 of call_reg and call_value_reg type insns. */
13859 for (regno = 0; regno < LR_REGNUM; regno++)
13861 rtx label = cfun->machine->call_via[regno];
13865 switch_to_section (function_section (current_function_decl));
13866 targetm.asm_out.internal_label (asm_out_file, "L",
13867 CODE_LABEL_NUMBER (label));
13868 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13872 /* ??? Probably not safe to set this here, since it assumes that a
13873 function will be emitted as assembly immediately after we generate
13874 RTL for it. This does not happen for inline functions. */
13875 cfun->machine->return_used_this_function = 0;
13877 else /* TARGET_32BIT */
13879 /* We need to take into account any stack-frame rounding. */
13880 offsets = arm_get_frame_offsets ();
13882 gcc_assert (!use_return_insn (FALSE, NULL)
13883 || (cfun->machine->return_used_this_function != 0)
13884 || offsets->saved_regs == offsets->outgoing_args
13885 || frame_pointer_needed);
13887 /* Reset the ARM-specific per-function variables. */
13888 after_arm_reorg = 0;
13892 /* Generate and emit an insn that we will recognize as a push_multi.
13893 Unfortunately, since this insn does not reflect very well the actual
13894 semantics of the operation, we need to annotate the insn for the benefit
13895 of DWARF2 frame unwind information. */
13897 emit_multi_reg_push (unsigned long mask)
13900 int num_dwarf_regs;
13904 int dwarf_par_index;
13907 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13908 if (mask & (1 << i))
13911 gcc_assert (num_regs && num_regs <= 16);
13913 /* We don't record the PC in the dwarf frame information. */
13914 num_dwarf_regs = num_regs;
13915 if (mask & (1 << PC_REGNUM))
13918 /* For the body of the insn we are going to generate an UNSPEC in
13919 parallel with several USEs. This allows the insn to be recognized
13920 by the push_multi pattern in the arm.md file.
13922 The body of the insn looks something like this:
13925 (set (mem:BLK (pre_modify:SI (reg:SI sp)
13926 (const_int:SI <num>)))
13927 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13933 For the frame note however, we try to be more explicit and actually
13934 show each register being stored into the stack frame, plus a (single)
13935 decrement of the stack pointer. We do it this way in order to be
13936 friendly to the stack unwinding code, which only wants to see a single
13937 stack decrement per instruction. The RTL we generate for the note looks
13938 something like this:
13941 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13942 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13943 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
13944 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
13948 FIXME:: In an ideal world the PRE_MODIFY would not exist and
13949 instead we'd have a parallel expression detailing all
13950 the stores to the various memory addresses so that debug
13951 information is more up-to-date. Remember however while writing
13952 this to take care of the constraints with the push instruction.
13954 Note also that this has to be taken care of for the VFP registers.
13956 For more see PR43399. */
13958 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13959 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13960 dwarf_par_index = 1;
13962 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13964 if (mask & (1 << i))
13966 reg = gen_rtx_REG (SImode, i);
13968 XVECEXP (par, 0, 0)
13969 = gen_rtx_SET (VOIDmode,
13972 gen_rtx_PRE_MODIFY (Pmode,
13975 (stack_pointer_rtx,
13978 gen_rtx_UNSPEC (BLKmode,
13979 gen_rtvec (1, reg),
13980 UNSPEC_PUSH_MULT));
13982 if (i != PC_REGNUM)
13984 tmp = gen_rtx_SET (VOIDmode,
13985 gen_frame_mem (SImode, stack_pointer_rtx),
13987 RTX_FRAME_RELATED_P (tmp) = 1;
13988 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
13996 for (j = 1, i++; j < num_regs; i++)
13998 if (mask & (1 << i))
14000 reg = gen_rtx_REG (SImode, i);
14002 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14004 if (i != PC_REGNUM)
14007 = gen_rtx_SET (VOIDmode,
14010 plus_constant (stack_pointer_rtx,
14013 RTX_FRAME_RELATED_P (tmp) = 1;
14014 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14021 par = emit_insn (par);
14023 tmp = gen_rtx_SET (VOIDmode,
14025 plus_constant (stack_pointer_rtx, -4 * num_regs));
14026 RTX_FRAME_RELATED_P (tmp) = 1;
14027 XVECEXP (dwarf, 0, 0) = tmp;
14029 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14034 /* Calculate the size of the return value that is passed in registers. */
14036 arm_size_return_regs (void)
14038 enum machine_mode mode;
14040 if (crtl->return_rtx != 0)
14041 mode = GET_MODE (crtl->return_rtx);
14043 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14045 return GET_MODE_SIZE (mode);
14049 emit_sfm (int base_reg, int count)
14056 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14057 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14059 reg = gen_rtx_REG (XFmode, base_reg++);
14061 XVECEXP (par, 0, 0)
14062 = gen_rtx_SET (VOIDmode,
14065 gen_rtx_PRE_MODIFY (Pmode,
14068 (stack_pointer_rtx,
14071 gen_rtx_UNSPEC (BLKmode,
14072 gen_rtvec (1, reg),
14073 UNSPEC_PUSH_MULT));
14074 tmp = gen_rtx_SET (VOIDmode,
14075 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14076 RTX_FRAME_RELATED_P (tmp) = 1;
14077 XVECEXP (dwarf, 0, 1) = tmp;
14079 for (i = 1; i < count; i++)
14081 reg = gen_rtx_REG (XFmode, base_reg++);
14082 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14084 tmp = gen_rtx_SET (VOIDmode,
14085 gen_frame_mem (XFmode,
14086 plus_constant (stack_pointer_rtx,
14089 RTX_FRAME_RELATED_P (tmp) = 1;
14090 XVECEXP (dwarf, 0, i + 1) = tmp;
14093 tmp = gen_rtx_SET (VOIDmode,
14095 plus_constant (stack_pointer_rtx, -12 * count));
14097 RTX_FRAME_RELATED_P (tmp) = 1;
14098 XVECEXP (dwarf, 0, 0) = tmp;
14100 par = emit_insn (par);
14101 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14107 /* Return true if the current function needs to save/restore LR. */
14110 thumb_force_lr_save (void)
14112 return !cfun->machine->lr_save_eliminated
14113 && (!leaf_function_p ()
14114 || thumb_far_jump_used_p ()
14115 || df_regs_ever_live_p (LR_REGNUM));
14119 /* Compute the distance from register FROM to register TO.
14120 These can be the arg pointer (26), the soft frame pointer (25),
14121 the stack pointer (13) or the hard frame pointer (11).
14122 In thumb mode r7 is used as the soft frame pointer, if needed.
14123 Typical stack layout looks like this:
14125 old stack pointer -> | |
14128 | | saved arguments for
14129 | | vararg functions
14132 hard FP & arg pointer -> | | \
14140 soft frame pointer -> | | /
14145 locals base pointer -> | | /
14150 current stack pointer -> | | /
14153 For a given function some or all of these stack components
14154 may not be needed, giving rise to the possibility of
14155 eliminating some of the registers.
14157 The values returned by this function must reflect the behavior
14158 of arm_expand_prologue() and arm_compute_save_reg_mask().
14160 The sign of the number returned reflects the direction of stack
14161 growth, so the values are positive for all eliminations except
14162 from the soft frame pointer to the hard frame pointer.
14164 SFP may point just inside the local variables block to ensure correct
14168 /* Calculate stack offsets. These are used to calculate register elimination
14169 offsets and in prologue/epilogue code. Also calculates which registers
14170 should be saved. */
14172 static arm_stack_offsets *
14173 arm_get_frame_offsets (void)
14175 struct arm_stack_offsets *offsets;
14176 unsigned long func_type;
14180 HOST_WIDE_INT frame_size;
14183 offsets = &cfun->machine->stack_offsets;
14185 /* We need to know if we are a leaf function. Unfortunately, it
14186 is possible to be called after start_sequence has been called,
14187 which causes get_insns to return the insns for the sequence,
14188 not the function, which will cause leaf_function_p to return
14189 the incorrect result.
14191 to know about leaf functions once reload has completed, and the
14192 frame size cannot be changed after that time, so we can safely
14193 use the cached value. */
14195 if (reload_completed)
14198 /* Initially this is the size of the local variables. It will translated
14199 into an offset once we have determined the size of preceding data. */
14200 frame_size = ROUND_UP_WORD (get_frame_size ());
14202 leaf = leaf_function_p ();
14204 /* Space for variadic functions. */
14205 offsets->saved_args = crtl->args.pretend_args_size;
14207 /* In Thumb mode this is incorrect, but never used. */
14208 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14209 arm_compute_static_chain_stack_bytes();
14213 unsigned int regno;
14215 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14216 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14217 saved = core_saved;
14219 /* We know that SP will be doubleword aligned on entry, and we must
14220 preserve that condition at any subroutine call. We also require the
14221 soft frame pointer to be doubleword aligned. */
14223 if (TARGET_REALLY_IWMMXT)
14225 /* Check for the call-saved iWMMXt registers. */
14226 for (regno = FIRST_IWMMXT_REGNUM;
14227 regno <= LAST_IWMMXT_REGNUM;
14229 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14233 func_type = arm_current_func_type ();
14234 if (! IS_VOLATILE (func_type))
14236 /* Space for saved FPA registers. */
14237 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14238 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14241 /* Space for saved VFP registers. */
14242 if (TARGET_HARD_FLOAT && TARGET_VFP)
14243 saved += arm_get_vfp_saved_size ();
14246 else /* TARGET_THUMB1 */
14248 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14249 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14250 saved = core_saved;
14251 if (TARGET_BACKTRACE)
14255 /* Saved registers include the stack frame. */
14256 offsets->saved_regs = offsets->saved_args + saved +
14257 arm_compute_static_chain_stack_bytes();
14258 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14259 /* A leaf function does not need any stack alignment if it has nothing
14261 if (leaf && frame_size == 0)
14263 offsets->outgoing_args = offsets->soft_frame;
14264 offsets->locals_base = offsets->soft_frame;
14268 /* Ensure SFP has the correct alignment. */
14269 if (ARM_DOUBLEWORD_ALIGN
14270 && (offsets->soft_frame & 7))
14272 offsets->soft_frame += 4;
14273 /* Try to align stack by pushing an extra reg. Don't bother doing this
14274 when there is a stack frame as the alignment will be rolled into
14275 the normal stack adjustment. */
14276 if (frame_size + crtl->outgoing_args_size == 0)
14280 /* If it is safe to use r3, then do so. This sometimes
14281 generates better code on Thumb-2 by avoiding the need to
14282 use 32-bit push/pop instructions. */
14283 if (!crtl->tail_call_emit
14284 && arm_size_return_regs () <= 12)
14289 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14291 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14300 offsets->saved_regs += 4;
14301 offsets->saved_regs_mask |= (1 << reg);
14306 offsets->locals_base = offsets->soft_frame + frame_size;
14307 offsets->outgoing_args = (offsets->locals_base
14308 + crtl->outgoing_args_size);
14310 if (ARM_DOUBLEWORD_ALIGN)
14312 /* Ensure SP remains doubleword aligned. */
14313 if (offsets->outgoing_args & 7)
14314 offsets->outgoing_args += 4;
14315 gcc_assert (!(offsets->outgoing_args & 7));
14322 /* Calculate the relative offsets for the different stack pointers. Positive
14323 offsets are in the direction of stack growth. */
14326 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14328 arm_stack_offsets *offsets;
14330 offsets = arm_get_frame_offsets ();
14332 /* OK, now we have enough information to compute the distances.
14333 There must be an entry in these switch tables for each pair
14334 of registers in ELIMINABLE_REGS, even if some of the entries
14335 seem to be redundant or useless. */
14338 case ARG_POINTER_REGNUM:
14341 case THUMB_HARD_FRAME_POINTER_REGNUM:
14344 case FRAME_POINTER_REGNUM:
14345 /* This is the reverse of the soft frame pointer
14346 to hard frame pointer elimination below. */
14347 return offsets->soft_frame - offsets->saved_args;
14349 case ARM_HARD_FRAME_POINTER_REGNUM:
14350 /* This is only non-zero in the case where the static chain register
14351 is stored above the frame. */
14352 return offsets->frame - offsets->saved_args - 4;
14354 case STACK_POINTER_REGNUM:
14355 /* If nothing has been pushed on the stack at all
14356 then this will return -4. This *is* correct! */
14357 return offsets->outgoing_args - (offsets->saved_args + 4);
14360 gcc_unreachable ();
14362 gcc_unreachable ();
14364 case FRAME_POINTER_REGNUM:
14367 case THUMB_HARD_FRAME_POINTER_REGNUM:
14370 case ARM_HARD_FRAME_POINTER_REGNUM:
14371 /* The hard frame pointer points to the top entry in the
14372 stack frame. The soft frame pointer to the bottom entry
14373 in the stack frame. If there is no stack frame at all,
14374 then they are identical. */
14376 return offsets->frame - offsets->soft_frame;
14378 case STACK_POINTER_REGNUM:
14379 return offsets->outgoing_args - offsets->soft_frame;
14382 gcc_unreachable ();
14384 gcc_unreachable ();
14387 /* You cannot eliminate from the stack pointer.
14388 In theory you could eliminate from the hard frame
14389 pointer to the stack pointer, but this will never
14390 happen, since if a stack frame is not needed the
14391 hard frame pointer will never be used. */
14392 gcc_unreachable ();
14396 /* Given FROM and TO register numbers, say whether this elimination is
14397 allowed. Frame pointer elimination is automatically handled.
14399 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14400 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14401 pointer, we must eliminate FRAME_POINTER_REGNUM into
14402 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14403 ARG_POINTER_REGNUM. */
14406 arm_can_eliminate (const int from, const int to)
14408 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14409 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14410 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14411 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14415 /* Emit RTL to save coprocessor registers on function entry. Returns the
14416 number of bytes pushed. */
14419 arm_save_coproc_regs(void)
14421 int saved_size = 0;
14423 unsigned start_reg;
14426 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14427 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14429 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
14430 insn = gen_rtx_MEM (V2SImode, insn);
14431 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14432 RTX_FRAME_RELATED_P (insn) = 1;
14436 /* Save any floating point call-saved registers used by this
14438 if (TARGET_FPA_EMU2)
14440 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14441 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14443 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
14444 insn = gen_rtx_MEM (XFmode, insn);
14445 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14446 RTX_FRAME_RELATED_P (insn) = 1;
14452 start_reg = LAST_FPA_REGNUM;
14454 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14456 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14458 if (start_reg - reg == 3)
14460 insn = emit_sfm (reg, 4);
14461 RTX_FRAME_RELATED_P (insn) = 1;
14463 start_reg = reg - 1;
14468 if (start_reg != reg)
14470 insn = emit_sfm (reg + 1, start_reg - reg);
14471 RTX_FRAME_RELATED_P (insn) = 1;
14472 saved_size += (start_reg - reg) * 12;
14474 start_reg = reg - 1;
14478 if (start_reg != reg)
14480 insn = emit_sfm (reg + 1, start_reg - reg);
14481 saved_size += (start_reg - reg) * 12;
14482 RTX_FRAME_RELATED_P (insn) = 1;
14485 if (TARGET_HARD_FLOAT && TARGET_VFP)
14487 start_reg = FIRST_VFP_REGNUM;
14489 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14491 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14492 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14494 if (start_reg != reg)
14495 saved_size += vfp_emit_fstmd (start_reg,
14496 (reg - start_reg) / 2);
14497 start_reg = reg + 2;
14500 if (start_reg != reg)
14501 saved_size += vfp_emit_fstmd (start_reg,
14502 (reg - start_reg) / 2);
14508 /* Set the Thumb frame pointer from the stack pointer. */
14511 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14513 HOST_WIDE_INT amount;
14516 amount = offsets->outgoing_args - offsets->locals_base;
14518 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14519 stack_pointer_rtx, GEN_INT (amount)));
14522 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14523 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14524 expects the first two operands to be the same. */
14527 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14529 hard_frame_pointer_rtx));
14533 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14534 hard_frame_pointer_rtx,
14535 stack_pointer_rtx));
14537 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14538 plus_constant (stack_pointer_rtx, amount));
14539 RTX_FRAME_RELATED_P (dwarf) = 1;
14540 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14543 RTX_FRAME_RELATED_P (insn) = 1;
14546 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14549 arm_expand_prologue (void)
14554 unsigned long live_regs_mask;
14555 unsigned long func_type;
14557 int saved_pretend_args = 0;
14558 int saved_regs = 0;
14559 unsigned HOST_WIDE_INT args_to_push;
14560 arm_stack_offsets *offsets;
14562 func_type = arm_current_func_type ();
14564 /* Naked functions don't have prologues. */
14565 if (IS_NAKED (func_type))
14568 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14569 args_to_push = crtl->args.pretend_args_size;
14571 /* Compute which register we will have to save onto the stack. */
14572 offsets = arm_get_frame_offsets ();
14573 live_regs_mask = offsets->saved_regs_mask;
14575 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14577 if (IS_STACKALIGN (func_type))
14582 /* Handle a word-aligned stack pointer. We generate the following:
14587 <save and restore r0 in normal prologue/epilogue>
14591 The unwinder doesn't need to know about the stack realignment.
14592 Just tell it we saved SP in r0. */
14593 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14595 r0 = gen_rtx_REG (SImode, 0);
14596 r1 = gen_rtx_REG (SImode, 1);
14597 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14598 compiler won't choke. */
14599 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14600 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14601 insn = gen_movsi (r0, stack_pointer_rtx);
14602 RTX_FRAME_RELATED_P (insn) = 1;
14603 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14605 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14606 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14609 /* For APCS frames, if IP register is clobbered
14610 when creating frame, save that register in a special
14612 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14614 if (IS_INTERRUPT (func_type))
14616 /* Interrupt functions must not corrupt any registers.
14617 Creating a frame pointer however, corrupts the IP
14618 register, so we must push it first. */
14619 insn = emit_multi_reg_push (1 << IP_REGNUM);
14621 /* Do not set RTX_FRAME_RELATED_P on this insn.
14622 The dwarf stack unwinding code only wants to see one
14623 stack decrement per function, and this is not it. If
14624 this instruction is labeled as being part of the frame
14625 creation sequence then dwarf2out_frame_debug_expr will
14626 die when it encounters the assignment of IP to FP
14627 later on, since the use of SP here establishes SP as
14628 the CFA register and not IP.
14630 Anyway this instruction is not really part of the stack
14631 frame creation although it is part of the prologue. */
14633 else if (IS_NESTED (func_type))
14635 /* The Static chain register is the same as the IP register
14636 used as a scratch register during stack frame creation.
14637 To get around this need to find somewhere to store IP
14638 whilst the frame is being created. We try the following
14641 1. The last argument register.
14642 2. A slot on the stack above the frame. (This only
14643 works if the function is not a varargs function).
14644 3. Register r3, after pushing the argument registers
14647 Note - we only need to tell the dwarf2 backend about the SP
14648 adjustment in the second variant; the static chain register
14649 doesn't need to be unwound, as it doesn't contain a value
14650 inherited from the caller. */
14652 if (df_regs_ever_live_p (3) == false)
14653 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14654 else if (args_to_push == 0)
14658 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14661 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14662 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14665 /* Just tell the dwarf backend that we adjusted SP. */
14666 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14667 plus_constant (stack_pointer_rtx,
14669 RTX_FRAME_RELATED_P (insn) = 1;
14670 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14674 /* Store the args on the stack. */
14675 if (cfun->machine->uses_anonymous_args)
14676 insn = emit_multi_reg_push
14677 ((0xf0 >> (args_to_push / 4)) & 0xf);
14680 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14681 GEN_INT (- args_to_push)));
14683 RTX_FRAME_RELATED_P (insn) = 1;
14685 saved_pretend_args = 1;
14686 fp_offset = args_to_push;
14689 /* Now reuse r3 to preserve IP. */
14690 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14694 insn = emit_set_insn (ip_rtx,
14695 plus_constant (stack_pointer_rtx, fp_offset));
14696 RTX_FRAME_RELATED_P (insn) = 1;
14701 /* Push the argument registers, or reserve space for them. */
14702 if (cfun->machine->uses_anonymous_args)
14703 insn = emit_multi_reg_push
14704 ((0xf0 >> (args_to_push / 4)) & 0xf);
14707 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14708 GEN_INT (- args_to_push)));
14709 RTX_FRAME_RELATED_P (insn) = 1;
14712 /* If this is an interrupt service routine, and the link register
14713 is going to be pushed, and we're not generating extra
14714 push of IP (needed when frame is needed and frame layout if apcs),
14715 subtracting four from LR now will mean that the function return
14716 can be done with a single instruction. */
14717 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14718 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14719 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14722 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14724 emit_set_insn (lr, plus_constant (lr, -4));
14727 if (live_regs_mask)
14729 saved_regs += bit_count (live_regs_mask) * 4;
14730 if (optimize_size && !frame_pointer_needed
14731 && saved_regs == offsets->saved_regs - offsets->saved_args)
14733 /* If no coprocessor registers are being pushed and we don't have
14734 to worry about a frame pointer then push extra registers to
14735 create the stack frame. This is done is a way that does not
14736 alter the frame layout, so is independent of the epilogue. */
14740 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14742 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14743 if (frame && n * 4 >= frame)
14746 live_regs_mask |= (1 << n) - 1;
14747 saved_regs += frame;
14750 insn = emit_multi_reg_push (live_regs_mask);
14751 RTX_FRAME_RELATED_P (insn) = 1;
14754 if (! IS_VOLATILE (func_type))
14755 saved_regs += arm_save_coproc_regs ();
14757 if (frame_pointer_needed && TARGET_ARM)
14759 /* Create the new frame pointer. */
14760 if (TARGET_APCS_FRAME)
14762 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14763 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14764 RTX_FRAME_RELATED_P (insn) = 1;
14766 if (IS_NESTED (func_type))
14768 /* Recover the static chain register. */
14769 if (!df_regs_ever_live_p (3)
14770 || saved_pretend_args)
14771 insn = gen_rtx_REG (SImode, 3);
14772 else /* if (crtl->args.pretend_args_size == 0) */
14774 insn = plus_constant (hard_frame_pointer_rtx, 4);
14775 insn = gen_frame_mem (SImode, insn);
14777 emit_set_insn (ip_rtx, insn);
14778 /* Add a USE to stop propagate_one_insn() from barfing. */
14779 emit_insn (gen_prologue_use (ip_rtx));
14784 insn = GEN_INT (saved_regs - 4);
14785 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14786 stack_pointer_rtx, insn));
14787 RTX_FRAME_RELATED_P (insn) = 1;
14791 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14793 /* This add can produce multiple insns for a large constant, so we
14794 need to get tricky. */
14795 rtx last = get_last_insn ();
14797 amount = GEN_INT (offsets->saved_args + saved_regs
14798 - offsets->outgoing_args);
14800 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14804 last = last ? NEXT_INSN (last) : get_insns ();
14805 RTX_FRAME_RELATED_P (last) = 1;
14807 while (last != insn);
14809 /* If the frame pointer is needed, emit a special barrier that
14810 will prevent the scheduler from moving stores to the frame
14811 before the stack adjustment. */
14812 if (frame_pointer_needed)
14813 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14814 hard_frame_pointer_rtx));
14818 if (frame_pointer_needed && TARGET_THUMB2)
14819 thumb_set_frame_pointer (offsets);
14821 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14823 unsigned long mask;
14825 mask = live_regs_mask;
14826 mask &= THUMB2_WORK_REGS;
14827 if (!IS_NESTED (func_type))
14828 mask |= (1 << IP_REGNUM);
14829 arm_load_pic_register (mask);
14832 /* If we are profiling, make sure no instructions are scheduled before
14833 the call to mcount. Similarly if the user has requested no
14834 scheduling in the prolog. Similarly if we want non-call exceptions
14835 using the EABI unwinder, to prevent faulting instructions from being
14836 swapped with a stack adjustment. */
14837 if (crtl->profile || !TARGET_SCHED_PROLOG
14838 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14839 emit_insn (gen_blockage ());
14841 /* If the link register is being kept alive, with the return address in it,
14842 then make sure that it does not get reused by the ce2 pass. */
14843 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14844 cfun->machine->lr_save_eliminated = 1;
14847 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14849 arm_print_condition (FILE *stream)
14851 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14853 /* Branch conversion is not implemented for Thumb-2. */
14856 output_operand_lossage ("predicated Thumb instruction");
14859 if (current_insn_predicate != NULL)
14861 output_operand_lossage
14862 ("predicated instruction in conditional sequence");
14866 fputs (arm_condition_codes[arm_current_cc], stream);
14868 else if (current_insn_predicate)
14870 enum arm_cond_code code;
14874 output_operand_lossage ("predicated Thumb instruction");
14878 code = get_arm_condition_code (current_insn_predicate);
14879 fputs (arm_condition_codes[code], stream);
14884 /* If CODE is 'd', then the X is a condition operand and the instruction
14885 should only be executed if the condition is true.
14886 if CODE is 'D', then the X is a condition operand and the instruction
14887 should only be executed if the condition is false: however, if the mode
14888 of the comparison is CCFPEmode, then always execute the instruction -- we
14889 do this because in these circumstances !GE does not necessarily imply LT;
14890 in these cases the instruction pattern will take care to make sure that
14891 an instruction containing %d will follow, thereby undoing the effects of
14892 doing this instruction unconditionally.
14893 If CODE is 'N' then X is a floating point operand that must be negated
14895 If CODE is 'B' then output a bitwise inverted value of X (a const int).
14896 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
14898 arm_print_operand (FILE *stream, rtx x, int code)
14903 fputs (ASM_COMMENT_START, stream);
14907 fputs (user_label_prefix, stream);
14911 fputs (REGISTER_PREFIX, stream);
14915 arm_print_condition (stream);
14919 /* Nothing in unified syntax, otherwise the current condition code. */
14920 if (!TARGET_UNIFIED_ASM)
14921 arm_print_condition (stream);
14925 /* The current condition code in unified syntax, otherwise nothing. */
14926 if (TARGET_UNIFIED_ASM)
14927 arm_print_condition (stream);
14931 /* The current condition code for a condition code setting instruction.
14932 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14933 if (TARGET_UNIFIED_ASM)
14935 fputc('s', stream);
14936 arm_print_condition (stream);
14940 arm_print_condition (stream);
14941 fputc('s', stream);
14946 /* If the instruction is conditionally executed then print
14947 the current condition code, otherwise print 's'. */
14948 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14949 if (current_insn_predicate)
14950 arm_print_condition (stream);
14952 fputc('s', stream);
14955 /* %# is a "break" sequence. It doesn't output anything, but is used to
14956 separate e.g. operand numbers from following text, if that text consists
14957 of further digits which we don't want to be part of the operand
14965 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14966 r = REAL_VALUE_NEGATE (r);
14967 fprintf (stream, "%s", fp_const_from_val (&r));
14971 /* An integer or symbol address without a preceding # sign. */
14973 switch (GET_CODE (x))
14976 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14980 output_addr_const (stream, x);
14984 gcc_unreachable ();
14989 if (GET_CODE (x) == CONST_INT)
14992 val = ARM_SIGN_EXTEND (~INTVAL (x));
14993 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14997 putc ('~', stream);
14998 output_addr_const (stream, x);
15003 /* The low 16 bits of an immediate constant. */
15004 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15008 fprintf (stream, "%s", arithmetic_instr (x, 1));
15011 /* Truncate Cirrus shift counts. */
15013 if (GET_CODE (x) == CONST_INT)
15015 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15018 arm_print_operand (stream, x, 0);
15022 fprintf (stream, "%s", arithmetic_instr (x, 0));
15030 if (!shift_operator (x, SImode))
15032 output_operand_lossage ("invalid shift operand");
15036 shift = shift_op (x, &val);
15040 fprintf (stream, ", %s ", shift);
15042 arm_print_operand (stream, XEXP (x, 1), 0);
15044 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15049 /* An explanation of the 'Q', 'R' and 'H' register operands:
15051 In a pair of registers containing a DI or DF value the 'Q'
15052 operand returns the register number of the register containing
15053 the least significant part of the value. The 'R' operand returns
15054 the register number of the register containing the most
15055 significant part of the value.
15057 The 'H' operand returns the higher of the two register numbers.
15058 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15059 same as the 'Q' operand, since the most significant part of the
15060 value is held in the lower number register. The reverse is true
15061 on systems where WORDS_BIG_ENDIAN is false.
15063 The purpose of these operands is to distinguish between cases
15064 where the endian-ness of the values is important (for example
15065 when they are added together), and cases where the endian-ness
15066 is irrelevant, but the order of register operations is important.
15067 For example when loading a value from memory into a register
15068 pair, the endian-ness does not matter. Provided that the value
15069 from the lower memory address is put into the lower numbered
15070 register, and the value from the higher address is put into the
15071 higher numbered register, the load will work regardless of whether
15072 the value being loaded is big-wordian or little-wordian. The
15073 order of the two register loads can matter however, if the address
15074 of the memory location is actually held in one of the registers
15075 being overwritten by the load. */
15077 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15079 output_operand_lossage ("invalid operand for code '%c'", code);
15083 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15087 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15089 output_operand_lossage ("invalid operand for code '%c'", code);
15093 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15097 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15099 output_operand_lossage ("invalid operand for code '%c'", code);
15103 asm_fprintf (stream, "%r", REGNO (x) + 1);
15107 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15109 output_operand_lossage ("invalid operand for code '%c'", code);
15113 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15117 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15119 output_operand_lossage ("invalid operand for code '%c'", code);
15123 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15127 asm_fprintf (stream, "%r",
15128 GET_CODE (XEXP (x, 0)) == REG
15129 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15133 asm_fprintf (stream, "{%r-%r}",
15135 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15138 /* Like 'M', but writing doubleword vector registers, for use by Neon
15142 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15143 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15145 asm_fprintf (stream, "{d%d}", regno);
15147 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15152 /* CONST_TRUE_RTX means always -- that's the default. */
15153 if (x == const_true_rtx)
15156 if (!COMPARISON_P (x))
15158 output_operand_lossage ("invalid operand for code '%c'", code);
15162 fputs (arm_condition_codes[get_arm_condition_code (x)],
15167 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15168 want to do that. */
15169 if (x == const_true_rtx)
15171 output_operand_lossage ("instruction never executed");
15174 if (!COMPARISON_P (x))
15176 output_operand_lossage ("invalid operand for code '%c'", code);
15180 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15181 (get_arm_condition_code (x))],
15185 /* Cirrus registers can be accessed in a variety of ways:
15186 single floating point (f)
15187 double floating point (d)
15189 64bit integer (dx). */
15190 case 'W': /* Cirrus register in F mode. */
15191 case 'X': /* Cirrus register in D mode. */
15192 case 'Y': /* Cirrus register in FX mode. */
15193 case 'Z': /* Cirrus register in DX mode. */
15194 gcc_assert (GET_CODE (x) == REG
15195 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15197 fprintf (stream, "mv%s%s",
15199 : code == 'X' ? "d"
15200 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15204 /* Print cirrus register in the mode specified by the register's mode. */
15207 int mode = GET_MODE (x);
15209 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15211 output_operand_lossage ("invalid operand for code '%c'", code);
15215 fprintf (stream, "mv%s%s",
15216 mode == DFmode ? "d"
15217 : mode == SImode ? "fx"
15218 : mode == DImode ? "dx"
15219 : "f", reg_names[REGNO (x)] + 2);
15225 if (GET_CODE (x) != REG
15226 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15227 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15228 /* Bad value for wCG register number. */
15230 output_operand_lossage ("invalid operand for code '%c'", code);
15235 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15238 /* Print an iWMMXt control register name. */
15240 if (GET_CODE (x) != CONST_INT
15242 || INTVAL (x) >= 16)
15243 /* Bad value for wC register number. */
15245 output_operand_lossage ("invalid operand for code '%c'", code);
15251 static const char * wc_reg_names [16] =
15253 "wCID", "wCon", "wCSSF", "wCASF",
15254 "wC4", "wC5", "wC6", "wC7",
15255 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15256 "wC12", "wC13", "wC14", "wC15"
15259 fprintf (stream, wc_reg_names [INTVAL (x)]);
15263 /* Print the high single-precision register of a VFP double-precision
15267 int mode = GET_MODE (x);
15270 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15272 output_operand_lossage ("invalid operand for code '%c'", code);
15277 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15279 output_operand_lossage ("invalid operand for code '%c'", code);
15283 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15287 /* Print a VFP/Neon double precision or quad precision register name. */
15291 int mode = GET_MODE (x);
15292 int is_quad = (code == 'q');
15295 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15297 output_operand_lossage ("invalid operand for code '%c'", code);
15301 if (GET_CODE (x) != REG
15302 || !IS_VFP_REGNUM (REGNO (x)))
15304 output_operand_lossage ("invalid operand for code '%c'", code);
15309 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15310 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15312 output_operand_lossage ("invalid operand for code '%c'", code);
15316 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15317 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15321 /* These two codes print the low/high doubleword register of a Neon quad
15322 register, respectively. For pair-structure types, can also print
15323 low/high quadword registers. */
15327 int mode = GET_MODE (x);
15330 if ((GET_MODE_SIZE (mode) != 16
15331 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15333 output_operand_lossage ("invalid operand for code '%c'", code);
15338 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15340 output_operand_lossage ("invalid operand for code '%c'", code);
15344 if (GET_MODE_SIZE (mode) == 16)
15345 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15346 + (code == 'f' ? 1 : 0));
15348 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15349 + (code == 'f' ? 1 : 0));
15353 /* Print a VFPv3 floating-point constant, represented as an integer
15357 int index = vfp3_const_double_index (x);
15358 gcc_assert (index != -1);
15359 fprintf (stream, "%d", index);
15363 /* Print bits representing opcode features for Neon.
15365 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15366 and polynomials as unsigned.
15368 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15370 Bit 2 is 1 for rounding functions, 0 otherwise. */
15372 /* Identify the type as 's', 'u', 'p' or 'f'. */
15375 HOST_WIDE_INT bits = INTVAL (x);
15376 fputc ("uspf"[bits & 3], stream);
15380 /* Likewise, but signed and unsigned integers are both 'i'. */
15383 HOST_WIDE_INT bits = INTVAL (x);
15384 fputc ("iipf"[bits & 3], stream);
15388 /* As for 'T', but emit 'u' instead of 'p'. */
15391 HOST_WIDE_INT bits = INTVAL (x);
15392 fputc ("usuf"[bits & 3], stream);
15396 /* Bit 2: rounding (vs none). */
15399 HOST_WIDE_INT bits = INTVAL (x);
15400 fputs ((bits & 4) != 0 ? "r" : "", stream);
15404 /* Memory operand for vld1/vst1 instruction. */
15408 bool postinc = FALSE;
15409 gcc_assert (GET_CODE (x) == MEM);
15410 addr = XEXP (x, 0);
15411 if (GET_CODE (addr) == POST_INC)
15414 addr = XEXP (addr, 0);
15416 asm_fprintf (stream, "[%r]", REGNO (addr));
15418 fputs("!", stream);
15422 /* Translate an S register number into a D register number and element index. */
15425 int mode = GET_MODE (x);
15428 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15430 output_operand_lossage ("invalid operand for code '%c'", code);
15435 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15437 output_operand_lossage ("invalid operand for code '%c'", code);
15441 regno = regno - FIRST_VFP_REGNUM;
15442 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15446 /* Register specifier for vld1.16/vst1.16. Translate the S register
15447 number into a D register number and element index. */
15450 int mode = GET_MODE (x);
15453 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15455 output_operand_lossage ("invalid operand for code '%c'", code);
15460 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15462 output_operand_lossage ("invalid operand for code '%c'", code);
15466 regno = regno - FIRST_VFP_REGNUM;
15467 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15474 output_operand_lossage ("missing operand");
15478 switch (GET_CODE (x))
15481 asm_fprintf (stream, "%r", REGNO (x));
15485 output_memory_reference_mode = GET_MODE (x);
15486 output_address (XEXP (x, 0));
15493 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15494 sizeof (fpstr), 0, 1);
15495 fprintf (stream, "#%s", fpstr);
15498 fprintf (stream, "#%s", fp_immediate_constant (x));
15502 gcc_assert (GET_CODE (x) != NEG);
15503 fputc ('#', stream);
15504 if (GET_CODE (x) == HIGH)
15506 fputs (":lower16:", stream);
15510 output_addr_const (stream, x);
15516 /* Target hook for assembling integer objects. The ARM version needs to
15517 handle word-sized values specially. */
15519 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15521 enum machine_mode mode;
15523 if (size == UNITS_PER_WORD && aligned_p)
15525 fputs ("\t.word\t", asm_out_file);
15526 output_addr_const (asm_out_file, x);
15528 /* Mark symbols as position independent. We only do this in the
15529 .text segment, not in the .data segment. */
15530 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15531 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15533 /* See legitimize_pic_address for an explanation of the
15534 TARGET_VXWORKS_RTP check. */
15535 if (TARGET_VXWORKS_RTP
15536 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15537 fputs ("(GOT)", asm_out_file);
15539 fputs ("(GOTOFF)", asm_out_file);
15541 fputc ('\n', asm_out_file);
15545 mode = GET_MODE (x);
15547 if (arm_vector_mode_supported_p (mode))
15551 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15553 units = CONST_VECTOR_NUNITS (x);
15554 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15556 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15557 for (i = 0; i < units; i++)
15559 rtx elt = CONST_VECTOR_ELT (x, i);
15561 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15564 for (i = 0; i < units; i++)
15566 rtx elt = CONST_VECTOR_ELT (x, i);
15567 REAL_VALUE_TYPE rval;
15569 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15572 (rval, GET_MODE_INNER (mode),
15573 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15579 return default_assemble_integer (x, size, aligned_p);
15583 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15587 if (!TARGET_AAPCS_BASED)
15590 default_named_section_asm_out_constructor
15591 : default_named_section_asm_out_destructor) (symbol, priority);
15595 /* Put these in the .init_array section, using a special relocation. */
15596 if (priority != DEFAULT_INIT_PRIORITY)
15599 sprintf (buf, "%s.%.5u",
15600 is_ctor ? ".init_array" : ".fini_array",
15602 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15609 switch_to_section (s);
15610 assemble_align (POINTER_SIZE);
15611 fputs ("\t.word\t", asm_out_file);
15612 output_addr_const (asm_out_file, symbol);
15613 fputs ("(target1)\n", asm_out_file);
15616 /* Add a function to the list of static constructors. */
15619 arm_elf_asm_constructor (rtx symbol, int priority)
15621 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15624 /* Add a function to the list of static destructors. */
15627 arm_elf_asm_destructor (rtx symbol, int priority)
15629 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15632 /* A finite state machine takes care of noticing whether or not instructions
15633 can be conditionally executed, and thus decrease execution time and code
15634 size by deleting branch instructions. The fsm is controlled by
15635 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15637 /* The state of the fsm controlling condition codes are:
15638 0: normal, do nothing special
15639 1: make ASM_OUTPUT_OPCODE not output this instruction
15640 2: make ASM_OUTPUT_OPCODE not output this instruction
15641 3: make instructions conditional
15642 4: make instructions conditional
15644 State transitions (state->state by whom under condition):
15645 0 -> 1 final_prescan_insn if the `target' is a label
15646 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15647 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15648 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15649 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15650 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15651 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15652 (the target insn is arm_target_insn).
15654 If the jump clobbers the conditions then we use states 2 and 4.
15656 A similar thing can be done with conditional return insns.
15658 XXX In case the `target' is an unconditional branch, this conditionalising
15659 of the instructions always reduces code size, but not always execution
15660 time. But then, I want to reduce the code size to somewhere near what
15661 /bin/cc produces. */
15663 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15664 instructions. When a COND_EXEC instruction is seen the subsequent
15665 instructions are scanned so that multiple conditional instructions can be
15666 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15667 specify the length and true/false mask for the IT block. These will be
15668 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15670 /* Returns the index of the ARM condition code string in
15671 `arm_condition_codes'. COMPARISON should be an rtx like
15672 `(eq (...) (...))'. */
15673 static enum arm_cond_code
15674 get_arm_condition_code (rtx comparison)
15676 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15677 enum arm_cond_code code;
15678 enum rtx_code comp_code = GET_CODE (comparison);
15680 if (GET_MODE_CLASS (mode) != MODE_CC)
15681 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15682 XEXP (comparison, 1));
15686 case CC_DNEmode: code = ARM_NE; goto dominance;
15687 case CC_DEQmode: code = ARM_EQ; goto dominance;
15688 case CC_DGEmode: code = ARM_GE; goto dominance;
15689 case CC_DGTmode: code = ARM_GT; goto dominance;
15690 case CC_DLEmode: code = ARM_LE; goto dominance;
15691 case CC_DLTmode: code = ARM_LT; goto dominance;
15692 case CC_DGEUmode: code = ARM_CS; goto dominance;
15693 case CC_DGTUmode: code = ARM_HI; goto dominance;
15694 case CC_DLEUmode: code = ARM_LS; goto dominance;
15695 case CC_DLTUmode: code = ARM_CC;
15698 gcc_assert (comp_code == EQ || comp_code == NE);
15700 if (comp_code == EQ)
15701 return ARM_INVERSE_CONDITION_CODE (code);
15707 case NE: return ARM_NE;
15708 case EQ: return ARM_EQ;
15709 case GE: return ARM_PL;
15710 case LT: return ARM_MI;
15711 default: gcc_unreachable ();
15717 case NE: return ARM_NE;
15718 case EQ: return ARM_EQ;
15719 default: gcc_unreachable ();
15725 case NE: return ARM_MI;
15726 case EQ: return ARM_PL;
15727 default: gcc_unreachable ();
15732 /* These encodings assume that AC=1 in the FPA system control
15733 byte. This allows us to handle all cases except UNEQ and
15737 case GE: return ARM_GE;
15738 case GT: return ARM_GT;
15739 case LE: return ARM_LS;
15740 case LT: return ARM_MI;
15741 case NE: return ARM_NE;
15742 case EQ: return ARM_EQ;
15743 case ORDERED: return ARM_VC;
15744 case UNORDERED: return ARM_VS;
15745 case UNLT: return ARM_LT;
15746 case UNLE: return ARM_LE;
15747 case UNGT: return ARM_HI;
15748 case UNGE: return ARM_PL;
15749 /* UNEQ and LTGT do not have a representation. */
15750 case UNEQ: /* Fall through. */
15751 case LTGT: /* Fall through. */
15752 default: gcc_unreachable ();
15758 case NE: return ARM_NE;
15759 case EQ: return ARM_EQ;
15760 case GE: return ARM_LE;
15761 case GT: return ARM_LT;
15762 case LE: return ARM_GE;
15763 case LT: return ARM_GT;
15764 case GEU: return ARM_LS;
15765 case GTU: return ARM_CC;
15766 case LEU: return ARM_CS;
15767 case LTU: return ARM_HI;
15768 default: gcc_unreachable ();
15774 case LTU: return ARM_CS;
15775 case GEU: return ARM_CC;
15776 default: gcc_unreachable ();
15782 case NE: return ARM_NE;
15783 case EQ: return ARM_EQ;
15784 case GE: return ARM_GE;
15785 case GT: return ARM_GT;
15786 case LE: return ARM_LE;
15787 case LT: return ARM_LT;
15788 case GEU: return ARM_CS;
15789 case GTU: return ARM_HI;
15790 case LEU: return ARM_LS;
15791 case LTU: return ARM_CC;
15792 default: gcc_unreachable ();
15795 default: gcc_unreachable ();
15799 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15802 thumb2_final_prescan_insn (rtx insn)
15804 rtx first_insn = insn;
15805 rtx body = PATTERN (insn);
15807 enum arm_cond_code code;
15811 /* Remove the previous insn from the count of insns to be output. */
15812 if (arm_condexec_count)
15813 arm_condexec_count--;
15815 /* Nothing to do if we are already inside a conditional block. */
15816 if (arm_condexec_count)
15819 if (GET_CODE (body) != COND_EXEC)
15822 /* Conditional jumps are implemented directly. */
15823 if (GET_CODE (insn) == JUMP_INSN)
15826 predicate = COND_EXEC_TEST (body);
15827 arm_current_cc = get_arm_condition_code (predicate);
15829 n = get_attr_ce_count (insn);
15830 arm_condexec_count = 1;
15831 arm_condexec_mask = (1 << n) - 1;
15832 arm_condexec_masklen = n;
15833 /* See if subsequent instructions can be combined into the same block. */
15836 insn = next_nonnote_insn (insn);
15838 /* Jumping into the middle of an IT block is illegal, so a label or
15839 barrier terminates the block. */
15840 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15843 body = PATTERN (insn);
15844 /* USE and CLOBBER aren't really insns, so just skip them. */
15845 if (GET_CODE (body) == USE
15846 || GET_CODE (body) == CLOBBER)
15849 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15850 if (GET_CODE (body) != COND_EXEC)
15852 /* Allow up to 4 conditionally executed instructions in a block. */
15853 n = get_attr_ce_count (insn);
15854 if (arm_condexec_masklen + n > 4)
15857 predicate = COND_EXEC_TEST (body);
15858 code = get_arm_condition_code (predicate);
15859 mask = (1 << n) - 1;
15860 if (arm_current_cc == code)
15861 arm_condexec_mask |= (mask << arm_condexec_masklen);
15862 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15865 arm_condexec_count++;
15866 arm_condexec_masklen += n;
15868 /* A jump must be the last instruction in a conditional block. */
15869 if (GET_CODE(insn) == JUMP_INSN)
15872 /* Restore recog_data (getting the attributes of other insns can
15873 destroy this array, but final.c assumes that it remains intact
15874 across this call). */
15875 extract_constrain_insn_cached (first_insn);
15879 arm_final_prescan_insn (rtx insn)
15881 /* BODY will hold the body of INSN. */
15882 rtx body = PATTERN (insn);
15884 /* This will be 1 if trying to repeat the trick, and things need to be
15885 reversed if it appears to fail. */
15888 /* If we start with a return insn, we only succeed if we find another one. */
15889 int seeking_return = 0;
15891 /* START_INSN will hold the insn from where we start looking. This is the
15892 first insn after the following code_label if REVERSE is true. */
15893 rtx start_insn = insn;
15895 /* If in state 4, check if the target branch is reached, in order to
15896 change back to state 0. */
15897 if (arm_ccfsm_state == 4)
15899 if (insn == arm_target_insn)
15901 arm_target_insn = NULL;
15902 arm_ccfsm_state = 0;
15907 /* If in state 3, it is possible to repeat the trick, if this insn is an
15908 unconditional branch to a label, and immediately following this branch
15909 is the previous target label which is only used once, and the label this
15910 branch jumps to is not too far off. */
15911 if (arm_ccfsm_state == 3)
15913 if (simplejump_p (insn))
15915 start_insn = next_nonnote_insn (start_insn);
15916 if (GET_CODE (start_insn) == BARRIER)
15918 /* XXX Isn't this always a barrier? */
15919 start_insn = next_nonnote_insn (start_insn);
15921 if (GET_CODE (start_insn) == CODE_LABEL
15922 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15923 && LABEL_NUSES (start_insn) == 1)
15928 else if (GET_CODE (body) == RETURN)
15930 start_insn = next_nonnote_insn (start_insn);
15931 if (GET_CODE (start_insn) == BARRIER)
15932 start_insn = next_nonnote_insn (start_insn);
15933 if (GET_CODE (start_insn) == CODE_LABEL
15934 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15935 && LABEL_NUSES (start_insn) == 1)
15938 seeking_return = 1;
15947 gcc_assert (!arm_ccfsm_state || reverse);
15948 if (GET_CODE (insn) != JUMP_INSN)
15951 /* This jump might be paralleled with a clobber of the condition codes
15952 the jump should always come first */
15953 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15954 body = XVECEXP (body, 0, 0);
15957 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15958 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15961 int fail = FALSE, succeed = FALSE;
15962 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15963 int then_not_else = TRUE;
15964 rtx this_insn = start_insn, label = 0;
15966 /* Register the insn jumped to. */
15969 if (!seeking_return)
15970 label = XEXP (SET_SRC (body), 0);
15972 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15973 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15974 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15976 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15977 then_not_else = FALSE;
15979 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15980 seeking_return = 1;
15981 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15983 seeking_return = 1;
15984 then_not_else = FALSE;
15987 gcc_unreachable ();
15989 /* See how many insns this branch skips, and what kind of insns. If all
15990 insns are okay, and the label or unconditional branch to the same
15991 label is not too far away, succeed. */
15992 for (insns_skipped = 0;
15993 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
15997 this_insn = next_nonnote_insn (this_insn);
16001 switch (GET_CODE (this_insn))
16004 /* Succeed if it is the target label, otherwise fail since
16005 control falls in from somewhere else. */
16006 if (this_insn == label)
16008 arm_ccfsm_state = 1;
16016 /* Succeed if the following insn is the target label.
16018 If return insns are used then the last insn in a function
16019 will be a barrier. */
16020 this_insn = next_nonnote_insn (this_insn);
16021 if (this_insn && this_insn == label)
16023 arm_ccfsm_state = 1;
16031 /* The AAPCS says that conditional calls should not be
16032 used since they make interworking inefficient (the
16033 linker can't transform BL<cond> into BLX). That's
16034 only a problem if the machine has BLX. */
16041 /* Succeed if the following insn is the target label, or
16042 if the following two insns are a barrier and the
16044 this_insn = next_nonnote_insn (this_insn);
16045 if (this_insn && GET_CODE (this_insn) == BARRIER)
16046 this_insn = next_nonnote_insn (this_insn);
16048 if (this_insn && this_insn == label
16049 && insns_skipped < max_insns_skipped)
16051 arm_ccfsm_state = 1;
16059 /* If this is an unconditional branch to the same label, succeed.
16060 If it is to another label, do nothing. If it is conditional,
16062 /* XXX Probably, the tests for SET and the PC are
16065 scanbody = PATTERN (this_insn);
16066 if (GET_CODE (scanbody) == SET
16067 && GET_CODE (SET_DEST (scanbody)) == PC)
16069 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16070 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16072 arm_ccfsm_state = 2;
16075 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16078 /* Fail if a conditional return is undesirable (e.g. on a
16079 StrongARM), but still allow this if optimizing for size. */
16080 else if (GET_CODE (scanbody) == RETURN
16081 && !use_return_insn (TRUE, NULL)
16084 else if (GET_CODE (scanbody) == RETURN
16087 arm_ccfsm_state = 2;
16090 else if (GET_CODE (scanbody) == PARALLEL)
16092 switch (get_attr_conds (this_insn))
16102 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16107 /* Instructions using or affecting the condition codes make it
16109 scanbody = PATTERN (this_insn);
16110 if (!(GET_CODE (scanbody) == SET
16111 || GET_CODE (scanbody) == PARALLEL)
16112 || get_attr_conds (this_insn) != CONDS_NOCOND)
16115 /* A conditional cirrus instruction must be followed by
16116 a non Cirrus instruction. However, since we
16117 conditionalize instructions in this function and by
16118 the time we get here we can't add instructions
16119 (nops), because shorten_branches() has already been
16120 called, we will disable conditionalizing Cirrus
16121 instructions to be safe. */
16122 if (GET_CODE (scanbody) != USE
16123 && GET_CODE (scanbody) != CLOBBER
16124 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16134 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16135 arm_target_label = CODE_LABEL_NUMBER (label);
16138 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16140 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16142 this_insn = next_nonnote_insn (this_insn);
16143 gcc_assert (!this_insn
16144 || (GET_CODE (this_insn) != BARRIER
16145 && GET_CODE (this_insn) != CODE_LABEL));
16149 /* Oh, dear! we ran off the end.. give up. */
16150 extract_constrain_insn_cached (insn);
16151 arm_ccfsm_state = 0;
16152 arm_target_insn = NULL;
16155 arm_target_insn = this_insn;
16158 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16161 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16163 if (reverse || then_not_else)
16164 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16167 /* Restore recog_data (getting the attributes of other insns can
16168 destroy this array, but final.c assumes that it remains intact
16169 across this call. */
16170 extract_constrain_insn_cached (insn);
16174 /* Output IT instructions. */
16176 thumb2_asm_output_opcode (FILE * stream)
16181 if (arm_condexec_mask)
16183 for (n = 0; n < arm_condexec_masklen; n++)
16184 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16186 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16187 arm_condition_codes[arm_current_cc]);
16188 arm_condexec_mask = 0;
16192 /* Returns true if REGNO is a valid register
16193 for holding a quantity of type MODE. */
16195 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16197 if (GET_MODE_CLASS (mode) == MODE_CC)
16198 return (regno == CC_REGNUM
16199 || (TARGET_HARD_FLOAT && TARGET_VFP
16200 && regno == VFPCC_REGNUM));
16203 /* For the Thumb we only allow values bigger than SImode in
16204 registers 0 - 6, so that there is always a second low
16205 register available to hold the upper part of the value.
16206 We probably we ought to ensure that the register is the
16207 start of an even numbered register pair. */
16208 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16210 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16211 && IS_CIRRUS_REGNUM (regno))
16212 /* We have outlawed SI values in Cirrus registers because they
16213 reside in the lower 32 bits, but SF values reside in the
16214 upper 32 bits. This causes gcc all sorts of grief. We can't
16215 even split the registers into pairs because Cirrus SI values
16216 get sign extended to 64bits-- aldyh. */
16217 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16219 if (TARGET_HARD_FLOAT && TARGET_VFP
16220 && IS_VFP_REGNUM (regno))
16222 if (mode == SFmode || mode == SImode)
16223 return VFP_REGNO_OK_FOR_SINGLE (regno);
16225 if (mode == DFmode)
16226 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16228 /* VFP registers can hold HFmode values, but there is no point in
16229 putting them there unless we have hardware conversion insns. */
16230 if (mode == HFmode)
16231 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16234 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16235 || (VALID_NEON_QREG_MODE (mode)
16236 && NEON_REGNO_OK_FOR_QUAD (regno))
16237 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16238 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16239 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16240 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16241 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16246 if (TARGET_REALLY_IWMMXT)
16248 if (IS_IWMMXT_GR_REGNUM (regno))
16249 return mode == SImode;
16251 if (IS_IWMMXT_REGNUM (regno))
16252 return VALID_IWMMXT_REG_MODE (mode);
16255 /* We allow almost any value to be stored in the general registers.
16256 Restrict doubleword quantities to even register pairs so that we can
16257 use ldrd. Do not allow very large Neon structure opaque modes in
16258 general registers; they would use too many. */
16259 if (regno <= LAST_ARM_REGNUM)
16260 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16261 && ARM_NUM_REGS (mode) <= 4;
16263 if (regno == FRAME_POINTER_REGNUM
16264 || regno == ARG_POINTER_REGNUM)
16265 /* We only allow integers in the fake hard registers. */
16266 return GET_MODE_CLASS (mode) == MODE_INT;
16268 /* The only registers left are the FPA registers
16269 which we only allow to hold FP values. */
16270 return (TARGET_HARD_FLOAT && TARGET_FPA
16271 && GET_MODE_CLASS (mode) == MODE_FLOAT
16272 && regno >= FIRST_FPA_REGNUM
16273 && regno <= LAST_FPA_REGNUM);
16276 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16277 not used in arm mode. */
16280 arm_regno_class (int regno)
16284 if (regno == STACK_POINTER_REGNUM)
16286 if (regno == CC_REGNUM)
16293 if (TARGET_THUMB2 && regno < 8)
16296 if ( regno <= LAST_ARM_REGNUM
16297 || regno == FRAME_POINTER_REGNUM
16298 || regno == ARG_POINTER_REGNUM)
16299 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16301 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16302 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16304 if (IS_CIRRUS_REGNUM (regno))
16305 return CIRRUS_REGS;
16307 if (IS_VFP_REGNUM (regno))
16309 if (regno <= D7_VFP_REGNUM)
16310 return VFP_D0_D7_REGS;
16311 else if (regno <= LAST_LO_VFP_REGNUM)
16312 return VFP_LO_REGS;
16314 return VFP_HI_REGS;
16317 if (IS_IWMMXT_REGNUM (regno))
16318 return IWMMXT_REGS;
16320 if (IS_IWMMXT_GR_REGNUM (regno))
16321 return IWMMXT_GR_REGS;
16326 /* Handle a special case when computing the offset
16327 of an argument from the frame pointer. */
16329 arm_debugger_arg_offset (int value, rtx addr)
16333 /* We are only interested if dbxout_parms() failed to compute the offset. */
16337 /* We can only cope with the case where the address is held in a register. */
16338 if (GET_CODE (addr) != REG)
16341 /* If we are using the frame pointer to point at the argument, then
16342 an offset of 0 is correct. */
16343 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16346 /* If we are using the stack pointer to point at the
16347 argument, then an offset of 0 is correct. */
16348 /* ??? Check this is consistent with thumb2 frame layout. */
16349 if ((TARGET_THUMB || !frame_pointer_needed)
16350 && REGNO (addr) == SP_REGNUM)
16353 /* Oh dear. The argument is pointed to by a register rather
16354 than being held in a register, or being stored at a known
16355 offset from the frame pointer. Since GDB only understands
16356 those two kinds of argument we must translate the address
16357 held in the register into an offset from the frame pointer.
16358 We do this by searching through the insns for the function
16359 looking to see where this register gets its value. If the
16360 register is initialized from the frame pointer plus an offset
16361 then we are in luck and we can continue, otherwise we give up.
16363 This code is exercised by producing debugging information
16364 for a function with arguments like this:
16366 double func (double a, double b, int c, double d) {return d;}
16368 Without this code the stab for parameter 'd' will be set to
16369 an offset of 0 from the frame pointer, rather than 8. */
16371 /* The if() statement says:
16373 If the insn is a normal instruction
16374 and if the insn is setting the value in a register
16375 and if the register being set is the register holding the address of the argument
16376 and if the address is computing by an addition
16377 that involves adding to a register
16378 which is the frame pointer
16383 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16385 if ( GET_CODE (insn) == INSN
16386 && GET_CODE (PATTERN (insn)) == SET
16387 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16388 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16389 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16390 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16391 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16394 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16403 warning (0, "unable to compute real location of stacked parameter");
16404 value = 8; /* XXX magic hack */
16410 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16413 if ((MASK) & insn_flags) \
16414 add_builtin_function ((NAME), (TYPE), (CODE), \
16415 BUILT_IN_MD, NULL, NULL_TREE); \
16419 struct builtin_description
16421 const unsigned int mask;
16422 const enum insn_code icode;
16423 const char * const name;
16424 const enum arm_builtins code;
16425 const enum rtx_code comparison;
16426 const unsigned int flag;
16429 static const struct builtin_description bdesc_2arg[] =
16431 #define IWMMXT_BUILTIN(code, string, builtin) \
16432 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16433 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16435 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16436 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16437 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16438 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16439 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16440 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16441 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16442 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16443 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16444 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16445 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16446 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16447 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16448 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16449 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16450 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16451 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16452 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16453 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16454 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16455 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16456 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16457 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16458 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16459 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16460 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16461 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16462 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16463 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16464 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16465 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16466 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16467 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16468 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16469 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16470 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16471 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16472 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16473 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16474 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16475 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16476 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16477 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16478 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16479 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16480 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16481 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16482 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16483 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16484 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16485 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16486 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16487 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16488 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16489 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16490 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16491 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16492 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16494 #define IWMMXT_BUILTIN2(code, builtin) \
16495 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16497 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16498 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16499 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16500 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16501 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16502 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16503 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16504 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16505 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16506 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16507 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16508 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16509 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16510 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16511 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16512 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16513 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16514 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16515 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16516 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16517 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16518 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16519 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16520 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16521 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16522 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16523 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16524 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16525 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16526 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16527 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16528 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16531 static const struct builtin_description bdesc_1arg[] =
16533 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16534 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16535 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16536 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16537 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16538 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16539 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16540 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16541 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16542 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16543 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16544 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16545 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16546 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16547 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16548 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16549 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16550 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16553 /* Set up all the iWMMXt builtins. This is
16554 not called if TARGET_IWMMXT is zero. */
16557 arm_init_iwmmxt_builtins (void)
16559 const struct builtin_description * d;
16561 tree endlink = void_list_node;
16563 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16564 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16565 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16568 = build_function_type (integer_type_node,
16569 tree_cons (NULL_TREE, integer_type_node, endlink));
16570 tree v8qi_ftype_v8qi_v8qi_int
16571 = build_function_type (V8QI_type_node,
16572 tree_cons (NULL_TREE, V8QI_type_node,
16573 tree_cons (NULL_TREE, V8QI_type_node,
16574 tree_cons (NULL_TREE,
16577 tree v4hi_ftype_v4hi_int
16578 = build_function_type (V4HI_type_node,
16579 tree_cons (NULL_TREE, V4HI_type_node,
16580 tree_cons (NULL_TREE, integer_type_node,
16582 tree v2si_ftype_v2si_int
16583 = build_function_type (V2SI_type_node,
16584 tree_cons (NULL_TREE, V2SI_type_node,
16585 tree_cons (NULL_TREE, integer_type_node,
16587 tree v2si_ftype_di_di
16588 = build_function_type (V2SI_type_node,
16589 tree_cons (NULL_TREE, long_long_integer_type_node,
16590 tree_cons (NULL_TREE, long_long_integer_type_node,
16592 tree di_ftype_di_int
16593 = build_function_type (long_long_integer_type_node,
16594 tree_cons (NULL_TREE, long_long_integer_type_node,
16595 tree_cons (NULL_TREE, integer_type_node,
16597 tree di_ftype_di_int_int
16598 = build_function_type (long_long_integer_type_node,
16599 tree_cons (NULL_TREE, long_long_integer_type_node,
16600 tree_cons (NULL_TREE, integer_type_node,
16601 tree_cons (NULL_TREE,
16604 tree int_ftype_v8qi
16605 = build_function_type (integer_type_node,
16606 tree_cons (NULL_TREE, V8QI_type_node,
16608 tree int_ftype_v4hi
16609 = build_function_type (integer_type_node,
16610 tree_cons (NULL_TREE, V4HI_type_node,
16612 tree int_ftype_v2si
16613 = build_function_type (integer_type_node,
16614 tree_cons (NULL_TREE, V2SI_type_node,
16616 tree int_ftype_v8qi_int
16617 = build_function_type (integer_type_node,
16618 tree_cons (NULL_TREE, V8QI_type_node,
16619 tree_cons (NULL_TREE, integer_type_node,
16621 tree int_ftype_v4hi_int
16622 = build_function_type (integer_type_node,
16623 tree_cons (NULL_TREE, V4HI_type_node,
16624 tree_cons (NULL_TREE, integer_type_node,
16626 tree int_ftype_v2si_int
16627 = build_function_type (integer_type_node,
16628 tree_cons (NULL_TREE, V2SI_type_node,
16629 tree_cons (NULL_TREE, integer_type_node,
16631 tree v8qi_ftype_v8qi_int_int
16632 = build_function_type (V8QI_type_node,
16633 tree_cons (NULL_TREE, V8QI_type_node,
16634 tree_cons (NULL_TREE, integer_type_node,
16635 tree_cons (NULL_TREE,
16638 tree v4hi_ftype_v4hi_int_int
16639 = build_function_type (V4HI_type_node,
16640 tree_cons (NULL_TREE, V4HI_type_node,
16641 tree_cons (NULL_TREE, integer_type_node,
16642 tree_cons (NULL_TREE,
16645 tree v2si_ftype_v2si_int_int
16646 = build_function_type (V2SI_type_node,
16647 tree_cons (NULL_TREE, V2SI_type_node,
16648 tree_cons (NULL_TREE, integer_type_node,
16649 tree_cons (NULL_TREE,
16652 /* Miscellaneous. */
16653 tree v8qi_ftype_v4hi_v4hi
16654 = build_function_type (V8QI_type_node,
16655 tree_cons (NULL_TREE, V4HI_type_node,
16656 tree_cons (NULL_TREE, V4HI_type_node,
16658 tree v4hi_ftype_v2si_v2si
16659 = build_function_type (V4HI_type_node,
16660 tree_cons (NULL_TREE, V2SI_type_node,
16661 tree_cons (NULL_TREE, V2SI_type_node,
16663 tree v2si_ftype_v4hi_v4hi
16664 = build_function_type (V2SI_type_node,
16665 tree_cons (NULL_TREE, V4HI_type_node,
16666 tree_cons (NULL_TREE, V4HI_type_node,
16668 tree v2si_ftype_v8qi_v8qi
16669 = build_function_type (V2SI_type_node,
16670 tree_cons (NULL_TREE, V8QI_type_node,
16671 tree_cons (NULL_TREE, V8QI_type_node,
16673 tree v4hi_ftype_v4hi_di
16674 = build_function_type (V4HI_type_node,
16675 tree_cons (NULL_TREE, V4HI_type_node,
16676 tree_cons (NULL_TREE,
16677 long_long_integer_type_node,
16679 tree v2si_ftype_v2si_di
16680 = build_function_type (V2SI_type_node,
16681 tree_cons (NULL_TREE, V2SI_type_node,
16682 tree_cons (NULL_TREE,
16683 long_long_integer_type_node,
16685 tree void_ftype_int_int
16686 = build_function_type (void_type_node,
16687 tree_cons (NULL_TREE, integer_type_node,
16688 tree_cons (NULL_TREE, integer_type_node,
16691 = build_function_type (long_long_unsigned_type_node, endlink);
16693 = build_function_type (long_long_integer_type_node,
16694 tree_cons (NULL_TREE, V8QI_type_node,
16697 = build_function_type (long_long_integer_type_node,
16698 tree_cons (NULL_TREE, V4HI_type_node,
16701 = build_function_type (long_long_integer_type_node,
16702 tree_cons (NULL_TREE, V2SI_type_node,
16704 tree v2si_ftype_v4hi
16705 = build_function_type (V2SI_type_node,
16706 tree_cons (NULL_TREE, V4HI_type_node,
16708 tree v4hi_ftype_v8qi
16709 = build_function_type (V4HI_type_node,
16710 tree_cons (NULL_TREE, V8QI_type_node,
16713 tree di_ftype_di_v4hi_v4hi
16714 = build_function_type (long_long_unsigned_type_node,
16715 tree_cons (NULL_TREE,
16716 long_long_unsigned_type_node,
16717 tree_cons (NULL_TREE, V4HI_type_node,
16718 tree_cons (NULL_TREE,
16722 tree di_ftype_v4hi_v4hi
16723 = build_function_type (long_long_unsigned_type_node,
16724 tree_cons (NULL_TREE, V4HI_type_node,
16725 tree_cons (NULL_TREE, V4HI_type_node,
16728 /* Normal vector binops. */
16729 tree v8qi_ftype_v8qi_v8qi
16730 = build_function_type (V8QI_type_node,
16731 tree_cons (NULL_TREE, V8QI_type_node,
16732 tree_cons (NULL_TREE, V8QI_type_node,
16734 tree v4hi_ftype_v4hi_v4hi
16735 = build_function_type (V4HI_type_node,
16736 tree_cons (NULL_TREE, V4HI_type_node,
16737 tree_cons (NULL_TREE, V4HI_type_node,
16739 tree v2si_ftype_v2si_v2si
16740 = build_function_type (V2SI_type_node,
16741 tree_cons (NULL_TREE, V2SI_type_node,
16742 tree_cons (NULL_TREE, V2SI_type_node,
16744 tree di_ftype_di_di
16745 = build_function_type (long_long_unsigned_type_node,
16746 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16747 tree_cons (NULL_TREE,
16748 long_long_unsigned_type_node,
16751 /* Add all builtins that are more or less simple operations on two
16753 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16755 /* Use one of the operands; the target can have a different mode for
16756 mask-generating compares. */
16757 enum machine_mode mode;
16763 mode = insn_data[d->icode].operand[1].mode;
16768 type = v8qi_ftype_v8qi_v8qi;
16771 type = v4hi_ftype_v4hi_v4hi;
16774 type = v2si_ftype_v2si_v2si;
16777 type = di_ftype_di_di;
16781 gcc_unreachable ();
16784 def_mbuiltin (d->mask, d->name, type, d->code);
16787 /* Add the remaining MMX insns with somewhat more complicated types. */
16788 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16789 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16790 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16792 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16793 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16794 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16795 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16796 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16797 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16799 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16800 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16801 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16802 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16803 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16808 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16810 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16814 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16817 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16818 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16820 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16824 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16825 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16828 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16832 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16834 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16835 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16837 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16838 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16839 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16842 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16843 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16846 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16848 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16849 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16852 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16853 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16854 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16855 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16856 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16857 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16858 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16859 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16860 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16861 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16862 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16863 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16865 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16866 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16867 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16868 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16870 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16871 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16872 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16873 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16874 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16875 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16876 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
16880 arm_init_tls_builtins (void)
16884 ftype = build_function_type (ptr_type_node, void_list_node);
16885 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16886 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16888 TREE_NOTHROW (decl) = 1;
16889 TREE_READONLY (decl) = 1;
16892 enum neon_builtin_type_bits {
16908 #define v8qi_UP T_V8QI
16909 #define v4hi_UP T_V4HI
16910 #define v2si_UP T_V2SI
16911 #define v2sf_UP T_V2SF
16913 #define v16qi_UP T_V16QI
16914 #define v8hi_UP T_V8HI
16915 #define v4si_UP T_V4SI
16916 #define v4sf_UP T_V4SF
16917 #define v2di_UP T_V2DI
16922 #define UP(X) X##_UP
16957 NEON_LOADSTRUCTLANE,
16959 NEON_STORESTRUCTLANE,
16968 const neon_itype itype;
16970 const enum insn_code codes[T_MAX];
16971 const unsigned int num_vars;
16972 unsigned int base_fcode;
16973 } neon_builtin_datum;
16975 #define CF(N,X) CODE_FOR_neon_##N##X
16977 #define VAR1(T, N, A) \
16978 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16979 #define VAR2(T, N, A, B) \
16980 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16981 #define VAR3(T, N, A, B, C) \
16982 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16983 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16984 #define VAR4(T, N, A, B, C, D) \
16985 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16986 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16987 #define VAR5(T, N, A, B, C, D, E) \
16988 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16989 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16990 #define VAR6(T, N, A, B, C, D, E, F) \
16991 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16992 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16993 #define VAR7(T, N, A, B, C, D, E, F, G) \
16994 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16995 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16997 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16998 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17000 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17001 CF (N, G), CF (N, H) }, 8, 0
17002 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17003 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17004 | UP (H) | UP (I), \
17005 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17006 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17007 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17008 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17009 | UP (H) | UP (I) | UP (J), \
17010 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17011 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17013 /* The mode entries in the following table correspond to the "key" type of the
17014 instruction variant, i.e. equivalent to that which would be specified after
17015 the assembler mnemonic, which usually refers to the last vector operand.
17016 (Signed/unsigned/polynomial types are not differentiated between though, and
17017 are all mapped onto the same mode for a given element size.) The modes
17018 listed per instruction should be the same as those defined for that
17019 instruction's pattern in neon.md.
17020 WARNING: Variants should be listed in the same increasing order as
17021 neon_builtin_type_bits. */
17023 static neon_builtin_datum neon_builtin_data[] =
17025 { VAR10 (BINOP, vadd,
17026 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17027 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17028 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17029 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17030 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17031 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17032 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17033 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17034 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17035 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17036 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17037 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17038 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17039 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17040 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17041 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17042 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17043 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17044 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17045 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17046 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17047 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17048 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17049 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17050 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17051 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17052 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17053 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17054 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17055 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17056 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17057 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17058 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17059 { VAR10 (BINOP, vsub,
17060 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17061 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17062 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17063 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17064 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17065 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17066 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17067 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17068 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17069 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17070 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17071 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17072 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17073 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17074 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17075 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17076 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17077 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17078 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17079 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17080 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17081 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17082 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17083 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17084 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17085 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17086 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17087 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17088 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17089 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17090 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17091 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17092 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17093 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17094 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17095 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17096 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17097 /* FIXME: vget_lane supports more variants than this! */
17098 { VAR10 (GETLANE, vget_lane,
17099 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17100 { VAR10 (SETLANE, vset_lane,
17101 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17102 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17103 { VAR10 (DUP, vdup_n,
17104 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17105 { VAR10 (DUPLANE, vdup_lane,
17106 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17107 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17108 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17109 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17110 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17111 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17112 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17113 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17114 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17115 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17116 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17117 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17118 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17119 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17120 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17121 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17122 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17123 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17124 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17125 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17126 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17127 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17128 { VAR10 (BINOP, vext,
17129 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17130 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17131 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17132 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17133 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17134 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17135 { VAR10 (SELECT, vbsl,
17136 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17137 { VAR1 (VTBL, vtbl1, v8qi) },
17138 { VAR1 (VTBL, vtbl2, v8qi) },
17139 { VAR1 (VTBL, vtbl3, v8qi) },
17140 { VAR1 (VTBL, vtbl4, v8qi) },
17141 { VAR1 (VTBX, vtbx1, v8qi) },
17142 { VAR1 (VTBX, vtbx2, v8qi) },
17143 { VAR1 (VTBX, vtbx3, v8qi) },
17144 { VAR1 (VTBX, vtbx4, v8qi) },
17145 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17146 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17147 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17148 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17149 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17150 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17151 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17152 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17153 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17154 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17155 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17156 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17157 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17158 { VAR10 (LOAD1, vld1,
17159 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17160 { VAR10 (LOAD1LANE, vld1_lane,
17161 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17162 { VAR10 (LOAD1, vld1_dup,
17163 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17164 { VAR10 (STORE1, vst1,
17165 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17166 { VAR10 (STORE1LANE, vst1_lane,
17167 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17168 { VAR9 (LOADSTRUCT,
17169 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17170 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17171 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17172 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17173 { VAR9 (STORESTRUCT, vst2,
17174 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17175 { VAR7 (STORESTRUCTLANE, vst2_lane,
17176 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17177 { VAR9 (LOADSTRUCT,
17178 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17179 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17180 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17181 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17182 { VAR9 (STORESTRUCT, vst3,
17183 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17184 { VAR7 (STORESTRUCTLANE, vst3_lane,
17185 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17186 { VAR9 (LOADSTRUCT, vld4,
17187 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17188 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17189 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17190 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17191 { VAR9 (STORESTRUCT, vst4,
17192 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17193 { VAR7 (STORESTRUCTLANE, vst4_lane,
17194 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17195 { VAR10 (LOGICBINOP, vand,
17196 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17197 { VAR10 (LOGICBINOP, vorr,
17198 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17199 { VAR10 (BINOP, veor,
17200 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17201 { VAR10 (LOGICBINOP, vbic,
17202 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17203 { VAR10 (LOGICBINOP, vorn,
17204 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17220 arm_init_neon_builtins (void)
17222 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17224 tree neon_intQI_type_node;
17225 tree neon_intHI_type_node;
17226 tree neon_polyQI_type_node;
17227 tree neon_polyHI_type_node;
17228 tree neon_intSI_type_node;
17229 tree neon_intDI_type_node;
17230 tree neon_float_type_node;
17232 tree intQI_pointer_node;
17233 tree intHI_pointer_node;
17234 tree intSI_pointer_node;
17235 tree intDI_pointer_node;
17236 tree float_pointer_node;
17238 tree const_intQI_node;
17239 tree const_intHI_node;
17240 tree const_intSI_node;
17241 tree const_intDI_node;
17242 tree const_float_node;
17244 tree const_intQI_pointer_node;
17245 tree const_intHI_pointer_node;
17246 tree const_intSI_pointer_node;
17247 tree const_intDI_pointer_node;
17248 tree const_float_pointer_node;
17250 tree V8QI_type_node;
17251 tree V4HI_type_node;
17252 tree V2SI_type_node;
17253 tree V2SF_type_node;
17254 tree V16QI_type_node;
17255 tree V8HI_type_node;
17256 tree V4SI_type_node;
17257 tree V4SF_type_node;
17258 tree V2DI_type_node;
17260 tree intUQI_type_node;
17261 tree intUHI_type_node;
17262 tree intUSI_type_node;
17263 tree intUDI_type_node;
17265 tree intEI_type_node;
17266 tree intOI_type_node;
17267 tree intCI_type_node;
17268 tree intXI_type_node;
17270 tree V8QI_pointer_node;
17271 tree V4HI_pointer_node;
17272 tree V2SI_pointer_node;
17273 tree V2SF_pointer_node;
17274 tree V16QI_pointer_node;
17275 tree V8HI_pointer_node;
17276 tree V4SI_pointer_node;
17277 tree V4SF_pointer_node;
17278 tree V2DI_pointer_node;
17280 tree void_ftype_pv8qi_v8qi_v8qi;
17281 tree void_ftype_pv4hi_v4hi_v4hi;
17282 tree void_ftype_pv2si_v2si_v2si;
17283 tree void_ftype_pv2sf_v2sf_v2sf;
17284 tree void_ftype_pdi_di_di;
17285 tree void_ftype_pv16qi_v16qi_v16qi;
17286 tree void_ftype_pv8hi_v8hi_v8hi;
17287 tree void_ftype_pv4si_v4si_v4si;
17288 tree void_ftype_pv4sf_v4sf_v4sf;
17289 tree void_ftype_pv2di_v2di_v2di;
17291 tree reinterp_ftype_dreg[5][5];
17292 tree reinterp_ftype_qreg[5][5];
17293 tree dreg_types[5], qreg_types[5];
17295 /* Create distinguished type nodes for NEON vector element types,
17296 and pointers to values of such types, so we can detect them later. */
17297 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17298 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17299 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17300 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17301 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17302 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17303 neon_float_type_node = make_node (REAL_TYPE);
17304 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17305 layout_type (neon_float_type_node);
17307 /* Define typedefs which exactly correspond to the modes we are basing vector
17308 types on. If you change these names you'll need to change
17309 the table used by arm_mangle_type too. */
17310 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17311 "__builtin_neon_qi");
17312 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17313 "__builtin_neon_hi");
17314 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17315 "__builtin_neon_si");
17316 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17317 "__builtin_neon_sf");
17318 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17319 "__builtin_neon_di");
17320 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17321 "__builtin_neon_poly8");
17322 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17323 "__builtin_neon_poly16");
17325 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17326 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17327 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17328 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17329 float_pointer_node = build_pointer_type (neon_float_type_node);
17331 /* Next create constant-qualified versions of the above types. */
17332 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17334 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17336 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17338 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17340 const_float_node = build_qualified_type (neon_float_type_node,
17343 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17344 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17345 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17346 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17347 const_float_pointer_node = build_pointer_type (const_float_node);
17349 /* Now create vector types based on our NEON element types. */
17350 /* 64-bit vectors. */
17352 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17354 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17356 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17358 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17359 /* 128-bit vectors. */
17361 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17363 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17365 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17367 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17369 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17371 /* Unsigned integer types for various mode sizes. */
17372 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17373 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17374 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17375 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17377 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17378 "__builtin_neon_uqi");
17379 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17380 "__builtin_neon_uhi");
17381 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17382 "__builtin_neon_usi");
17383 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17384 "__builtin_neon_udi");
17386 /* Opaque integer types for structures of vectors. */
17387 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17388 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17389 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17390 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17392 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17393 "__builtin_neon_ti");
17394 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17395 "__builtin_neon_ei");
17396 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17397 "__builtin_neon_oi");
17398 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17399 "__builtin_neon_ci");
17400 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17401 "__builtin_neon_xi");
17403 /* Pointers to vector types. */
17404 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17405 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17406 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17407 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17408 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17409 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17410 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17411 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17412 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17414 /* Operations which return results as pairs. */
17415 void_ftype_pv8qi_v8qi_v8qi =
17416 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17417 V8QI_type_node, NULL);
17418 void_ftype_pv4hi_v4hi_v4hi =
17419 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17420 V4HI_type_node, NULL);
17421 void_ftype_pv2si_v2si_v2si =
17422 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17423 V2SI_type_node, NULL);
17424 void_ftype_pv2sf_v2sf_v2sf =
17425 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17426 V2SF_type_node, NULL);
17427 void_ftype_pdi_di_di =
17428 build_function_type_list (void_type_node, intDI_pointer_node,
17429 neon_intDI_type_node, neon_intDI_type_node, NULL);
17430 void_ftype_pv16qi_v16qi_v16qi =
17431 build_function_type_list (void_type_node, V16QI_pointer_node,
17432 V16QI_type_node, V16QI_type_node, NULL);
17433 void_ftype_pv8hi_v8hi_v8hi =
17434 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17435 V8HI_type_node, NULL);
17436 void_ftype_pv4si_v4si_v4si =
17437 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17438 V4SI_type_node, NULL);
17439 void_ftype_pv4sf_v4sf_v4sf =
17440 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17441 V4SF_type_node, NULL);
17442 void_ftype_pv2di_v2di_v2di =
17443 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17444 V2DI_type_node, NULL);
17446 dreg_types[0] = V8QI_type_node;
17447 dreg_types[1] = V4HI_type_node;
17448 dreg_types[2] = V2SI_type_node;
17449 dreg_types[3] = V2SF_type_node;
17450 dreg_types[4] = neon_intDI_type_node;
17452 qreg_types[0] = V16QI_type_node;
17453 qreg_types[1] = V8HI_type_node;
17454 qreg_types[2] = V4SI_type_node;
17455 qreg_types[3] = V4SF_type_node;
17456 qreg_types[4] = V2DI_type_node;
17458 for (i = 0; i < 5; i++)
17461 for (j = 0; j < 5; j++)
17463 reinterp_ftype_dreg[i][j]
17464 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17465 reinterp_ftype_qreg[i][j]
17466 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17470 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17472 neon_builtin_datum *d = &neon_builtin_data[i];
17473 unsigned int j, codeidx = 0;
17475 d->base_fcode = fcode;
17477 for (j = 0; j < T_MAX; j++)
17479 const char* const modenames[] = {
17480 "v8qi", "v4hi", "v2si", "v2sf", "di",
17481 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17485 enum insn_code icode;
17486 int is_load = 0, is_store = 0;
17488 if ((d->bits & (1 << j)) == 0)
17491 icode = d->codes[codeidx++];
17496 case NEON_LOAD1LANE:
17497 case NEON_LOADSTRUCT:
17498 case NEON_LOADSTRUCTLANE:
17500 /* Fall through. */
17502 case NEON_STORE1LANE:
17503 case NEON_STORESTRUCT:
17504 case NEON_STORESTRUCTLANE:
17507 /* Fall through. */
17510 case NEON_LOGICBINOP:
17511 case NEON_SHIFTINSERT:
17518 case NEON_SHIFTIMM:
17519 case NEON_SHIFTACC:
17525 case NEON_LANEMULL:
17526 case NEON_LANEMULH:
17528 case NEON_SCALARMUL:
17529 case NEON_SCALARMULL:
17530 case NEON_SCALARMULH:
17531 case NEON_SCALARMAC:
17537 tree return_type = void_type_node, args = void_list_node;
17539 /* Build a function type directly from the insn_data for this
17540 builtin. The build_function_type() function takes care of
17541 removing duplicates for us. */
17542 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17546 if (is_load && k == 1)
17548 /* Neon load patterns always have the memory operand
17549 (a SImode pointer) in the operand 1 position. We
17550 want a const pointer to the element type in that
17552 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17558 eltype = const_intQI_pointer_node;
17563 eltype = const_intHI_pointer_node;
17568 eltype = const_intSI_pointer_node;
17573 eltype = const_float_pointer_node;
17578 eltype = const_intDI_pointer_node;
17581 default: gcc_unreachable ();
17584 else if (is_store && k == 0)
17586 /* Similarly, Neon store patterns use operand 0 as
17587 the memory location to store to (a SImode pointer).
17588 Use a pointer to the element type of the store in
17590 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17596 eltype = intQI_pointer_node;
17601 eltype = intHI_pointer_node;
17606 eltype = intSI_pointer_node;
17611 eltype = float_pointer_node;
17616 eltype = intDI_pointer_node;
17619 default: gcc_unreachable ();
17624 switch (insn_data[icode].operand[k].mode)
17626 case VOIDmode: eltype = void_type_node; break;
17628 case QImode: eltype = neon_intQI_type_node; break;
17629 case HImode: eltype = neon_intHI_type_node; break;
17630 case SImode: eltype = neon_intSI_type_node; break;
17631 case SFmode: eltype = neon_float_type_node; break;
17632 case DImode: eltype = neon_intDI_type_node; break;
17633 case TImode: eltype = intTI_type_node; break;
17634 case EImode: eltype = intEI_type_node; break;
17635 case OImode: eltype = intOI_type_node; break;
17636 case CImode: eltype = intCI_type_node; break;
17637 case XImode: eltype = intXI_type_node; break;
17638 /* 64-bit vectors. */
17639 case V8QImode: eltype = V8QI_type_node; break;
17640 case V4HImode: eltype = V4HI_type_node; break;
17641 case V2SImode: eltype = V2SI_type_node; break;
17642 case V2SFmode: eltype = V2SF_type_node; break;
17643 /* 128-bit vectors. */
17644 case V16QImode: eltype = V16QI_type_node; break;
17645 case V8HImode: eltype = V8HI_type_node; break;
17646 case V4SImode: eltype = V4SI_type_node; break;
17647 case V4SFmode: eltype = V4SF_type_node; break;
17648 case V2DImode: eltype = V2DI_type_node; break;
17649 default: gcc_unreachable ();
17653 if (k == 0 && !is_store)
17654 return_type = eltype;
17656 args = tree_cons (NULL_TREE, eltype, args);
17659 ftype = build_function_type (return_type, args);
17663 case NEON_RESULTPAIR:
17665 switch (insn_data[icode].operand[1].mode)
17667 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17668 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17669 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17670 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17671 case DImode: ftype = void_ftype_pdi_di_di; break;
17672 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17673 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17674 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17675 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17676 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17677 default: gcc_unreachable ();
17682 case NEON_REINTERP:
17684 /* We iterate over 5 doubleword types, then 5 quadword
17687 switch (insn_data[icode].operand[0].mode)
17689 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17690 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17691 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17692 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17693 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17694 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17695 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17696 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17697 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17698 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17699 default: gcc_unreachable ();
17705 gcc_unreachable ();
17708 gcc_assert (ftype != NULL);
17710 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17712 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17719 arm_init_fp16_builtins (void)
17721 tree fp16_type = make_node (REAL_TYPE);
17722 TYPE_PRECISION (fp16_type) = 16;
17723 layout_type (fp16_type);
17724 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17728 arm_init_builtins (void)
17730 arm_init_tls_builtins ();
17732 if (TARGET_REALLY_IWMMXT)
17733 arm_init_iwmmxt_builtins ();
17736 arm_init_neon_builtins ();
17738 if (arm_fp16_format)
17739 arm_init_fp16_builtins ();
17742 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17744 static const char *
17745 arm_invalid_parameter_type (const_tree t)
17747 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17748 return N_("function parameters cannot have __fp16 type");
17752 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17754 static const char *
17755 arm_invalid_return_type (const_tree t)
17757 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17758 return N_("functions cannot return __fp16 type");
17762 /* Implement TARGET_PROMOTED_TYPE. */
17765 arm_promoted_type (const_tree t)
17767 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17768 return float_type_node;
17772 /* Implement TARGET_CONVERT_TO_TYPE.
17773 Specifically, this hook implements the peculiarity of the ARM
17774 half-precision floating-point C semantics that requires conversions between
17775 __fp16 to or from double to do an intermediate conversion to float. */
17778 arm_convert_to_type (tree type, tree expr)
17780 tree fromtype = TREE_TYPE (expr);
17781 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17783 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17784 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17785 return convert (type, convert (float_type_node, expr));
17789 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17790 This simply adds HFmode as a supported mode; even though we don't
17791 implement arithmetic on this type directly, it's supported by
17792 optabs conversions, much the way the double-word arithmetic is
17793 special-cased in the default hook. */
17796 arm_scalar_mode_supported_p (enum machine_mode mode)
17798 if (mode == HFmode)
17799 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17801 return default_scalar_mode_supported_p (mode);
17804 /* Errors in the source file can cause expand_expr to return const0_rtx
17805 where we expect a vector. To avoid crashing, use one of the vector
17806 clear instructions. */
17809 safe_vector_operand (rtx x, enum machine_mode mode)
17811 if (x != const0_rtx)
17813 x = gen_reg_rtx (mode);
17815 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17816 : gen_rtx_SUBREG (DImode, x, 0)));
17820 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17823 arm_expand_binop_builtin (enum insn_code icode,
17824 tree exp, rtx target)
17827 tree arg0 = CALL_EXPR_ARG (exp, 0);
17828 tree arg1 = CALL_EXPR_ARG (exp, 1);
17829 rtx op0 = expand_normal (arg0);
17830 rtx op1 = expand_normal (arg1);
17831 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17832 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17833 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17835 if (VECTOR_MODE_P (mode0))
17836 op0 = safe_vector_operand (op0, mode0);
17837 if (VECTOR_MODE_P (mode1))
17838 op1 = safe_vector_operand (op1, mode1);
17841 || GET_MODE (target) != tmode
17842 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17843 target = gen_reg_rtx (tmode);
17845 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17847 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17848 op0 = copy_to_mode_reg (mode0, op0);
17849 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17850 op1 = copy_to_mode_reg (mode1, op1);
17852 pat = GEN_FCN (icode) (target, op0, op1);
17859 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17862 arm_expand_unop_builtin (enum insn_code icode,
17863 tree exp, rtx target, int do_load)
17866 tree arg0 = CALL_EXPR_ARG (exp, 0);
17867 rtx op0 = expand_normal (arg0);
17868 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17869 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17872 || GET_MODE (target) != tmode
17873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17874 target = gen_reg_rtx (tmode);
17876 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17879 if (VECTOR_MODE_P (mode0))
17880 op0 = safe_vector_operand (op0, mode0);
17882 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17883 op0 = copy_to_mode_reg (mode0, op0);
17886 pat = GEN_FCN (icode) (target, op0);
17894 neon_builtin_compare (const void *a, const void *b)
17896 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17897 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17898 unsigned int soughtcode = key->base_fcode;
17900 if (soughtcode >= memb->base_fcode
17901 && soughtcode < memb->base_fcode + memb->num_vars)
17903 else if (soughtcode < memb->base_fcode)
17909 static enum insn_code
17910 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17912 neon_builtin_datum key, *found;
17915 key.base_fcode = fcode;
17916 found = (neon_builtin_datum *)
17917 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17918 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17919 gcc_assert (found);
17920 idx = fcode - (int) found->base_fcode;
17921 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17924 *itype = found->itype;
17926 return found->codes[idx];
17930 NEON_ARG_COPY_TO_REG,
17935 #define NEON_MAX_BUILTIN_ARGS 5
17937 /* Expand a Neon builtin. */
17939 arm_expand_neon_args (rtx target, int icode, int have_retval,
17944 tree arg[NEON_MAX_BUILTIN_ARGS];
17945 rtx op[NEON_MAX_BUILTIN_ARGS];
17946 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17947 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17952 || GET_MODE (target) != tmode
17953 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17954 target = gen_reg_rtx (tmode);
17956 va_start (ap, exp);
17960 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17962 if (thisarg == NEON_ARG_STOP)
17966 arg[argc] = CALL_EXPR_ARG (exp, argc);
17967 op[argc] = expand_normal (arg[argc]);
17968 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17972 case NEON_ARG_COPY_TO_REG:
17973 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17974 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17975 (op[argc], mode[argc]))
17976 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17979 case NEON_ARG_CONSTANT:
17980 /* FIXME: This error message is somewhat unhelpful. */
17981 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17982 (op[argc], mode[argc]))
17983 error ("argument must be a constant");
17986 case NEON_ARG_STOP:
17987 gcc_unreachable ();
18000 pat = GEN_FCN (icode) (target, op[0]);
18004 pat = GEN_FCN (icode) (target, op[0], op[1]);
18008 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18012 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18016 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18020 gcc_unreachable ();
18026 pat = GEN_FCN (icode) (op[0]);
18030 pat = GEN_FCN (icode) (op[0], op[1]);
18034 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18038 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18042 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18046 gcc_unreachable ();
18057 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18058 constants defined per-instruction or per instruction-variant. Instead, the
18059 required info is looked up in the table neon_builtin_data. */
18061 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18064 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18071 return arm_expand_neon_args (target, icode, 1, exp,
18072 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18076 case NEON_SCALARMUL:
18077 case NEON_SCALARMULL:
18078 case NEON_SCALARMULH:
18079 case NEON_SHIFTINSERT:
18080 case NEON_LOGICBINOP:
18081 return arm_expand_neon_args (target, icode, 1, exp,
18082 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18086 return arm_expand_neon_args (target, icode, 1, exp,
18087 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18088 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18092 case NEON_SHIFTIMM:
18093 return arm_expand_neon_args (target, icode, 1, exp,
18094 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18098 return arm_expand_neon_args (target, icode, 1, exp,
18099 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18103 case NEON_REINTERP:
18104 return arm_expand_neon_args (target, icode, 1, exp,
18105 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18109 return arm_expand_neon_args (target, icode, 1, exp,
18110 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18112 case NEON_RESULTPAIR:
18113 return arm_expand_neon_args (target, icode, 0, exp,
18114 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18118 case NEON_LANEMULL:
18119 case NEON_LANEMULH:
18120 return arm_expand_neon_args (target, icode, 1, exp,
18121 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18122 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18125 return arm_expand_neon_args (target, icode, 1, exp,
18126 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18127 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18129 case NEON_SHIFTACC:
18130 return arm_expand_neon_args (target, icode, 1, exp,
18131 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18132 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18134 case NEON_SCALARMAC:
18135 return arm_expand_neon_args (target, icode, 1, exp,
18136 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18137 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18141 return arm_expand_neon_args (target, icode, 1, exp,
18142 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18146 case NEON_LOADSTRUCT:
18147 return arm_expand_neon_args (target, icode, 1, exp,
18148 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18150 case NEON_LOAD1LANE:
18151 case NEON_LOADSTRUCTLANE:
18152 return arm_expand_neon_args (target, icode, 1, exp,
18153 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18157 case NEON_STORESTRUCT:
18158 return arm_expand_neon_args (target, icode, 0, exp,
18159 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18161 case NEON_STORE1LANE:
18162 case NEON_STORESTRUCTLANE:
18163 return arm_expand_neon_args (target, icode, 0, exp,
18164 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18168 gcc_unreachable ();
18171 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18173 neon_reinterpret (rtx dest, rtx src)
18175 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18178 /* Emit code to place a Neon pair result in memory locations (with equal
18181 neon_emit_pair_result_insn (enum machine_mode mode,
18182 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18185 rtx mem = gen_rtx_MEM (mode, destaddr);
18186 rtx tmp1 = gen_reg_rtx (mode);
18187 rtx tmp2 = gen_reg_rtx (mode);
18189 emit_insn (intfn (tmp1, op1, tmp2, op2));
18191 emit_move_insn (mem, tmp1);
18192 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18193 emit_move_insn (mem, tmp2);
18196 /* Set up operands for a register copy from src to dest, taking care not to
18197 clobber registers in the process.
18198 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18199 be called with a large N, so that should be OK. */
18202 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18204 unsigned int copied = 0, opctr = 0;
18205 unsigned int done = (1 << count) - 1;
18208 while (copied != done)
18210 for (i = 0; i < count; i++)
18214 for (j = 0; good && j < count; j++)
18215 if (i != j && (copied & (1 << j)) == 0
18216 && reg_overlap_mentioned_p (src[j], dest[i]))
18221 operands[opctr++] = dest[i];
18222 operands[opctr++] = src[i];
18228 gcc_assert (opctr == count * 2);
18231 /* Expand an expression EXP that calls a built-in function,
18232 with result going to TARGET if that's convenient
18233 (and in mode MODE if that's convenient).
18234 SUBTARGET may be used as the target for computing one of EXP's operands.
18235 IGNORE is nonzero if the value is to be ignored. */
18238 arm_expand_builtin (tree exp,
18240 rtx subtarget ATTRIBUTE_UNUSED,
18241 enum machine_mode mode ATTRIBUTE_UNUSED,
18242 int ignore ATTRIBUTE_UNUSED)
18244 const struct builtin_description * d;
18245 enum insn_code icode;
18246 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18254 int fcode = DECL_FUNCTION_CODE (fndecl);
18256 enum machine_mode tmode;
18257 enum machine_mode mode0;
18258 enum machine_mode mode1;
18259 enum machine_mode mode2;
18261 if (fcode >= ARM_BUILTIN_NEON_BASE)
18262 return arm_expand_neon_builtin (fcode, exp, target);
18266 case ARM_BUILTIN_TEXTRMSB:
18267 case ARM_BUILTIN_TEXTRMUB:
18268 case ARM_BUILTIN_TEXTRMSH:
18269 case ARM_BUILTIN_TEXTRMUH:
18270 case ARM_BUILTIN_TEXTRMSW:
18271 case ARM_BUILTIN_TEXTRMUW:
18272 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18273 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18274 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18275 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18276 : CODE_FOR_iwmmxt_textrmw);
18278 arg0 = CALL_EXPR_ARG (exp, 0);
18279 arg1 = CALL_EXPR_ARG (exp, 1);
18280 op0 = expand_normal (arg0);
18281 op1 = expand_normal (arg1);
18282 tmode = insn_data[icode].operand[0].mode;
18283 mode0 = insn_data[icode].operand[1].mode;
18284 mode1 = insn_data[icode].operand[2].mode;
18286 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18287 op0 = copy_to_mode_reg (mode0, op0);
18288 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18290 /* @@@ better error message */
18291 error ("selector must be an immediate");
18292 return gen_reg_rtx (tmode);
18295 || GET_MODE (target) != tmode
18296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18297 target = gen_reg_rtx (tmode);
18298 pat = GEN_FCN (icode) (target, op0, op1);
18304 case ARM_BUILTIN_TINSRB:
18305 case ARM_BUILTIN_TINSRH:
18306 case ARM_BUILTIN_TINSRW:
18307 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18308 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18309 : CODE_FOR_iwmmxt_tinsrw);
18310 arg0 = CALL_EXPR_ARG (exp, 0);
18311 arg1 = CALL_EXPR_ARG (exp, 1);
18312 arg2 = CALL_EXPR_ARG (exp, 2);
18313 op0 = expand_normal (arg0);
18314 op1 = expand_normal (arg1);
18315 op2 = expand_normal (arg2);
18316 tmode = insn_data[icode].operand[0].mode;
18317 mode0 = insn_data[icode].operand[1].mode;
18318 mode1 = insn_data[icode].operand[2].mode;
18319 mode2 = insn_data[icode].operand[3].mode;
18321 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18322 op0 = copy_to_mode_reg (mode0, op0);
18323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18324 op1 = copy_to_mode_reg (mode1, op1);
18325 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18327 /* @@@ better error message */
18328 error ("selector must be an immediate");
18332 || GET_MODE (target) != tmode
18333 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18334 target = gen_reg_rtx (tmode);
18335 pat = GEN_FCN (icode) (target, op0, op1, op2);
18341 case ARM_BUILTIN_SETWCX:
18342 arg0 = CALL_EXPR_ARG (exp, 0);
18343 arg1 = CALL_EXPR_ARG (exp, 1);
18344 op0 = force_reg (SImode, expand_normal (arg0));
18345 op1 = expand_normal (arg1);
18346 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18349 case ARM_BUILTIN_GETWCX:
18350 arg0 = CALL_EXPR_ARG (exp, 0);
18351 op0 = expand_normal (arg0);
18352 target = gen_reg_rtx (SImode);
18353 emit_insn (gen_iwmmxt_tmrc (target, op0));
18356 case ARM_BUILTIN_WSHUFH:
18357 icode = CODE_FOR_iwmmxt_wshufh;
18358 arg0 = CALL_EXPR_ARG (exp, 0);
18359 arg1 = CALL_EXPR_ARG (exp, 1);
18360 op0 = expand_normal (arg0);
18361 op1 = expand_normal (arg1);
18362 tmode = insn_data[icode].operand[0].mode;
18363 mode1 = insn_data[icode].operand[1].mode;
18364 mode2 = insn_data[icode].operand[2].mode;
18366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18367 op0 = copy_to_mode_reg (mode1, op0);
18368 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18370 /* @@@ better error message */
18371 error ("mask must be an immediate");
18375 || GET_MODE (target) != tmode
18376 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18377 target = gen_reg_rtx (tmode);
18378 pat = GEN_FCN (icode) (target, op0, op1);
18384 case ARM_BUILTIN_WSADB:
18385 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18386 case ARM_BUILTIN_WSADH:
18387 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18388 case ARM_BUILTIN_WSADBZ:
18389 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18390 case ARM_BUILTIN_WSADHZ:
18391 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18393 /* Several three-argument builtins. */
18394 case ARM_BUILTIN_WMACS:
18395 case ARM_BUILTIN_WMACU:
18396 case ARM_BUILTIN_WALIGN:
18397 case ARM_BUILTIN_TMIA:
18398 case ARM_BUILTIN_TMIAPH:
18399 case ARM_BUILTIN_TMIATT:
18400 case ARM_BUILTIN_TMIATB:
18401 case ARM_BUILTIN_TMIABT:
18402 case ARM_BUILTIN_TMIABB:
18403 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18404 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18405 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18406 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18407 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18408 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18409 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18410 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18411 : CODE_FOR_iwmmxt_walign);
18412 arg0 = CALL_EXPR_ARG (exp, 0);
18413 arg1 = CALL_EXPR_ARG (exp, 1);
18414 arg2 = CALL_EXPR_ARG (exp, 2);
18415 op0 = expand_normal (arg0);
18416 op1 = expand_normal (arg1);
18417 op2 = expand_normal (arg2);
18418 tmode = insn_data[icode].operand[0].mode;
18419 mode0 = insn_data[icode].operand[1].mode;
18420 mode1 = insn_data[icode].operand[2].mode;
18421 mode2 = insn_data[icode].operand[3].mode;
18423 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18424 op0 = copy_to_mode_reg (mode0, op0);
18425 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18426 op1 = copy_to_mode_reg (mode1, op1);
18427 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18428 op2 = copy_to_mode_reg (mode2, op2);
18430 || GET_MODE (target) != tmode
18431 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18432 target = gen_reg_rtx (tmode);
18433 pat = GEN_FCN (icode) (target, op0, op1, op2);
18439 case ARM_BUILTIN_WZERO:
18440 target = gen_reg_rtx (DImode);
18441 emit_insn (gen_iwmmxt_clrdi (target));
18444 case ARM_BUILTIN_THREAD_POINTER:
18445 return arm_load_tp (target);
18451 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18452 if (d->code == (const enum arm_builtins) fcode)
18453 return arm_expand_binop_builtin (d->icode, exp, target);
18455 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18456 if (d->code == (const enum arm_builtins) fcode)
18457 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18459 /* @@@ Should really do something sensible here. */
18463 /* Return the number (counting from 0) of
18464 the least significant set bit in MASK. */
18467 number_of_first_bit_set (unsigned mask)
18472 (mask & (1 << bit)) == 0;
18479 /* Emit code to push or pop registers to or from the stack. F is the
18480 assembly file. MASK is the registers to push or pop. PUSH is
18481 nonzero if we should push, and zero if we should pop. For debugging
18482 output, if pushing, adjust CFA_OFFSET by the amount of space added
18483 to the stack. REAL_REGS should have the same number of bits set as
18484 MASK, and will be used instead (in the same order) to describe which
18485 registers were saved - this is used to mark the save slots when we
18486 push high registers after moving them to low registers. */
18488 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18489 unsigned long real_regs)
18492 int lo_mask = mask & 0xFF;
18493 int pushed_words = 0;
18497 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18499 /* Special case. Do not generate a POP PC statement here, do it in
18501 thumb_exit (f, -1);
18505 if (ARM_EABI_UNWIND_TABLES && push)
18507 fprintf (f, "\t.save\t{");
18508 for (regno = 0; regno < 15; regno++)
18510 if (real_regs & (1 << regno))
18512 if (real_regs & ((1 << regno) -1))
18514 asm_fprintf (f, "%r", regno);
18517 fprintf (f, "}\n");
18520 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18522 /* Look at the low registers first. */
18523 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18527 asm_fprintf (f, "%r", regno);
18529 if ((lo_mask & ~1) != 0)
18536 if (push && (mask & (1 << LR_REGNUM)))
18538 /* Catch pushing the LR. */
18542 asm_fprintf (f, "%r", LR_REGNUM);
18546 else if (!push && (mask & (1 << PC_REGNUM)))
18548 /* Catch popping the PC. */
18549 if (TARGET_INTERWORK || TARGET_BACKTRACE
18550 || crtl->calls_eh_return)
18552 /* The PC is never poped directly, instead
18553 it is popped into r3 and then BX is used. */
18554 fprintf (f, "}\n");
18556 thumb_exit (f, -1);
18565 asm_fprintf (f, "%r", PC_REGNUM);
18569 fprintf (f, "}\n");
18571 if (push && pushed_words && dwarf2out_do_frame ())
18573 char *l = dwarf2out_cfi_label (false);
18574 int pushed_mask = real_regs;
18576 *cfa_offset += pushed_words * 4;
18577 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18580 pushed_mask = real_regs;
18581 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18583 if (pushed_mask & 1)
18584 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18589 /* Generate code to return from a thumb function.
18590 If 'reg_containing_return_addr' is -1, then the return address is
18591 actually on the stack, at the stack pointer. */
18593 thumb_exit (FILE *f, int reg_containing_return_addr)
18595 unsigned regs_available_for_popping;
18596 unsigned regs_to_pop;
18598 unsigned available;
18602 int restore_a4 = FALSE;
18604 /* Compute the registers we need to pop. */
18608 if (reg_containing_return_addr == -1)
18610 regs_to_pop |= 1 << LR_REGNUM;
18614 if (TARGET_BACKTRACE)
18616 /* Restore the (ARM) frame pointer and stack pointer. */
18617 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18621 /* If there is nothing to pop then just emit the BX instruction and
18623 if (pops_needed == 0)
18625 if (crtl->calls_eh_return)
18626 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18628 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18631 /* Otherwise if we are not supporting interworking and we have not created
18632 a backtrace structure and the function was not entered in ARM mode then
18633 just pop the return address straight into the PC. */
18634 else if (!TARGET_INTERWORK
18635 && !TARGET_BACKTRACE
18636 && !is_called_in_ARM_mode (current_function_decl)
18637 && !crtl->calls_eh_return)
18639 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18643 /* Find out how many of the (return) argument registers we can corrupt. */
18644 regs_available_for_popping = 0;
18646 /* If returning via __builtin_eh_return, the bottom three registers
18647 all contain information needed for the return. */
18648 if (crtl->calls_eh_return)
18652 /* If we can deduce the registers used from the function's
18653 return value. This is more reliable that examining
18654 df_regs_ever_live_p () because that will be set if the register is
18655 ever used in the function, not just if the register is used
18656 to hold a return value. */
18658 if (crtl->return_rtx != 0)
18659 mode = GET_MODE (crtl->return_rtx);
18661 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18663 size = GET_MODE_SIZE (mode);
18667 /* In a void function we can use any argument register.
18668 In a function that returns a structure on the stack
18669 we can use the second and third argument registers. */
18670 if (mode == VOIDmode)
18671 regs_available_for_popping =
18672 (1 << ARG_REGISTER (1))
18673 | (1 << ARG_REGISTER (2))
18674 | (1 << ARG_REGISTER (3));
18676 regs_available_for_popping =
18677 (1 << ARG_REGISTER (2))
18678 | (1 << ARG_REGISTER (3));
18680 else if (size <= 4)
18681 regs_available_for_popping =
18682 (1 << ARG_REGISTER (2))
18683 | (1 << ARG_REGISTER (3));
18684 else if (size <= 8)
18685 regs_available_for_popping =
18686 (1 << ARG_REGISTER (3));
18689 /* Match registers to be popped with registers into which we pop them. */
18690 for (available = regs_available_for_popping,
18691 required = regs_to_pop;
18692 required != 0 && available != 0;
18693 available &= ~(available & - available),
18694 required &= ~(required & - required))
18697 /* If we have any popping registers left over, remove them. */
18699 regs_available_for_popping &= ~available;
18701 /* Otherwise if we need another popping register we can use
18702 the fourth argument register. */
18703 else if (pops_needed)
18705 /* If we have not found any free argument registers and
18706 reg a4 contains the return address, we must move it. */
18707 if (regs_available_for_popping == 0
18708 && reg_containing_return_addr == LAST_ARG_REGNUM)
18710 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18711 reg_containing_return_addr = LR_REGNUM;
18713 else if (size > 12)
18715 /* Register a4 is being used to hold part of the return value,
18716 but we have dire need of a free, low register. */
18719 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18722 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18724 /* The fourth argument register is available. */
18725 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18731 /* Pop as many registers as we can. */
18732 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18733 regs_available_for_popping);
18735 /* Process the registers we popped. */
18736 if (reg_containing_return_addr == -1)
18738 /* The return address was popped into the lowest numbered register. */
18739 regs_to_pop &= ~(1 << LR_REGNUM);
18741 reg_containing_return_addr =
18742 number_of_first_bit_set (regs_available_for_popping);
18744 /* Remove this register for the mask of available registers, so that
18745 the return address will not be corrupted by further pops. */
18746 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18749 /* If we popped other registers then handle them here. */
18750 if (regs_available_for_popping)
18754 /* Work out which register currently contains the frame pointer. */
18755 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18757 /* Move it into the correct place. */
18758 asm_fprintf (f, "\tmov\t%r, %r\n",
18759 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18761 /* (Temporarily) remove it from the mask of popped registers. */
18762 regs_available_for_popping &= ~(1 << frame_pointer);
18763 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18765 if (regs_available_for_popping)
18769 /* We popped the stack pointer as well,
18770 find the register that contains it. */
18771 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18773 /* Move it into the stack register. */
18774 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18776 /* At this point we have popped all necessary registers, so
18777 do not worry about restoring regs_available_for_popping
18778 to its correct value:
18780 assert (pops_needed == 0)
18781 assert (regs_available_for_popping == (1 << frame_pointer))
18782 assert (regs_to_pop == (1 << STACK_POINTER)) */
18786 /* Since we have just move the popped value into the frame
18787 pointer, the popping register is available for reuse, and
18788 we know that we still have the stack pointer left to pop. */
18789 regs_available_for_popping |= (1 << frame_pointer);
18793 /* If we still have registers left on the stack, but we no longer have
18794 any registers into which we can pop them, then we must move the return
18795 address into the link register and make available the register that
18797 if (regs_available_for_popping == 0 && pops_needed > 0)
18799 regs_available_for_popping |= 1 << reg_containing_return_addr;
18801 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18802 reg_containing_return_addr);
18804 reg_containing_return_addr = LR_REGNUM;
18807 /* If we have registers left on the stack then pop some more.
18808 We know that at most we will want to pop FP and SP. */
18809 if (pops_needed > 0)
18814 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18815 regs_available_for_popping);
18817 /* We have popped either FP or SP.
18818 Move whichever one it is into the correct register. */
18819 popped_into = number_of_first_bit_set (regs_available_for_popping);
18820 move_to = number_of_first_bit_set (regs_to_pop);
18822 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18824 regs_to_pop &= ~(1 << move_to);
18829 /* If we still have not popped everything then we must have only
18830 had one register available to us and we are now popping the SP. */
18831 if (pops_needed > 0)
18835 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18836 regs_available_for_popping);
18838 popped_into = number_of_first_bit_set (regs_available_for_popping);
18840 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18842 assert (regs_to_pop == (1 << STACK_POINTER))
18843 assert (pops_needed == 1)
18847 /* If necessary restore the a4 register. */
18850 if (reg_containing_return_addr != LR_REGNUM)
18852 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18853 reg_containing_return_addr = LR_REGNUM;
18856 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18859 if (crtl->calls_eh_return)
18860 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18862 /* Return to caller. */
18863 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18868 thumb1_final_prescan_insn (rtx insn)
18870 if (flag_print_asm_name)
18871 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18872 INSN_ADDRESSES (INSN_UID (insn)));
18876 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18878 unsigned HOST_WIDE_INT mask = 0xff;
18881 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18882 if (val == 0) /* XXX */
18885 for (i = 0; i < 25; i++)
18886 if ((val & (mask << i)) == val)
18892 /* Returns nonzero if the current function contains,
18893 or might contain a far jump. */
18895 thumb_far_jump_used_p (void)
18899 /* This test is only important for leaf functions. */
18900 /* assert (!leaf_function_p ()); */
18902 /* If we have already decided that far jumps may be used,
18903 do not bother checking again, and always return true even if
18904 it turns out that they are not being used. Once we have made
18905 the decision that far jumps are present (and that hence the link
18906 register will be pushed onto the stack) we cannot go back on it. */
18907 if (cfun->machine->far_jump_used)
18910 /* If this function is not being called from the prologue/epilogue
18911 generation code then it must be being called from the
18912 INITIAL_ELIMINATION_OFFSET macro. */
18913 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18915 /* In this case we know that we are being asked about the elimination
18916 of the arg pointer register. If that register is not being used,
18917 then there are no arguments on the stack, and we do not have to
18918 worry that a far jump might force the prologue to push the link
18919 register, changing the stack offsets. In this case we can just
18920 return false, since the presence of far jumps in the function will
18921 not affect stack offsets.
18923 If the arg pointer is live (or if it was live, but has now been
18924 eliminated and so set to dead) then we do have to test to see if
18925 the function might contain a far jump. This test can lead to some
18926 false negatives, since before reload is completed, then length of
18927 branch instructions is not known, so gcc defaults to returning their
18928 longest length, which in turn sets the far jump attribute to true.
18930 A false negative will not result in bad code being generated, but it
18931 will result in a needless push and pop of the link register. We
18932 hope that this does not occur too often.
18934 If we need doubleword stack alignment this could affect the other
18935 elimination offsets so we can't risk getting it wrong. */
18936 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18937 cfun->machine->arg_pointer_live = 1;
18938 else if (!cfun->machine->arg_pointer_live)
18942 /* Check to see if the function contains a branch
18943 insn with the far jump attribute set. */
18944 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18946 if (GET_CODE (insn) == JUMP_INSN
18947 /* Ignore tablejump patterns. */
18948 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18949 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18950 && get_attr_far_jump (insn) == FAR_JUMP_YES
18953 /* Record the fact that we have decided that
18954 the function does use far jumps. */
18955 cfun->machine->far_jump_used = 1;
18963 /* Return nonzero if FUNC must be entered in ARM mode. */
18965 is_called_in_ARM_mode (tree func)
18967 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18969 /* Ignore the problem about functions whose address is taken. */
18970 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18974 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18980 /* The bits which aren't usefully expanded as rtl. */
18982 thumb_unexpanded_epilogue (void)
18984 arm_stack_offsets *offsets;
18986 unsigned long live_regs_mask = 0;
18987 int high_regs_pushed = 0;
18988 int had_to_push_lr;
18991 if (cfun->machine->return_used_this_function != 0)
18994 if (IS_NAKED (arm_current_func_type ()))
18997 offsets = arm_get_frame_offsets ();
18998 live_regs_mask = offsets->saved_regs_mask;
18999 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19001 /* If we can deduce the registers used from the function's return value.
19002 This is more reliable that examining df_regs_ever_live_p () because that
19003 will be set if the register is ever used in the function, not just if
19004 the register is used to hold a return value. */
19005 size = arm_size_return_regs ();
19007 /* The prolog may have pushed some high registers to use as
19008 work registers. e.g. the testsuite file:
19009 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19010 compiles to produce:
19011 push {r4, r5, r6, r7, lr}
19015 as part of the prolog. We have to undo that pushing here. */
19017 if (high_regs_pushed)
19019 unsigned long mask = live_regs_mask & 0xff;
19022 /* The available low registers depend on the size of the value we are
19030 /* Oh dear! We have no low registers into which we can pop
19033 ("no low registers available for popping high registers");
19035 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19036 if (live_regs_mask & (1 << next_hi_reg))
19039 while (high_regs_pushed)
19041 /* Find lo register(s) into which the high register(s) can
19043 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19045 if (mask & (1 << regno))
19046 high_regs_pushed--;
19047 if (high_regs_pushed == 0)
19051 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19053 /* Pop the values into the low register(s). */
19054 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19056 /* Move the value(s) into the high registers. */
19057 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19059 if (mask & (1 << regno))
19061 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19064 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19065 if (live_regs_mask & (1 << next_hi_reg))
19070 live_regs_mask &= ~0x0f00;
19073 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19074 live_regs_mask &= 0xff;
19076 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19078 /* Pop the return address into the PC. */
19079 if (had_to_push_lr)
19080 live_regs_mask |= 1 << PC_REGNUM;
19082 /* Either no argument registers were pushed or a backtrace
19083 structure was created which includes an adjusted stack
19084 pointer, so just pop everything. */
19085 if (live_regs_mask)
19086 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19089 /* We have either just popped the return address into the
19090 PC or it is was kept in LR for the entire function. */
19091 if (!had_to_push_lr)
19092 thumb_exit (asm_out_file, LR_REGNUM);
19096 /* Pop everything but the return address. */
19097 if (live_regs_mask)
19098 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19101 if (had_to_push_lr)
19105 /* We have no free low regs, so save one. */
19106 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19110 /* Get the return address into a temporary register. */
19111 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19112 1 << LAST_ARG_REGNUM);
19116 /* Move the return address to lr. */
19117 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19119 /* Restore the low register. */
19120 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19125 regno = LAST_ARG_REGNUM;
19130 /* Remove the argument registers that were pushed onto the stack. */
19131 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19132 SP_REGNUM, SP_REGNUM,
19133 crtl->args.pretend_args_size);
19135 thumb_exit (asm_out_file, regno);
19141 /* Functions to save and restore machine-specific function data. */
19142 static struct machine_function *
19143 arm_init_machine_status (void)
19145 struct machine_function *machine;
19146 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19148 #if ARM_FT_UNKNOWN != 0
19149 machine->func_type = ARM_FT_UNKNOWN;
19154 /* Return an RTX indicating where the return address to the
19155 calling function can be found. */
19157 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19162 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19165 /* Do anything needed before RTL is emitted for each function. */
19167 arm_init_expanders (void)
19169 /* Arrange to initialize and mark the machine per-function status. */
19170 init_machine_status = arm_init_machine_status;
19172 /* This is to stop the combine pass optimizing away the alignment
19173 adjustment of va_arg. */
19174 /* ??? It is claimed that this should not be necessary. */
19176 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19180 /* Like arm_compute_initial_elimination offset. Simpler because there
19181 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19182 to point at the base of the local variables after static stack
19183 space for a function has been allocated. */
19186 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19188 arm_stack_offsets *offsets;
19190 offsets = arm_get_frame_offsets ();
19194 case ARG_POINTER_REGNUM:
19197 case STACK_POINTER_REGNUM:
19198 return offsets->outgoing_args - offsets->saved_args;
19200 case FRAME_POINTER_REGNUM:
19201 return offsets->soft_frame - offsets->saved_args;
19203 case ARM_HARD_FRAME_POINTER_REGNUM:
19204 return offsets->saved_regs - offsets->saved_args;
19206 case THUMB_HARD_FRAME_POINTER_REGNUM:
19207 return offsets->locals_base - offsets->saved_args;
19210 gcc_unreachable ();
19214 case FRAME_POINTER_REGNUM:
19217 case STACK_POINTER_REGNUM:
19218 return offsets->outgoing_args - offsets->soft_frame;
19220 case ARM_HARD_FRAME_POINTER_REGNUM:
19221 return offsets->saved_regs - offsets->soft_frame;
19223 case THUMB_HARD_FRAME_POINTER_REGNUM:
19224 return offsets->locals_base - offsets->soft_frame;
19227 gcc_unreachable ();
19232 gcc_unreachable ();
19236 /* Generate the rest of a function's prologue. */
19238 thumb1_expand_prologue (void)
19242 HOST_WIDE_INT amount;
19243 arm_stack_offsets *offsets;
19244 unsigned long func_type;
19246 unsigned long live_regs_mask;
19248 func_type = arm_current_func_type ();
19250 /* Naked functions don't have prologues. */
19251 if (IS_NAKED (func_type))
19254 if (IS_INTERRUPT (func_type))
19256 error ("interrupt Service Routines cannot be coded in Thumb mode");
19260 offsets = arm_get_frame_offsets ();
19261 live_regs_mask = offsets->saved_regs_mask;
19262 /* Load the pic register before setting the frame pointer,
19263 so we can use r7 as a temporary work register. */
19264 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19265 arm_load_pic_register (live_regs_mask);
19267 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19268 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19269 stack_pointer_rtx);
19271 amount = offsets->outgoing_args - offsets->saved_regs;
19276 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19277 GEN_INT (- amount)));
19278 RTX_FRAME_RELATED_P (insn) = 1;
19284 /* The stack decrement is too big for an immediate value in a single
19285 insn. In theory we could issue multiple subtracts, but after
19286 three of them it becomes more space efficient to place the full
19287 value in the constant pool and load into a register. (Also the
19288 ARM debugger really likes to see only one stack decrement per
19289 function). So instead we look for a scratch register into which
19290 we can load the decrement, and then we subtract this from the
19291 stack pointer. Unfortunately on the thumb the only available
19292 scratch registers are the argument registers, and we cannot use
19293 these as they may hold arguments to the function. Instead we
19294 attempt to locate a call preserved register which is used by this
19295 function. If we can find one, then we know that it will have
19296 been pushed at the start of the prologue and so we can corrupt
19298 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19299 if (live_regs_mask & (1 << regno))
19302 gcc_assert(regno <= LAST_LO_REGNUM);
19304 reg = gen_rtx_REG (SImode, regno);
19306 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19308 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19309 stack_pointer_rtx, reg));
19310 RTX_FRAME_RELATED_P (insn) = 1;
19311 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19312 plus_constant (stack_pointer_rtx,
19314 RTX_FRAME_RELATED_P (dwarf) = 1;
19315 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19319 if (frame_pointer_needed)
19320 thumb_set_frame_pointer (offsets);
19322 /* If we are profiling, make sure no instructions are scheduled before
19323 the call to mcount. Similarly if the user has requested no
19324 scheduling in the prolog. Similarly if we want non-call exceptions
19325 using the EABI unwinder, to prevent faulting instructions from being
19326 swapped with a stack adjustment. */
19327 if (crtl->profile || !TARGET_SCHED_PROLOG
19328 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19329 emit_insn (gen_blockage ());
19331 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19332 if (live_regs_mask & 0xff)
19333 cfun->machine->lr_save_eliminated = 0;
19338 thumb1_expand_epilogue (void)
19340 HOST_WIDE_INT amount;
19341 arm_stack_offsets *offsets;
19344 /* Naked functions don't have prologues. */
19345 if (IS_NAKED (arm_current_func_type ()))
19348 offsets = arm_get_frame_offsets ();
19349 amount = offsets->outgoing_args - offsets->saved_regs;
19351 if (frame_pointer_needed)
19353 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19354 amount = offsets->locals_base - offsets->saved_regs;
19357 gcc_assert (amount >= 0);
19361 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19362 GEN_INT (amount)));
19365 /* r3 is always free in the epilogue. */
19366 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19368 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19369 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19373 /* Emit a USE (stack_pointer_rtx), so that
19374 the stack adjustment will not be deleted. */
19375 emit_insn (gen_prologue_use (stack_pointer_rtx));
19377 if (crtl->profile || !TARGET_SCHED_PROLOG)
19378 emit_insn (gen_blockage ());
19380 /* Emit a clobber for each insn that will be restored in the epilogue,
19381 so that flow2 will get register lifetimes correct. */
19382 for (regno = 0; regno < 13; regno++)
19383 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19384 emit_clobber (gen_rtx_REG (SImode, regno));
19386 if (! df_regs_ever_live_p (LR_REGNUM))
19387 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19391 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19393 arm_stack_offsets *offsets;
19394 unsigned long live_regs_mask = 0;
19395 unsigned long l_mask;
19396 unsigned high_regs_pushed = 0;
19397 int cfa_offset = 0;
19400 if (IS_NAKED (arm_current_func_type ()))
19403 if (is_called_in_ARM_mode (current_function_decl))
19407 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19408 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19410 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19412 /* Generate code sequence to switch us into Thumb mode. */
19413 /* The .code 32 directive has already been emitted by
19414 ASM_DECLARE_FUNCTION_NAME. */
19415 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19416 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19418 /* Generate a label, so that the debugger will notice the
19419 change in instruction sets. This label is also used by
19420 the assembler to bypass the ARM code when this function
19421 is called from a Thumb encoded function elsewhere in the
19422 same file. Hence the definition of STUB_NAME here must
19423 agree with the definition in gas/config/tc-arm.c. */
19425 #define STUB_NAME ".real_start_of"
19427 fprintf (f, "\t.code\t16\n");
19429 if (arm_dllexport_name_p (name))
19430 name = arm_strip_name_encoding (name);
19432 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19433 fprintf (f, "\t.thumb_func\n");
19434 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19437 if (crtl->args.pretend_args_size)
19439 /* Output unwind directive for the stack adjustment. */
19440 if (ARM_EABI_UNWIND_TABLES)
19441 fprintf (f, "\t.pad #%d\n",
19442 crtl->args.pretend_args_size);
19444 if (cfun->machine->uses_anonymous_args)
19448 fprintf (f, "\tpush\t{");
19450 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19452 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19453 regno <= LAST_ARG_REGNUM;
19455 asm_fprintf (f, "%r%s", regno,
19456 regno == LAST_ARG_REGNUM ? "" : ", ");
19458 fprintf (f, "}\n");
19461 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19462 SP_REGNUM, SP_REGNUM,
19463 crtl->args.pretend_args_size);
19465 /* We don't need to record the stores for unwinding (would it
19466 help the debugger any if we did?), but record the change in
19467 the stack pointer. */
19468 if (dwarf2out_do_frame ())
19470 char *l = dwarf2out_cfi_label (false);
19472 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19473 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19477 /* Get the registers we are going to push. */
19478 offsets = arm_get_frame_offsets ();
19479 live_regs_mask = offsets->saved_regs_mask;
19480 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19481 l_mask = live_regs_mask & 0x40ff;
19482 /* Then count how many other high registers will need to be pushed. */
19483 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19485 if (TARGET_BACKTRACE)
19488 unsigned work_register;
19490 /* We have been asked to create a stack backtrace structure.
19491 The code looks like this:
19495 0 sub SP, #16 Reserve space for 4 registers.
19496 2 push {R7} Push low registers.
19497 4 add R7, SP, #20 Get the stack pointer before the push.
19498 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19499 8 mov R7, PC Get hold of the start of this code plus 12.
19500 10 str R7, [SP, #16] Store it.
19501 12 mov R7, FP Get hold of the current frame pointer.
19502 14 str R7, [SP, #4] Store it.
19503 16 mov R7, LR Get hold of the current return address.
19504 18 str R7, [SP, #12] Store it.
19505 20 add R7, SP, #16 Point at the start of the backtrace structure.
19506 22 mov FP, R7 Put this value into the frame pointer. */
19508 work_register = thumb_find_work_register (live_regs_mask);
19510 if (ARM_EABI_UNWIND_TABLES)
19511 asm_fprintf (f, "\t.pad #16\n");
19514 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19515 SP_REGNUM, SP_REGNUM);
19517 if (dwarf2out_do_frame ())
19519 char *l = dwarf2out_cfi_label (false);
19521 cfa_offset = cfa_offset + 16;
19522 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19527 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19528 offset = bit_count (l_mask) * UNITS_PER_WORD;
19533 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19534 offset + 16 + crtl->args.pretend_args_size);
19536 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19539 /* Make sure that the instruction fetching the PC is in the right place
19540 to calculate "start of backtrace creation code + 12". */
19543 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19544 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19546 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19547 ARM_HARD_FRAME_POINTER_REGNUM);
19548 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19553 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19554 ARM_HARD_FRAME_POINTER_REGNUM);
19555 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19557 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19558 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19562 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19563 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19565 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19567 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19568 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19570 /* Optimization: If we are not pushing any low registers but we are going
19571 to push some high registers then delay our first push. This will just
19572 be a push of LR and we can combine it with the push of the first high
19574 else if ((l_mask & 0xff) != 0
19575 || (high_regs_pushed == 0 && l_mask))
19576 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19578 if (high_regs_pushed)
19580 unsigned pushable_regs;
19581 unsigned next_hi_reg;
19583 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19584 if (live_regs_mask & (1 << next_hi_reg))
19587 pushable_regs = l_mask & 0xff;
19589 if (pushable_regs == 0)
19590 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19592 while (high_regs_pushed > 0)
19594 unsigned long real_regs_mask = 0;
19596 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19598 if (pushable_regs & (1 << regno))
19600 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19602 high_regs_pushed --;
19603 real_regs_mask |= (1 << next_hi_reg);
19605 if (high_regs_pushed)
19607 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19609 if (live_regs_mask & (1 << next_hi_reg))
19614 pushable_regs &= ~((1 << regno) - 1);
19620 /* If we had to find a work register and we have not yet
19621 saved the LR then add it to the list of regs to push. */
19622 if (l_mask == (1 << LR_REGNUM))
19624 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19626 real_regs_mask | (1 << LR_REGNUM));
19630 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19635 /* Handle the case of a double word load into a low register from
19636 a computed memory address. The computed address may involve a
19637 register which is overwritten by the load. */
19639 thumb_load_double_from_address (rtx *operands)
19647 gcc_assert (GET_CODE (operands[0]) == REG);
19648 gcc_assert (GET_CODE (operands[1]) == MEM);
19650 /* Get the memory address. */
19651 addr = XEXP (operands[1], 0);
19653 /* Work out how the memory address is computed. */
19654 switch (GET_CODE (addr))
19657 operands[2] = adjust_address (operands[1], SImode, 4);
19659 if (REGNO (operands[0]) == REGNO (addr))
19661 output_asm_insn ("ldr\t%H0, %2", operands);
19662 output_asm_insn ("ldr\t%0, %1", operands);
19666 output_asm_insn ("ldr\t%0, %1", operands);
19667 output_asm_insn ("ldr\t%H0, %2", operands);
19672 /* Compute <address> + 4 for the high order load. */
19673 operands[2] = adjust_address (operands[1], SImode, 4);
19675 output_asm_insn ("ldr\t%0, %1", operands);
19676 output_asm_insn ("ldr\t%H0, %2", operands);
19680 arg1 = XEXP (addr, 0);
19681 arg2 = XEXP (addr, 1);
19683 if (CONSTANT_P (arg1))
19684 base = arg2, offset = arg1;
19686 base = arg1, offset = arg2;
19688 gcc_assert (GET_CODE (base) == REG);
19690 /* Catch the case of <address> = <reg> + <reg> */
19691 if (GET_CODE (offset) == REG)
19693 int reg_offset = REGNO (offset);
19694 int reg_base = REGNO (base);
19695 int reg_dest = REGNO (operands[0]);
19697 /* Add the base and offset registers together into the
19698 higher destination register. */
19699 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19700 reg_dest + 1, reg_base, reg_offset);
19702 /* Load the lower destination register from the address in
19703 the higher destination register. */
19704 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19705 reg_dest, reg_dest + 1);
19707 /* Load the higher destination register from its own address
19709 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19710 reg_dest + 1, reg_dest + 1);
19714 /* Compute <address> + 4 for the high order load. */
19715 operands[2] = adjust_address (operands[1], SImode, 4);
19717 /* If the computed address is held in the low order register
19718 then load the high order register first, otherwise always
19719 load the low order register first. */
19720 if (REGNO (operands[0]) == REGNO (base))
19722 output_asm_insn ("ldr\t%H0, %2", operands);
19723 output_asm_insn ("ldr\t%0, %1", operands);
19727 output_asm_insn ("ldr\t%0, %1", operands);
19728 output_asm_insn ("ldr\t%H0, %2", operands);
19734 /* With no registers to worry about we can just load the value
19736 operands[2] = adjust_address (operands[1], SImode, 4);
19738 output_asm_insn ("ldr\t%H0, %2", operands);
19739 output_asm_insn ("ldr\t%0, %1", operands);
19743 gcc_unreachable ();
19750 thumb_output_move_mem_multiple (int n, rtx *operands)
19757 if (REGNO (operands[4]) > REGNO (operands[5]))
19760 operands[4] = operands[5];
19763 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19764 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19768 if (REGNO (operands[4]) > REGNO (operands[5]))
19771 operands[4] = operands[5];
19774 if (REGNO (operands[5]) > REGNO (operands[6]))
19777 operands[5] = operands[6];
19780 if (REGNO (operands[4]) > REGNO (operands[5]))
19783 operands[4] = operands[5];
19787 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19788 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19792 gcc_unreachable ();
19798 /* Output a call-via instruction for thumb state. */
19800 thumb_call_via_reg (rtx reg)
19802 int regno = REGNO (reg);
19805 gcc_assert (regno < LR_REGNUM);
19807 /* If we are in the normal text section we can use a single instance
19808 per compilation unit. If we are doing function sections, then we need
19809 an entry per section, since we can't rely on reachability. */
19810 if (in_section == text_section)
19812 thumb_call_reg_needed = 1;
19814 if (thumb_call_via_label[regno] == NULL)
19815 thumb_call_via_label[regno] = gen_label_rtx ();
19816 labelp = thumb_call_via_label + regno;
19820 if (cfun->machine->call_via[regno] == NULL)
19821 cfun->machine->call_via[regno] = gen_label_rtx ();
19822 labelp = cfun->machine->call_via + regno;
19825 output_asm_insn ("bl\t%a0", labelp);
19829 /* Routines for generating rtl. */
19831 thumb_expand_movmemqi (rtx *operands)
19833 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19834 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19835 HOST_WIDE_INT len = INTVAL (operands[2]);
19836 HOST_WIDE_INT offset = 0;
19840 emit_insn (gen_movmem12b (out, in, out, in));
19846 emit_insn (gen_movmem8b (out, in, out, in));
19852 rtx reg = gen_reg_rtx (SImode);
19853 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19854 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
19861 rtx reg = gen_reg_rtx (HImode);
19862 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19863 plus_constant (in, offset))));
19864 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
19872 rtx reg = gen_reg_rtx (QImode);
19873 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19874 plus_constant (in, offset))));
19875 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
19881 thumb_reload_out_hi (rtx *operands)
19883 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19886 /* Handle reading a half-word from memory during reload. */
19888 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19890 gcc_unreachable ();
19893 /* Return the length of a function name prefix
19894 that starts with the character 'c'. */
19896 arm_get_strip_length (int c)
19900 ARM_NAME_ENCODING_LENGTHS
19905 /* Return a pointer to a function's name with any
19906 and all prefix encodings stripped from it. */
19908 arm_strip_name_encoding (const char *name)
19912 while ((skip = arm_get_strip_length (* name)))
19918 /* If there is a '*' anywhere in the name's prefix, then
19919 emit the stripped name verbatim, otherwise prepend an
19920 underscore if leading underscores are being used. */
19922 arm_asm_output_labelref (FILE *stream, const char *name)
19927 while ((skip = arm_get_strip_length (* name)))
19929 verbatim |= (*name == '*');
19934 fputs (name, stream);
19936 asm_fprintf (stream, "%U%s", name);
19940 arm_file_start (void)
19944 if (TARGET_UNIFIED_ASM)
19945 asm_fprintf (asm_out_file, "\t.syntax unified\n");
19949 const char *fpu_name;
19950 if (arm_select[0].string)
19951 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
19952 else if (arm_select[1].string)
19953 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
19955 asm_fprintf (asm_out_file, "\t.cpu %s\n",
19956 all_cores[arm_default_cpu].name);
19958 if (TARGET_SOFT_FLOAT)
19961 fpu_name = "softvfp";
19963 fpu_name = "softfpa";
19967 fpu_name = arm_fpu_desc->name;
19968 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
19970 if (TARGET_HARD_FLOAT)
19971 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
19972 if (TARGET_HARD_FLOAT_ABI)
19973 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
19976 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
19978 /* Some of these attributes only apply when the corresponding features
19979 are used. However we don't have any easy way of figuring this out.
19980 Conservatively record the setting that would have been used. */
19982 /* Tag_ABI_FP_rounding. */
19983 if (flag_rounding_math)
19984 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
19985 if (!flag_unsafe_math_optimizations)
19987 /* Tag_ABI_FP_denomal. */
19988 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
19989 /* Tag_ABI_FP_exceptions. */
19990 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
19992 /* Tag_ABI_FP_user_exceptions. */
19993 if (flag_signaling_nans)
19994 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
19995 /* Tag_ABI_FP_number_model. */
19996 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
19997 flag_finite_math_only ? 1 : 3);
19999 /* Tag_ABI_align8_needed. */
20000 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20001 /* Tag_ABI_align8_preserved. */
20002 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20003 /* Tag_ABI_enum_size. */
20004 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20005 flag_short_enums ? 1 : 2);
20007 /* Tag_ABI_optimization_goals. */
20010 else if (optimize >= 2)
20016 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20018 /* Tag_ABI_FP_16bit_format. */
20019 if (arm_fp16_format)
20020 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20021 (int)arm_fp16_format);
20023 if (arm_lang_output_object_attributes_hook)
20024 arm_lang_output_object_attributes_hook();
20026 default_file_start();
20030 arm_file_end (void)
20034 if (NEED_INDICATE_EXEC_STACK)
20035 /* Add .note.GNU-stack. */
20036 file_end_indicate_exec_stack ();
20038 if (! thumb_call_reg_needed)
20041 switch_to_section (text_section);
20042 asm_fprintf (asm_out_file, "\t.code 16\n");
20043 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20045 for (regno = 0; regno < LR_REGNUM; regno++)
20047 rtx label = thumb_call_via_label[regno];
20051 targetm.asm_out.internal_label (asm_out_file, "L",
20052 CODE_LABEL_NUMBER (label));
20053 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20059 /* Symbols in the text segment can be accessed without indirecting via the
20060 constant pool; it may take an extra binary operation, but this is still
20061 faster than indirecting via memory. Don't do this when not optimizing,
20062 since we won't be calculating al of the offsets necessary to do this
20066 arm_encode_section_info (tree decl, rtx rtl, int first)
20068 if (optimize > 0 && TREE_CONSTANT (decl))
20069 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20071 default_encode_section_info (decl, rtl, first);
20073 #endif /* !ARM_PE */
20076 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20078 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20079 && !strcmp (prefix, "L"))
20081 arm_ccfsm_state = 0;
20082 arm_target_insn = NULL;
20084 default_internal_label (stream, prefix, labelno);
20087 /* Output code to add DELTA to the first argument, and then jump
20088 to FUNCTION. Used for C++ multiple inheritance. */
20090 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20091 HOST_WIDE_INT delta,
20092 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20095 static int thunk_label = 0;
20098 int mi_delta = delta;
20099 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20101 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20104 mi_delta = - mi_delta;
20108 int labelno = thunk_label++;
20109 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20110 /* Thunks are entered in arm mode when avaiable. */
20111 if (TARGET_THUMB1_ONLY)
20113 /* push r3 so we can use it as a temporary. */
20114 /* TODO: Omit this save if r3 is not used. */
20115 fputs ("\tpush {r3}\n", file);
20116 fputs ("\tldr\tr3, ", file);
20120 fputs ("\tldr\tr12, ", file);
20122 assemble_name (file, label);
20123 fputc ('\n', file);
20126 /* If we are generating PIC, the ldr instruction below loads
20127 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20128 the address of the add + 8, so we have:
20130 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20133 Note that we have "+ 1" because some versions of GNU ld
20134 don't set the low bit of the result for R_ARM_REL32
20135 relocations against thumb function symbols.
20136 On ARMv6M this is +4, not +8. */
20137 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20138 assemble_name (file, labelpc);
20139 fputs (":\n", file);
20140 if (TARGET_THUMB1_ONLY)
20142 /* This is 2 insns after the start of the thunk, so we know it
20143 is 4-byte aligned. */
20144 fputs ("\tadd\tr3, pc, r3\n", file);
20145 fputs ("\tmov r12, r3\n", file);
20148 fputs ("\tadd\tr12, pc, r12\n", file);
20150 else if (TARGET_THUMB1_ONLY)
20151 fputs ("\tmov r12, r3\n", file);
20153 if (TARGET_THUMB1_ONLY)
20155 if (mi_delta > 255)
20157 fputs ("\tldr\tr3, ", file);
20158 assemble_name (file, label);
20159 fputs ("+4\n", file);
20160 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20161 mi_op, this_regno, this_regno);
20163 else if (mi_delta != 0)
20165 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20166 mi_op, this_regno, this_regno,
20172 /* TODO: Use movw/movt for large constants when available. */
20173 while (mi_delta != 0)
20175 if ((mi_delta & (3 << shift)) == 0)
20179 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20180 mi_op, this_regno, this_regno,
20181 mi_delta & (0xff << shift));
20182 mi_delta &= ~(0xff << shift);
20189 if (TARGET_THUMB1_ONLY)
20190 fputs ("\tpop\t{r3}\n", file);
20192 fprintf (file, "\tbx\tr12\n");
20193 ASM_OUTPUT_ALIGN (file, 2);
20194 assemble_name (file, label);
20195 fputs (":\n", file);
20198 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20199 rtx tem = XEXP (DECL_RTL (function), 0);
20200 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20201 tem = gen_rtx_MINUS (GET_MODE (tem),
20203 gen_rtx_SYMBOL_REF (Pmode,
20204 ggc_strdup (labelpc)));
20205 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20208 /* Output ".word .LTHUNKn". */
20209 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20211 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20212 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20216 fputs ("\tb\t", file);
20217 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20218 if (NEED_PLT_RELOC)
20219 fputs ("(PLT)", file);
20220 fputc ('\n', file);
20225 arm_emit_vector_const (FILE *file, rtx x)
20228 const char * pattern;
20230 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20232 switch (GET_MODE (x))
20234 case V2SImode: pattern = "%08x"; break;
20235 case V4HImode: pattern = "%04x"; break;
20236 case V8QImode: pattern = "%02x"; break;
20237 default: gcc_unreachable ();
20240 fprintf (file, "0x");
20241 for (i = CONST_VECTOR_NUNITS (x); i--;)
20245 element = CONST_VECTOR_ELT (x, i);
20246 fprintf (file, pattern, INTVAL (element));
20252 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20253 HFmode constant pool entries are actually loaded with ldr. */
20255 arm_emit_fp16_const (rtx c)
20260 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20261 bits = real_to_target (NULL, &r, HFmode);
20262 if (WORDS_BIG_ENDIAN)
20263 assemble_zeros (2);
20264 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20265 if (!WORDS_BIG_ENDIAN)
20266 assemble_zeros (2);
20270 arm_output_load_gr (rtx *operands)
20277 if (GET_CODE (operands [1]) != MEM
20278 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20279 || GET_CODE (reg = XEXP (sum, 0)) != REG
20280 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20281 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20282 return "wldrw%?\t%0, %1";
20284 /* Fix up an out-of-range load of a GR register. */
20285 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20286 wcgr = operands[0];
20288 output_asm_insn ("ldr%?\t%0, %1", operands);
20290 operands[0] = wcgr;
20292 output_asm_insn ("tmcr%?\t%0, %1", operands);
20293 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20300 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20301 named arg and all anonymous args onto the stack.
20302 XXX I know the prologue shouldn't be pushing registers, but it is faster
20306 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20307 enum machine_mode mode,
20310 int second_time ATTRIBUTE_UNUSED)
20314 cfun->machine->uses_anonymous_args = 1;
20315 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20317 nregs = pcum->aapcs_ncrn;
20318 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20322 nregs = pcum->nregs;
20324 if (nregs < NUM_ARG_REGS)
20325 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20328 /* Return nonzero if the CONSUMER instruction (a store) does not need
20329 PRODUCER's value to calculate the address. */
20332 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20334 rtx value = PATTERN (producer);
20335 rtx addr = PATTERN (consumer);
20337 if (GET_CODE (value) == COND_EXEC)
20338 value = COND_EXEC_CODE (value);
20339 if (GET_CODE (value) == PARALLEL)
20340 value = XVECEXP (value, 0, 0);
20341 value = XEXP (value, 0);
20342 if (GET_CODE (addr) == COND_EXEC)
20343 addr = COND_EXEC_CODE (addr);
20344 if (GET_CODE (addr) == PARALLEL)
20345 addr = XVECEXP (addr, 0, 0);
20346 addr = XEXP (addr, 0);
20348 return !reg_overlap_mentioned_p (value, addr);
20351 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20352 have an early register shift value or amount dependency on the
20353 result of PRODUCER. */
20356 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20358 rtx value = PATTERN (producer);
20359 rtx op = PATTERN (consumer);
20362 if (GET_CODE (value) == COND_EXEC)
20363 value = COND_EXEC_CODE (value);
20364 if (GET_CODE (value) == PARALLEL)
20365 value = XVECEXP (value, 0, 0);
20366 value = XEXP (value, 0);
20367 if (GET_CODE (op) == COND_EXEC)
20368 op = COND_EXEC_CODE (op);
20369 if (GET_CODE (op) == PARALLEL)
20370 op = XVECEXP (op, 0, 0);
20373 early_op = XEXP (op, 0);
20374 /* This is either an actual independent shift, or a shift applied to
20375 the first operand of another operation. We want the whole shift
20377 if (GET_CODE (early_op) == REG)
20380 return !reg_overlap_mentioned_p (value, early_op);
20383 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20384 have an early register shift value dependency on the result of
20388 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20390 rtx value = PATTERN (producer);
20391 rtx op = PATTERN (consumer);
20394 if (GET_CODE (value) == COND_EXEC)
20395 value = COND_EXEC_CODE (value);
20396 if (GET_CODE (value) == PARALLEL)
20397 value = XVECEXP (value, 0, 0);
20398 value = XEXP (value, 0);
20399 if (GET_CODE (op) == COND_EXEC)
20400 op = COND_EXEC_CODE (op);
20401 if (GET_CODE (op) == PARALLEL)
20402 op = XVECEXP (op, 0, 0);
20405 early_op = XEXP (op, 0);
20407 /* This is either an actual independent shift, or a shift applied to
20408 the first operand of another operation. We want the value being
20409 shifted, in either case. */
20410 if (GET_CODE (early_op) != REG)
20411 early_op = XEXP (early_op, 0);
20413 return !reg_overlap_mentioned_p (value, early_op);
20416 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20417 have an early register mult dependency on the result of
20421 arm_no_early_mul_dep (rtx producer, rtx consumer)
20423 rtx value = PATTERN (producer);
20424 rtx op = PATTERN (consumer);
20426 if (GET_CODE (value) == COND_EXEC)
20427 value = COND_EXEC_CODE (value);
20428 if (GET_CODE (value) == PARALLEL)
20429 value = XVECEXP (value, 0, 0);
20430 value = XEXP (value, 0);
20431 if (GET_CODE (op) == COND_EXEC)
20432 op = COND_EXEC_CODE (op);
20433 if (GET_CODE (op) == PARALLEL)
20434 op = XVECEXP (op, 0, 0);
20437 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20439 if (GET_CODE (XEXP (op, 0)) == MULT)
20440 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20442 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20448 /* We can't rely on the caller doing the proper promotion when
20449 using APCS or ATPCS. */
20452 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20454 return !TARGET_AAPCS_BASED;
20457 static enum machine_mode
20458 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20459 enum machine_mode mode,
20460 int *punsignedp ATTRIBUTE_UNUSED,
20461 const_tree fntype ATTRIBUTE_UNUSED,
20462 int for_return ATTRIBUTE_UNUSED)
20464 if (GET_MODE_CLASS (mode) == MODE_INT
20465 && GET_MODE_SIZE (mode) < 4)
20471 /* AAPCS based ABIs use short enums by default. */
20474 arm_default_short_enums (void)
20476 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20480 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20483 arm_align_anon_bitfield (void)
20485 return TARGET_AAPCS_BASED;
20489 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20492 arm_cxx_guard_type (void)
20494 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20497 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20498 has an accumulator dependency on the result of the producer (a
20499 multiplication instruction) and no other dependency on that result. */
20501 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20503 rtx mul = PATTERN (producer);
20504 rtx mac = PATTERN (consumer);
20506 rtx mac_op0, mac_op1, mac_acc;
20508 if (GET_CODE (mul) == COND_EXEC)
20509 mul = COND_EXEC_CODE (mul);
20510 if (GET_CODE (mac) == COND_EXEC)
20511 mac = COND_EXEC_CODE (mac);
20513 /* Check that mul is of the form (set (...) (mult ...))
20514 and mla is of the form (set (...) (plus (mult ...) (...))). */
20515 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20516 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20517 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20520 mul_result = XEXP (mul, 0);
20521 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20522 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20523 mac_acc = XEXP (XEXP (mac, 1), 1);
20525 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20526 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20527 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20531 /* The EABI says test the least significant bit of a guard variable. */
20534 arm_cxx_guard_mask_bit (void)
20536 return TARGET_AAPCS_BASED;
20540 /* The EABI specifies that all array cookies are 8 bytes long. */
20543 arm_get_cookie_size (tree type)
20547 if (!TARGET_AAPCS_BASED)
20548 return default_cxx_get_cookie_size (type);
20550 size = build_int_cst (sizetype, 8);
20555 /* The EABI says that array cookies should also contain the element size. */
20558 arm_cookie_has_size (void)
20560 return TARGET_AAPCS_BASED;
20564 /* The EABI says constructors and destructors should return a pointer to
20565 the object constructed/destroyed. */
20568 arm_cxx_cdtor_returns_this (void)
20570 return TARGET_AAPCS_BASED;
20573 /* The EABI says that an inline function may never be the key
20577 arm_cxx_key_method_may_be_inline (void)
20579 return !TARGET_AAPCS_BASED;
20583 arm_cxx_determine_class_data_visibility (tree decl)
20585 if (!TARGET_AAPCS_BASED
20586 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20589 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20590 is exported. However, on systems without dynamic vague linkage,
20591 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20592 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20593 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20595 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20596 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20600 arm_cxx_class_data_always_comdat (void)
20602 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20603 vague linkage if the class has no key function. */
20604 return !TARGET_AAPCS_BASED;
20608 /* The EABI says __aeabi_atexit should be used to register static
20612 arm_cxx_use_aeabi_atexit (void)
20614 return TARGET_AAPCS_BASED;
20619 arm_set_return_address (rtx source, rtx scratch)
20621 arm_stack_offsets *offsets;
20622 HOST_WIDE_INT delta;
20624 unsigned long saved_regs;
20626 offsets = arm_get_frame_offsets ();
20627 saved_regs = offsets->saved_regs_mask;
20629 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20630 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20633 if (frame_pointer_needed)
20634 addr = plus_constant(hard_frame_pointer_rtx, -4);
20637 /* LR will be the first saved register. */
20638 delta = offsets->outgoing_args - (offsets->frame + 4);
20643 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20644 GEN_INT (delta & ~4095)));
20649 addr = stack_pointer_rtx;
20651 addr = plus_constant (addr, delta);
20653 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20659 thumb_set_return_address (rtx source, rtx scratch)
20661 arm_stack_offsets *offsets;
20662 HOST_WIDE_INT delta;
20663 HOST_WIDE_INT limit;
20666 unsigned long mask;
20670 offsets = arm_get_frame_offsets ();
20671 mask = offsets->saved_regs_mask;
20672 if (mask & (1 << LR_REGNUM))
20675 /* Find the saved regs. */
20676 if (frame_pointer_needed)
20678 delta = offsets->soft_frame - offsets->saved_args;
20679 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20685 delta = offsets->outgoing_args - offsets->saved_args;
20688 /* Allow for the stack frame. */
20689 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20691 /* The link register is always the first saved register. */
20694 /* Construct the address. */
20695 addr = gen_rtx_REG (SImode, reg);
20698 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20699 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20703 addr = plus_constant (addr, delta);
20705 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20708 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20711 /* Implements target hook vector_mode_supported_p. */
20713 arm_vector_mode_supported_p (enum machine_mode mode)
20715 /* Neon also supports V2SImode, etc. listed in the clause below. */
20716 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20717 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20720 if ((TARGET_NEON || TARGET_IWMMXT)
20721 && ((mode == V2SImode)
20722 || (mode == V4HImode)
20723 || (mode == V8QImode)))
20729 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20730 ARM insns and therefore guarantee that the shift count is modulo 256.
20731 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20732 guarantee no particular behavior for out-of-range counts. */
20734 static unsigned HOST_WIDE_INT
20735 arm_shift_truncation_mask (enum machine_mode mode)
20737 return mode == SImode ? 255 : 0;
20741 /* Map internal gcc register numbers to DWARF2 register numbers. */
20744 arm_dbx_register_number (unsigned int regno)
20749 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20750 compatibility. The EABI defines them as registers 96-103. */
20751 if (IS_FPA_REGNUM (regno))
20752 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20754 if (IS_VFP_REGNUM (regno))
20756 /* See comment in arm_dwarf_register_span. */
20757 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20758 return 64 + regno - FIRST_VFP_REGNUM;
20760 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20763 if (IS_IWMMXT_GR_REGNUM (regno))
20764 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20766 if (IS_IWMMXT_REGNUM (regno))
20767 return 112 + regno - FIRST_IWMMXT_REGNUM;
20769 gcc_unreachable ();
20772 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20773 GCC models tham as 64 32-bit registers, so we need to describe this to
20774 the DWARF generation code. Other registers can use the default. */
20776 arm_dwarf_register_span (rtx rtl)
20783 regno = REGNO (rtl);
20784 if (!IS_VFP_REGNUM (regno))
20787 /* XXX FIXME: The EABI defines two VFP register ranges:
20788 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20790 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20791 corresponding D register. Until GDB supports this, we shall use the
20792 legacy encodings. We also use these encodings for D0-D15 for
20793 compatibility with older debuggers. */
20794 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20797 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
20798 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
20799 regno = (regno - FIRST_VFP_REGNUM) / 2;
20800 for (i = 0; i < nregs; i++)
20801 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
20806 #ifdef TARGET_UNWIND_INFO
20807 /* Emit unwind directives for a store-multiple instruction or stack pointer
20808 push during alignment.
20809 These should only ever be generated by the function prologue code, so
20810 expect them to have a particular form. */
20813 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20816 HOST_WIDE_INT offset;
20817 HOST_WIDE_INT nregs;
20823 e = XVECEXP (p, 0, 0);
20824 if (GET_CODE (e) != SET)
20827 /* First insn will adjust the stack pointer. */
20828 if (GET_CODE (e) != SET
20829 || GET_CODE (XEXP (e, 0)) != REG
20830 || REGNO (XEXP (e, 0)) != SP_REGNUM
20831 || GET_CODE (XEXP (e, 1)) != PLUS)
20834 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
20835 nregs = XVECLEN (p, 0) - 1;
20837 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
20840 /* The function prologue may also push pc, but not annotate it as it is
20841 never restored. We turn this into a stack pointer adjustment. */
20842 if (nregs * 4 == offset - 4)
20844 fprintf (asm_out_file, "\t.pad #4\n");
20848 fprintf (asm_out_file, "\t.save {");
20850 else if (IS_VFP_REGNUM (reg))
20853 fprintf (asm_out_file, "\t.vsave {");
20855 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
20857 /* FPA registers are done differently. */
20858 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
20862 /* Unknown register type. */
20865 /* If the stack increment doesn't match the size of the saved registers,
20866 something has gone horribly wrong. */
20867 if (offset != nregs * reg_size)
20872 /* The remaining insns will describe the stores. */
20873 for (i = 1; i <= nregs; i++)
20875 /* Expect (set (mem <addr>) (reg)).
20876 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
20877 e = XVECEXP (p, 0, i);
20878 if (GET_CODE (e) != SET
20879 || GET_CODE (XEXP (e, 0)) != MEM
20880 || GET_CODE (XEXP (e, 1)) != REG)
20883 reg = REGNO (XEXP (e, 1));
20888 fprintf (asm_out_file, ", ");
20889 /* We can't use %r for vfp because we need to use the
20890 double precision register names. */
20891 if (IS_VFP_REGNUM (reg))
20892 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
20894 asm_fprintf (asm_out_file, "%r", reg);
20896 #ifdef ENABLE_CHECKING
20897 /* Check that the addresses are consecutive. */
20898 e = XEXP (XEXP (e, 0), 0);
20899 if (GET_CODE (e) == PLUS)
20901 offset += reg_size;
20902 if (GET_CODE (XEXP (e, 0)) != REG
20903 || REGNO (XEXP (e, 0)) != SP_REGNUM
20904 || GET_CODE (XEXP (e, 1)) != CONST_INT
20905 || offset != INTVAL (XEXP (e, 1)))
20909 || GET_CODE (e) != REG
20910 || REGNO (e) != SP_REGNUM)
20914 fprintf (asm_out_file, "}\n");
20917 /* Emit unwind directives for a SET. */
20920 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
20928 switch (GET_CODE (e0))
20931 /* Pushing a single register. */
20932 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
20933 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
20934 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
20937 asm_fprintf (asm_out_file, "\t.save ");
20938 if (IS_VFP_REGNUM (REGNO (e1)))
20939 asm_fprintf(asm_out_file, "{d%d}\n",
20940 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
20942 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
20946 if (REGNO (e0) == SP_REGNUM)
20948 /* A stack increment. */
20949 if (GET_CODE (e1) != PLUS
20950 || GET_CODE (XEXP (e1, 0)) != REG
20951 || REGNO (XEXP (e1, 0)) != SP_REGNUM
20952 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20955 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
20956 -INTVAL (XEXP (e1, 1)));
20958 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
20960 HOST_WIDE_INT offset;
20962 if (GET_CODE (e1) == PLUS)
20964 if (GET_CODE (XEXP (e1, 0)) != REG
20965 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
20967 reg = REGNO (XEXP (e1, 0));
20968 offset = INTVAL (XEXP (e1, 1));
20969 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
20970 HARD_FRAME_POINTER_REGNUM, reg,
20971 INTVAL (XEXP (e1, 1)));
20973 else if (GET_CODE (e1) == REG)
20976 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
20977 HARD_FRAME_POINTER_REGNUM, reg);
20982 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
20984 /* Move from sp to reg. */
20985 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
20987 else if (GET_CODE (e1) == PLUS
20988 && GET_CODE (XEXP (e1, 0)) == REG
20989 && REGNO (XEXP (e1, 0)) == SP_REGNUM
20990 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
20992 /* Set reg to offset from sp. */
20993 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
20994 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
20996 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
20998 /* Stack pointer save before alignment. */
21000 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21013 /* Emit unwind directives for the given insn. */
21016 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21020 if (!ARM_EABI_UNWIND_TABLES)
21023 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21024 && (TREE_NOTHROW (current_function_decl)
21025 || crtl->all_throwers_are_sibcalls))
21028 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21031 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21033 pat = XEXP (pat, 0);
21035 pat = PATTERN (insn);
21037 switch (GET_CODE (pat))
21040 arm_unwind_emit_set (asm_out_file, pat);
21044 /* Store multiple. */
21045 arm_unwind_emit_sequence (asm_out_file, pat);
21054 /* Output a reference from a function exception table to the type_info
21055 object X. The EABI specifies that the symbol should be relocated by
21056 an R_ARM_TARGET2 relocation. */
21059 arm_output_ttype (rtx x)
21061 fputs ("\t.word\t", asm_out_file);
21062 output_addr_const (asm_out_file, x);
21063 /* Use special relocations for symbol references. */
21064 if (GET_CODE (x) != CONST_INT)
21065 fputs ("(TARGET2)", asm_out_file);
21066 fputc ('\n', asm_out_file);
21070 #endif /* TARGET_UNWIND_INFO */
21073 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21074 stack alignment. */
21077 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21079 rtx unspec = SET_SRC (pattern);
21080 gcc_assert (GET_CODE (unspec) == UNSPEC);
21084 case UNSPEC_STACK_ALIGN:
21085 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21086 put anything on the stack, so hopefully it won't matter.
21087 CFA = SP will be correct after alignment. */
21088 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21089 SET_DEST (pattern));
21092 gcc_unreachable ();
21097 /* Output unwind directives for the start/end of a function. */
21100 arm_output_fn_unwind (FILE * f, bool prologue)
21102 if (!ARM_EABI_UNWIND_TABLES)
21106 fputs ("\t.fnstart\n", f);
21109 /* If this function will never be unwound, then mark it as such.
21110 The came condition is used in arm_unwind_emit to suppress
21111 the frame annotations. */
21112 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21113 && (TREE_NOTHROW (current_function_decl)
21114 || crtl->all_throwers_are_sibcalls))
21115 fputs("\t.cantunwind\n", f);
21117 fputs ("\t.fnend\n", f);
21122 arm_emit_tls_decoration (FILE *fp, rtx x)
21124 enum tls_reloc reloc;
21127 val = XVECEXP (x, 0, 0);
21128 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21130 output_addr_const (fp, val);
21135 fputs ("(tlsgd)", fp);
21138 fputs ("(tlsldm)", fp);
21141 fputs ("(tlsldo)", fp);
21144 fputs ("(gottpoff)", fp);
21147 fputs ("(tpoff)", fp);
21150 gcc_unreachable ();
21158 fputs (" + (. - ", fp);
21159 output_addr_const (fp, XVECEXP (x, 0, 2));
21161 output_addr_const (fp, XVECEXP (x, 0, 3));
21171 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21174 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21176 gcc_assert (size == 4);
21177 fputs ("\t.word\t", file);
21178 output_addr_const (file, x);
21179 fputs ("(tlsldo)", file);
21183 arm_output_addr_const_extra (FILE *fp, rtx x)
21185 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21186 return arm_emit_tls_decoration (fp, x);
21187 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21190 int labelno = INTVAL (XVECEXP (x, 0, 0));
21192 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21193 assemble_name_raw (fp, label);
21197 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21199 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21203 output_addr_const (fp, XVECEXP (x, 0, 0));
21207 else if (GET_CODE (x) == CONST_VECTOR)
21208 return arm_emit_vector_const (fp, x);
21213 /* Output assembly for a shift instruction.
21214 SET_FLAGS determines how the instruction modifies the condition codes.
21215 0 - Do not set condition codes.
21216 1 - Set condition codes.
21217 2 - Use smallest instruction. */
21219 arm_output_shift(rtx * operands, int set_flags)
21222 static const char flag_chars[3] = {'?', '.', '!'};
21227 c = flag_chars[set_flags];
21228 if (TARGET_UNIFIED_ASM)
21230 shift = shift_op(operands[3], &val);
21234 operands[2] = GEN_INT(val);
21235 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21238 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21241 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21242 output_asm_insn (pattern, operands);
21246 /* Output a Thumb-1 casesi dispatch sequence. */
21248 thumb1_output_casesi (rtx *operands)
21250 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21251 addr_diff_vec_flags flags;
21253 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21255 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
21257 switch (GET_MODE(diff_vec))
21260 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21261 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21263 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21264 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21266 return "bl\t%___gnu_thumb1_case_si";
21268 gcc_unreachable ();
21272 /* Output a Thumb-2 casesi instruction. */
21274 thumb2_output_casesi (rtx *operands)
21276 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21278 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21280 output_asm_insn ("cmp\t%0, %1", operands);
21281 output_asm_insn ("bhi\t%l3", operands);
21282 switch (GET_MODE(diff_vec))
21285 return "tbb\t[%|pc, %0]";
21287 return "tbh\t[%|pc, %0, lsl #1]";
21291 output_asm_insn ("adr\t%4, %l2", operands);
21292 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21293 output_asm_insn ("add\t%4, %4, %5", operands);
21298 output_asm_insn ("adr\t%4, %l2", operands);
21299 return "ldr\t%|pc, [%4, %0, lsl #2]";
21302 gcc_unreachable ();
21306 /* Most ARM cores are single issue, but some newer ones can dual issue.
21307 The scheduler descriptions rely on this being correct. */
21309 arm_issue_rate (void)
21324 /* A table and a function to perform ARM-specific name mangling for
21325 NEON vector types in order to conform to the AAPCS (see "Procedure
21326 Call Standard for the ARM Architecture", Appendix A). To qualify
21327 for emission with the mangled names defined in that document, a
21328 vector type must not only be of the correct mode but also be
21329 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21332 enum machine_mode mode;
21333 const char *element_type_name;
21334 const char *aapcs_name;
21335 } arm_mangle_map_entry;
21337 static arm_mangle_map_entry arm_mangle_map[] = {
21338 /* 64-bit containerized types. */
21339 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21340 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21341 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21342 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21343 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21344 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21345 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21346 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21347 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21348 /* 128-bit containerized types. */
21349 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21350 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21351 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
21352 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21353 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
21354 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
21355 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
21356 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21357 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21358 { VOIDmode, NULL, NULL }
21362 arm_mangle_type (const_tree type)
21364 arm_mangle_map_entry *pos = arm_mangle_map;
21366 /* The ARM ABI documents (10th October 2008) say that "__va_list"
21367 has to be managled as if it is in the "std" namespace. */
21368 if (TARGET_AAPCS_BASED
21369 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21371 static bool warned;
21372 if (!warned && warn_psabi && !in_system_header)
21375 inform (input_location,
21376 "the mangling of %<va_list%> has changed in GCC 4.4");
21378 return "St9__va_list";
21381 /* Half-precision float. */
21382 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21385 if (TREE_CODE (type) != VECTOR_TYPE)
21388 /* Check the mode of the vector type, and the name of the vector
21389 element type, against the table. */
21390 while (pos->mode != VOIDmode)
21392 tree elt_type = TREE_TYPE (type);
21394 if (pos->mode == TYPE_MODE (type)
21395 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21396 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21397 pos->element_type_name))
21398 return pos->aapcs_name;
21403 /* Use the default mangling for unrecognized (possibly user-defined)
21408 /* Order of allocation of core registers for Thumb: this allocation is
21409 written over the corresponding initial entries of the array
21410 initialized with REG_ALLOC_ORDER. We allocate all low registers
21411 first. Saving and restoring a low register is usually cheaper than
21412 using a call-clobbered high register. */
21414 static const int thumb_core_reg_alloc_order[] =
21416 3, 2, 1, 0, 4, 5, 6, 7,
21417 14, 12, 8, 9, 10, 11, 13, 15
21420 /* Adjust register allocation order when compiling for Thumb. */
21423 arm_order_regs_for_local_alloc (void)
21425 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21426 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21428 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21429 sizeof (thumb_core_reg_alloc_order));
21432 /* Set default optimization options. */
21434 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21436 /* Enable section anchors by default at -O1 or higher.
21437 Use 2 to distinguish from an explicit -fsection-anchors
21438 given on the command line. */
21440 flag_section_anchors = 2;
21443 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21446 arm_frame_pointer_required (void)
21448 return (cfun->has_nonlocal_label
21449 || SUBTARGET_FRAME_POINTER_REQUIRED
21450 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21453 /* Only thumb1 can't support conditional execution, so return true if
21454 the target is not thumb1. */
21456 arm_have_conditional_execution (void)
21458 return !TARGET_THUMB1;
21461 #include "gt-arm.h"