/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (martin@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
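
/* As an illustration (the handler name below is hypothetical), a driver
   might declare

     void uart_handler (void) __attribute__ ((isr ("IRQ")));

   arm_handle_isr_attribute checks the argument, and the prologue and
   epilogue emitters then generate the IRQ-specific entry and exit
   sequences for the function.  */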
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
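
/* That is, an anchor's block spans 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, so consecutive anchors fall on doubleword
   boundaries.  */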

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2       FL_NOTM
#define FL_FOR_ARCH3       (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M      (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4       (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T      (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5       (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T      (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E      (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE     (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ    FL_FOR_ARCH5TE
#define FL_FOR_ARCH6       (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J      FL_FOR_ARCH6
#define FL_FOR_ARCH6K      (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z      FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK     FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2     (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M      (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7       (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A      (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R      (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M      (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM     (FL_FOR_ARCH7M | FL_ARCH7EM)
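
/* Each FL_FOR_ARCHn macro folds in everything its predecessor provides,
   so, for example, FL_FOR_ARCH4T expands transitively to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB.  */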
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
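
/* Each condition sits next to its inverse, differing only in the low bit
   of its index: "eq" (0) and "ne" (1), "cs" (2) and "cc" (3), and so on.
   Inverting a condition code is therefore just an XOR with 1.  */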
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
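
/* A sketch of what the mask selects: the low registers r0-r7, minus any
   register in that range that already has a dedicated role (the Thumb
   hard frame pointer; the SP, PC and PIC register bits are cleared too,
   where they fall inside the mask).  */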
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm3,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
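
/* The arch field feeds the architecture preprocessor macro: selecting
   -march=armv5te, for example, causes arm_override_options to sprintf
   "__ARM_ARCH_5TE__" into arm_arch_name below.  */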
struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2
/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",               ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2",              ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3",              ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick",          ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp",               ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3",             ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16",        ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16",         ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16",    ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd",           ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",      ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon",              ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  false},
  {"neon-fp16",         ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  true},
  {"vfpv4",             ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16",         ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16",       ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",        ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3",              ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft",      ARM_FLOAT_ABI_SOFT},
  {"softfp",    ARM_FLOAT_ABI_SOFTFP},
  {"hard",      ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none",              ARM_FP16_FORMAT_NONE},
  {"ieee",              ARM_FP16_FORMAT_IEEE},
  {"alternative",       ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
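
/* For instance, emit_set_insn (stack_pointer_rtx, hard_frame_pointer_rtx)
   emits the single insn "sp = fp".  */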
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
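
/* This is Kernighan's trick: the loop runs once per set bit rather than
   once per bit position.  E.g. for VALUE == 0x29 (binary 101001) the
   value steps 0x29 -> 0x28 -> 0x20 -> 0 and the result is 3.  */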
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
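
  /* A DImode division a / b therefore becomes a call to __aeabi_ldivmod:
     the quotient comes back in {r0, r1} and the remainder, left in
     {r2, r3}, is simply ignored.  */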
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
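
/* For example, "-mcpu=arm926ej-s -mtune=cortex-a8" stores the two strings
   in arm_select[0] and arm_select[2] respectively; arm_override_options
   below then looks them up in the all_cores table.  */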
static void
arm_target_help (void)
{
  static int columns = 0;
  int remaining;
  int i;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);
	  if (value > 0)
	    columns = value;
	}
      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");
  i = ARRAY_SIZE (all_architectures) - 2;
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	{
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
	      {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i == ARM_OPT_SET_CPU)
		  selected_cpu = (enum processor_type) (sel - ptr->processors);

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

		break;
	      }

	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);
	}
    }
  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (selected_cpu == arm_none)
	    selected_cpu = arm6;
	}
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
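
  /* Check: 248 + 1 + 4095 = 4344 = 8 * 543, preserving the same natural
     anchor spacing as in ARM mode.  */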
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
	target_fpu_name = "maverick";
      else
	target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
	{
	  arm_fpu_desc = &all_fpus[i];
	  break;
	}
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
      else
	arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
1721 if (TARGET_AAPCS_BASED)
1723 if (arm_abi == ARM_ABI_IWMMXT)
1724 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1725 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1726 && TARGET_HARD_FLOAT
1727 && TARGET_VFP)
1728 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1729 else
1730 arm_pcs_default = ARM_PCS_AAPCS;
1734 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1735 sorry ("-mfloat-abi=hard and VFP");
1737 if (arm_abi == ARM_ABI_APCS)
1738 arm_pcs_default = ARM_PCS_APCS;
1739 else
1740 arm_pcs_default = ARM_PCS_ATPCS;
1743 /* For arm2/3 there is no need to do any scheduling if there is only
1744 a floating point emulator, or if we are doing software floating-point. */
1745 if ((TARGET_SOFT_FLOAT
1746 || (TARGET_FPA && arm_fpu_desc->rev))
1747 && (tune_flags & FL_MODE32) == 0)
1748 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1750 if (target_thread_switch)
1752 if (strcmp (target_thread_switch, "soft") == 0)
1753 target_thread_pointer = TP_SOFT;
1754 else if (strcmp (target_thread_switch, "auto") == 0)
1755 target_thread_pointer = TP_AUTO;
1756 else if (strcmp (target_thread_switch, "cp15") == 0)
1757 target_thread_pointer = TP_CP15;
1759 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1762 /* Use the cp15 method if it is available. */
1763 if (target_thread_pointer == TP_AUTO)
1765 if (arm_arch6k && !TARGET_THUMB1)
1766 target_thread_pointer = TP_CP15;
1767 else
1768 target_thread_pointer = TP_SOFT;
1771 if (TARGET_HARD_TP && TARGET_THUMB1)
1772 error ("can not use -mtp=cp15 with 16-bit Thumb");
1774 /* Override the default structure alignment for AAPCS ABI. */
1775 if (TARGET_AAPCS_BASED)
1776 arm_structure_size_boundary = 8;
1778 if (structure_size_string != NULL)
1780 int size = strtol (structure_size_string, NULL, 0);
1782 if (size == 8 || size == 32
1783 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1784 arm_structure_size_boundary = size;
1786 warning (0, "structure size boundary can only be set to %s",
1787 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
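/* For example, with -mstructure-size-boundary=32 a structure
   containing a single char is rounded up to four bytes, whereas a
   boundary of 8 lets it occupy a single byte. */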
1790 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1792 error ("RTP PIC is incompatible with Thumb");
1796 /* If stack checking is disabled, we can use r10 as the PIC register,
1797 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1798 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1800 if (TARGET_VXWORKS_RTP)
1801 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1802 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1805 if (flag_pic && TARGET_VXWORKS_RTP)
1806 arm_pic_register = 9;
1808 if (arm_pic_register_string != NULL)
1810 int pic_register = decode_reg_name (arm_pic_register_string);
1813 warning (0, "-mpic-register= is useless without -fpic");
1815 /* Prevent the user from choosing an obviously stupid PIC register. */
1816 else if (pic_register < 0 || call_used_regs[pic_register]
1817 || pic_register == HARD_FRAME_POINTER_REGNUM
1818 || pic_register == STACK_POINTER_REGNUM
1819 || pic_register >= PC_REGNUM
1820 || (TARGET_VXWORKS_RTP
1821 && (unsigned int) pic_register != arm_pic_register))
1822 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1823 else
1824 arm_pic_register = pic_register;
1827 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1828 if (fix_cm3_ldrd == 2)
1830 if (selected_cpu == cortexm3)
1831 fix_cm3_ldrd = 1;
1832 else
1833 fix_cm3_ldrd = 0;
1836 if (TARGET_THUMB1 && flag_schedule_insns)
1838 /* Don't warn since it's on by default in -O2. */
1839 flag_schedule_insns = 0;
1842 if (optimize_size)
1843 {
1844 arm_constant_limit = 1;
1846 /* If optimizing for size, bump the number of instructions that we
1847 are prepared to conditionally execute (even on a StrongARM). */
1848 max_insns_skipped = 6;
1849 }
1850 else
1851 {
1852 /* For processors with load scheduling, it never costs more than
1853 2 cycles to load a constant, and the load scheduler may well
1854 reduce that to 1. */
1855 if (arm_ld_sched)
1856 arm_constant_limit = 1;
1858 /* On XScale the longer latency of a load makes it more difficult
1859 to achieve a good schedule, so it's faster to synthesize
1860 constants that can be done in two insns. */
1861 if (arm_tune_xscale)
1862 arm_constant_limit = 2;
1864 /* StrongARM has early execution of branches, so a sequence
1865 that is worth skipping is shorter. */
1866 if (arm_tune_strongarm)
1867 max_insns_skipped = 3;
1870 /* Hot/Cold partitioning is not currently supported, since we can't
1871 handle literal pool placement in that case. */
1872 if (flag_reorder_blocks_and_partition)
1874 inform (input_location,
1875 "-freorder-blocks-and-partition not supported on this architecture");
1876 flag_reorder_blocks_and_partition = 0;
1877 flag_reorder_blocks = 1;
1880 /* Register global variables with the garbage collector. */
1881 arm_add_gc_roots ();
1885 arm_add_gc_roots (void)
1887 gcc_obstack_init (&minipool_obstack);
1888 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1891 /* A table of known ARM exception types.
1892 For use with the interrupt function attribute. */
1894 typedef struct
1895 {
1896 const char *const arg;
1897 const unsigned long return_value;
1898 }
1899 isr_attribute_arg;
1901 static const isr_attribute_arg isr_attribute_args [] =
1903 { "IRQ", ARM_FT_ISR },
1904 { "irq", ARM_FT_ISR },
1905 { "FIQ", ARM_FT_FIQ },
1906 { "fiq", ARM_FT_FIQ },
1907 { "ABORT", ARM_FT_ISR },
1908 { "abort", ARM_FT_ISR },
1909 { "ABORT", ARM_FT_ISR },
1910 { "abort", ARM_FT_ISR },
1911 { "UNDEF", ARM_FT_EXCEPTION },
1912 { "undef", ARM_FT_EXCEPTION },
1913 { "SWI", ARM_FT_EXCEPTION },
1914 { "swi", ARM_FT_EXCEPTION },
1915 { NULL, ARM_FT_NORMAL }
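/* Usage sketch (user source, not compiler code): the attribute
   argument is matched against the table above, so

   void handler (void) __attribute__ ((interrupt ("FIQ")));

   marks HANDLER as an FIQ handler, while a bare
   __attribute__ ((isr)) defaults to IRQ (see arm_isr_value). */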
1918 /* Returns the (interrupt) function type of the current
1919 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1921 static unsigned long
1922 arm_isr_value (tree argument)
1924 const isr_attribute_arg * ptr;
1925 const char *arg;
1927 if (!arm_arch_notm)
1928 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1930 /* No argument - default to IRQ. */
1931 if (argument == NULL_TREE)
1932 return ARM_FT_ISR;
1934 /* Get the value of the argument. */
1935 if (TREE_VALUE (argument) == NULL_TREE
1936 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1937 return ARM_FT_UNKNOWN;
1939 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1941 /* Check it against the list of known arguments. */
1942 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1943 if (streq (arg, ptr->arg))
1944 return ptr->return_value;
1946 /* An unrecognized interrupt type. */
1947 return ARM_FT_UNKNOWN;
1950 /* Computes the type of the current function. */
1952 static unsigned long
1953 arm_compute_func_type (void)
1955 unsigned long type = ARM_FT_UNKNOWN;
1956 tree a;
1957 tree attr;
1959 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1961 /* Decide if the current function is volatile. Such functions
1962 never return, and many memory cycles can be saved by not storing
1963 register values that will never be needed again. This optimization
1964 was added to speed up context switching in a kernel application. */
1965 if (optimize > 0
1966 && (TREE_NOTHROW (current_function_decl)
1967 || !(flag_unwind_tables
1968 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1969 && TREE_THIS_VOLATILE (current_function_decl))
1970 type |= ARM_FT_VOLATILE;
1972 if (cfun->static_chain_decl != NULL)
1973 type |= ARM_FT_NESTED;
1975 attr = DECL_ATTRIBUTES (current_function_decl);
1977 a = lookup_attribute ("naked", attr);
1978 if (a != NULL_TREE)
1979 type |= ARM_FT_NAKED;
1981 a = lookup_attribute ("isr", attr);
1982 if (a == NULL_TREE)
1983 a = lookup_attribute ("interrupt", attr);
1985 if (a == NULL_TREE)
1986 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1987 else
1988 type |= arm_isr_value (TREE_VALUE (a));
1990 return type;
1993 /* Returns the type of the current function. */
1996 arm_current_func_type (void)
1998 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1999 cfun->machine->func_type = arm_compute_func_type ();
2001 return cfun->machine->func_type;
2005 arm_allocate_stack_slots_for_args (void)
2007 /* Naked functions should not allocate stack slots for arguments. */
2008 return !IS_NAKED (arm_current_func_type ());
2012 /* Output assembler code for a block containing the constant parts
2013 of a trampoline, leaving space for the variable parts.
2015 On the ARM, (if r8 is the static chain regnum, and remembering that
2016 referencing pc adds an offset of 8) the trampoline looks like:
2017 ldr r8, [pc, #0]
2018 ldr pc, [pc]
2019 .word static chain value
2020 .word function's address
2021 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2024 arm_asm_trampoline_template (FILE *f)
2026 if (TARGET_ARM)
2028 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2029 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2031 else if (TARGET_THUMB2)
2033 /* The Thumb-2 trampoline is similar to the ARM implementation.
2034 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2035 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2036 STATIC_CHAIN_REGNUM, PC_REGNUM);
2037 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2041 ASM_OUTPUT_ALIGN (f, 2);
2042 fprintf (f, "\t.code\t16\n");
2043 fprintf (f, ".Ltrampoline_start:\n");
2044 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2045 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2046 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2047 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2048 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2049 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2051 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2052 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2055 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2058 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2060 rtx fnaddr, mem, a_tramp;
2062 emit_block_move (m_tramp, assemble_trampoline_template (),
2063 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2065 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2066 emit_move_insn (mem, chain_value);
2068 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2069 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2070 emit_move_insn (mem, fnaddr);
2072 a_tramp = XEXP (m_tramp, 0);
2073 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2074 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2075 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
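/* Illustrative layout once initialized (ARM mode): bytes 0-7 hold the
   two LDR instructions copied from the template, the word at offset 8
   holds the static chain value, and the word at offset 12 holds the
   target function's address -- the same offsets passed to
   adjust_address above. */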
2078 /* Thumb trampolines should be entered in thumb mode, so set
2079 the bottom bit of the address. */
2082 arm_trampoline_adjust_address (rtx addr)
2084 if (TARGET_THUMB)
2085 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2086 NULL, 0, OPTAB_LIB_WIDEN);
2087 return addr;
2090 /* Return 1 if it is possible to return using a single instruction.
2091 If SIBLING is non-null, this is a test for a return before a sibling
2092 call. SIBLING is the call insn, so we can examine its register usage. */
2095 use_return_insn (int iscond, rtx sibling)
2098 unsigned int func_type;
2099 unsigned long saved_int_regs;
2100 unsigned HOST_WIDE_INT stack_adjust;
2101 arm_stack_offsets *offsets;
2103 /* Never use a return instruction before reload has run. */
2104 if (!reload_completed)
2105 return 0;
2107 func_type = arm_current_func_type ();
2109 /* Naked, volatile and stack alignment functions need special
2110 consideration. */
2111 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2112 return 0;
2114 /* So do interrupt functions that use the frame pointer and Thumb
2115 interrupt functions. */
2116 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2117 return 0;
2119 offsets = arm_get_frame_offsets ();
2120 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2122 /* As do variadic functions. */
2123 if (crtl->args.pretend_args_size
2124 || cfun->machine->uses_anonymous_args
2125 /* Or if the function calls __builtin_eh_return () */
2126 || crtl->calls_eh_return
2127 /* Or if the function calls alloca */
2128 || cfun->calls_alloca
2129 /* Or if there is a stack adjustment. However, if the stack pointer
2130 is saved on the stack, we can use a pre-incrementing stack load. */
2131 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2132 && stack_adjust == 4)))
2133 return 0;
2135 saved_int_regs = offsets->saved_regs_mask;
2137 /* Unfortunately, the insn
2139 ldmib sp, {..., sp, ...}
2141 triggers a bug on most SA-110 based devices, such that the stack
2142 pointer won't be correctly restored if the instruction takes a
2143 page fault. We work around this problem by popping r3 along with
2144 the other registers, since that is never slower than executing
2145 another instruction.
2147 We test for !arm_arch5 here, because code for any architecture
2148 less than this could potentially be run on one of the buggy
2149 chips. */
2150 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2152 /* Validate that r3 is a call-clobbered register (always true in
2153 the default ABI) ... */
2154 if (!call_used_regs[3])
2155 return 0;
2157 /* ... that it isn't being used for a return value ... */
2158 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2159 return 0;
2161 /* ... or for a tail-call argument ... */
2164 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2166 if (find_regno_fusage (sibling, USE, 3))
2167 return 0;
2170 /* ... and that there are no call-saved registers in r0-r2
2171 (always true in the default ABI). */
2172 if (saved_int_regs & 0x7)
2173 return 0;
2176 /* Can't be done if interworking with Thumb, and any registers have been
2177 stacked. */
2178 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2179 return 0;
2181 /* On StrongARM, conditional returns are expensive if they aren't
2182 taken and multiple registers have been stacked. */
2183 if (iscond && arm_tune_strongarm)
2185 /* Conditional return when just the LR is stored is a simple
2186 conditional-load instruction, that's not expensive. */
2187 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2188 return 0;
2190 if (flag_pic
2191 && arm_pic_register != INVALID_REGNUM
2192 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2193 return 0;
2196 /* If there are saved registers but the LR isn't saved, then we need
2197 two instructions for the return. */
2198 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2199 return 0;
2201 /* Can't be done if any of the FPA regs are pushed,
2202 since this also requires an insn. */
2203 if (TARGET_HARD_FLOAT && TARGET_FPA)
2204 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2205 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2206 return 0;
2208 /* Likewise VFP regs. */
2209 if (TARGET_HARD_FLOAT && TARGET_VFP)
2210 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2211 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2212 return 0;
2214 if (TARGET_REALLY_IWMMXT)
2215 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2216 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2217 return 0;
2219 return 1;
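/* For example, a function that pushed {r4, lr} and needs no other
   stack adjustment can return with the single instruction

   ldmfd sp!, {r4, pc}

   whereas if the LR had not been saved, a separate branch back to the
   caller would also be needed, which is what the LR check above
   rejects. */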
2222 /* Return TRUE if int I is a valid immediate ARM constant. */
2225 const_ok_for_arm (HOST_WIDE_INT i)
2229 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2230 be all zero, or all one. */
2231 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2232 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2233 != ((~(unsigned HOST_WIDE_INT) 0)
2234 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2235 return FALSE;
2237 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2239 /* Fast return for 0 and small values. We must do this for zero, since
2240 the code below can't handle that one case. */
2241 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2242 return TRUE;
2244 /* Get the number of trailing zeros. */
2245 lowbit = ffs((int) i) - 1;
2247 /* Only even shifts are allowed in ARM mode so round down to the
2248 nearest even number. */
2252 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2253 return TRUE;
2257 /* Allow rotated constants in ARM mode. */
2259 && ((i & ~0xc000003f) == 0
2260 || (i & ~0xf000000f) == 0
2261 || (i & ~0xfc000003) == 0))
2262 return TRUE;
2266 HOST_WIDE_INT v;
2268 /* Allow repeated patterns such as 0x00XY00XY and 0xXYXYXYXY. */
2269 v = i & 0xff;
2270 v |= v << 16;
2271 if (i == v || i == (v | (v << 8)))
2272 return TRUE;
2274 return FALSE;
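/* A minimal, self-contained sketch of the ARM-mode immediate rule
   tested above (for illustration only; nothing in the compiler calls
   this): a constant is encodable iff it is an 8-bit value rotated
   right by an even amount. */
static int
arm_immediate_sketch (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT bits; if the result fits in eight bits,
         then X is an 8-bit value rotated right by ROT. */
      unsigned int v = (x << rot) | (rot ? x >> (32 - rot) : 0);

      if ((v & ~0xffu) == 0)
        return 1;
    }

  return 0;
}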
2278 /* Return true if I is a valid constant for the operation CODE. */
2280 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2282 if (const_ok_for_arm (i))
2306 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2308 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2314 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2318 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2325 /* Emit a sequence of insns to handle a large constant.
2326 CODE is the code of the operation required, it can be any of SET, PLUS,
2327 IOR, AND, XOR, MINUS;
2328 MODE is the mode in which the operation is being performed;
2329 VAL is the integer to operate on;
2330 SOURCE is the other operand (a register, or a null-pointer for SET);
2331 SUBTARGETS means it is safe to create scratch registers if that will
2332 either produce a simpler sequence, or we will want to cse the values.
2333 Return value is the number of insns emitted. */
2335 /* ??? Tweak this for thumb2. */
2337 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2338 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2342 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2343 cond = COND_EXEC_TEST (PATTERN (insn));
2344 else
2345 cond = NULL_RTX;
2347 if (subtargets || code == SET
2348 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2349 && REGNO (target) != REGNO (source)))
2351 /* After arm_reorg has been called, we can't fix up expensive
2352 constants by pushing them into memory so we must synthesize
2353 them in-line, regardless of the cost. This is only likely to
2354 be more costly on chips that have load delay slots and we are
2355 compiling without running the scheduler (so no splitting
2356 occurred before the final instruction emission).
2358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2360 if (!after_arm_reorg
2361 && !cond
2362 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2363 1, 0)
2364 > arm_constant_limit + (code != SET)))
2368 /* Currently SET is the only monadic value for CODE, all
2369 the rest are dyadic. */
2370 if (TARGET_USE_MOVT)
2371 arm_emit_movpair (target, GEN_INT (val));
2373 emit_set_insn (target, GEN_INT (val));
2379 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2381 if (TARGET_USE_MOVT)
2382 arm_emit_movpair (temp, GEN_INT (val));
2384 emit_set_insn (temp, GEN_INT (val));
2386 /* For MINUS, the constant is the value subtracted from (VAL - SOURCE),
2387 since we never have subtraction of a constant. */
2389 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2391 emit_set_insn (target,
2392 gen_rtx_fmt_ee (code, mode, source, temp));
2398 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2399 1);
2402 /* Return the number of instructions required to synthesize the given
2403 constant, if we start emitting them from bit-position I. */
2405 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2407 HOST_WIDE_INT temp1;
2408 int step_size = TARGET_ARM ? 2 : 1;
2411 gcc_assert (TARGET_ARM || i == 0);
2419 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2424 temp1 = remainder & ((0x0ff << end)
2425 | ((i < end) ? (0xff >> (32 - end)) : 0));
2426 remainder &= ~temp1;
2431 } while (remainder);
2436 find_best_start (unsigned HOST_WIDE_INT remainder)
2438 int best_consecutive_zeros = 0;
2442 /* If we aren't targeting ARM, the best place to start is always at
2443 the bottom. */
2444 if (! TARGET_ARM)
2445 return 0;
2447 for (i = 0; i < 32; i += 2)
2449 int consecutive_zeros = 0;
2451 if (!(remainder & (3 << i)))
2453 while ((i < 32) && !(remainder & (3 << i)))
2455 consecutive_zeros += 2;
2458 if (consecutive_zeros > best_consecutive_zeros)
2460 best_consecutive_zeros = consecutive_zeros;
2461 best_start = i - consecutive_zeros;
2467 /* So long as it won't require any more insns to do so, it's
2468 desirable to emit a small constant (in bits 0...9) in the last
2469 insn. This way there is more chance that it can be combined with
2470 a later addressing insn to form a pre-indexed load or store
2471 operation. Consider:
2473 *((volatile int *)0xe0000100) = 1;
2474 *((volatile int *)0xe0000110) = 2;
2476 We want this to wind up as:
2480 str rB, [rA, #0x100]
2482 str rB, [rA, #0x110]
2484 rather than having to synthesize both large constants from scratch.
2486 Therefore, we calculate how many insns would be required to emit
2487 the constant starting from `best_start', and also starting from
2488 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2489 yield a shorter sequence, we may as well use zero. */
2491 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2492 && (count_insns_for_constant (remainder, 0) <=
2493 count_insns_for_constant (remainder, best_start)))
2499 /* Emit an instruction with the indicated PATTERN. If COND is
2500 non-NULL, conditionalize the execution of the instruction on COND
2501 being true. */
2503 static void
2504 emit_constant_insn (rtx cond, rtx pattern)
2507 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2508 emit_insn (pattern);
2511 /* As above, but extra parameter GENERATE which, if clear, suppresses
2512 RTL generation. */
2513 /* ??? This needs more work for thumb2. */
2516 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2517 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2522 int final_invert = 0;
2523 int can_negate_initial = 0;
2526 int num_bits_set = 0;
2527 int set_sign_bit_copies = 0;
2528 int clear_sign_bit_copies = 0;
2529 int clear_zero_bit_copies = 0;
2530 int set_zero_bit_copies = 0;
2532 unsigned HOST_WIDE_INT temp1, temp2;
2533 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2534 int step_size = TARGET_ARM ? 2 : 1;
2536 /* Find out which operations are safe for a given CODE. Also do a quick
2537 check for degenerate cases; these can occur when DImode operations
2538 are split. */
2549 can_negate_initial = 1;
2553 if (remainder == 0xffffffff)
2556 emit_constant_insn (cond,
2557 gen_rtx_SET (VOIDmode, target,
2558 GEN_INT (ARM_SIGN_EXTEND (val))));
2564 if (reload_completed && rtx_equal_p (target, source))
2568 emit_constant_insn (cond,
2569 gen_rtx_SET (VOIDmode, target, source));
2581 emit_constant_insn (cond,
2582 gen_rtx_SET (VOIDmode, target, const0_rtx));
2585 if (remainder == 0xffffffff)
2587 if (reload_completed && rtx_equal_p (target, source))
2590 emit_constant_insn (cond,
2591 gen_rtx_SET (VOIDmode, target, source));
2600 if (reload_completed && rtx_equal_p (target, source))
2603 emit_constant_insn (cond,
2604 gen_rtx_SET (VOIDmode, target, source));
2608 if (remainder == 0xffffffff)
2611 emit_constant_insn (cond,
2612 gen_rtx_SET (VOIDmode, target,
2613 gen_rtx_NOT (mode, source)));
2619 /* We treat MINUS as (val - source), since (source - val) is always
2620 passed as (source + (-val)). */
2624 emit_constant_insn (cond,
2625 gen_rtx_SET (VOIDmode, target,
2626 gen_rtx_NEG (mode, source)));
2629 if (const_ok_for_arm (val))
2632 emit_constant_insn (cond,
2633 gen_rtx_SET (VOIDmode, target,
2634 gen_rtx_MINUS (mode, GEN_INT (val),
2646 /* If we can do it in one insn get out quickly. */
2647 if (const_ok_for_arm (val)
2648 || (can_negate_initial && const_ok_for_arm (-val))
2649 || (can_invert && const_ok_for_arm (~val)))
2652 emit_constant_insn (cond,
2653 gen_rtx_SET (VOIDmode, target,
2655 ? gen_rtx_fmt_ee (code, mode, source,
2661 /* Calculate a few attributes that may be useful for specific
2662 optimizations. */
2663 /* Count number of leading zeros. */
2664 for (i = 31; i >= 0; i--)
2666 if ((remainder & (1 << i)) == 0)
2667 clear_sign_bit_copies++;
2672 /* Count number of leading 1's. */
2673 for (i = 31; i >= 0; i--)
2675 if ((remainder & (1 << i)) != 0)
2676 set_sign_bit_copies++;
2681 /* Count number of trailing zeros. */
2682 for (i = 0; i <= 31; i++)
2684 if ((remainder & (1 << i)) == 0)
2685 clear_zero_bit_copies++;
2690 /* Count number of trailing 1's. */
2691 for (i = 0; i <= 31; i++)
2693 if ((remainder & (1 << i)) != 0)
2694 set_zero_bit_copies++;
2702 /* See if we can use movw. */
2703 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2706 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2711 /* See if we can do this by sign-extending a constant that is known
2712 to be negative. This is a good way of doing it, since the shift
2713 may well merge into a subsequent insn. */
2714 if (set_sign_bit_copies > 1)
2716 if (const_ok_for_arm
2717 (temp1 = ARM_SIGN_EXTEND (remainder
2718 << (set_sign_bit_copies - 1))))
2722 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2723 emit_constant_insn (cond,
2724 gen_rtx_SET (VOIDmode, new_src,
2726 emit_constant_insn (cond,
2727 gen_ashrsi3 (target, new_src,
2728 GEN_INT (set_sign_bit_copies - 1)));
2732 /* For an inverted constant, we will need to set the low bits;
2733 these will be shifted out of harm's way. */
2734 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2735 if (const_ok_for_arm (~temp1))
2739 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2740 emit_constant_insn (cond,
2741 gen_rtx_SET (VOIDmode, new_src,
2743 emit_constant_insn (cond,
2744 gen_ashrsi3 (target, new_src,
2745 GEN_INT (set_sign_bit_copies - 1)));
2751 /* See if we can calculate the value as the difference between two
2752 valid immediates. */
2753 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2755 int topshift = clear_sign_bit_copies & ~1;
2757 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2758 & (0xff000000 >> topshift));
2760 /* If temp1 is zero, then that means the 9 most significant
2761 bits of remainder were 1 and we've caused it to overflow.
2762 When topshift is 0 we don't need to do anything since we
2763 can borrow from 'bit 32'. */
2764 if (temp1 == 0 && topshift != 0)
2765 temp1 = 0x80000000 >> (topshift - 1);
2767 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2769 if (const_ok_for_arm (temp2))
2773 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2774 emit_constant_insn (cond,
2775 gen_rtx_SET (VOIDmode, new_src,
2777 emit_constant_insn (cond,
2778 gen_addsi3 (target, new_src,
2786 /* See if we can generate this by setting the bottom (or the top)
2787 16 bits, and then shifting these into the other half of the
2788 word. We only look for the simplest cases; to do more would cost
2789 too much. Be careful, however, not to generate this when the
2790 alternative would take fewer insns. */
2791 if (val & 0xffff0000)
2793 temp1 = remainder & 0xffff0000;
2794 temp2 = remainder & 0x0000ffff;
2796 /* Overlaps outside this range are best done using other methods. */
2797 for (i = 9; i < 24; i++)
2799 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2800 && !const_ok_for_arm (temp2))
2802 rtx new_src = (subtargets
2803 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2805 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2806 source, subtargets, generate);
2814 gen_rtx_ASHIFT (mode, source,
2821 /* Don't duplicate cases already considered. */
2822 for (i = 17; i < 24; i++)
2824 if (((temp1 | (temp1 >> i)) == remainder)
2825 && !const_ok_for_arm (temp1))
2827 rtx new_src = (subtargets
2828 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2830 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2831 source, subtargets, generate);
2836 gen_rtx_SET (VOIDmode, target,
2839 gen_rtx_LSHIFTRT (mode, source,
2850 /* If we have IOR or XOR, and the constant can be loaded in a
2851 single instruction, and we can find a temporary to put it in,
2852 then this can be done in two instructions instead of 3-4. */
2853 if (subtargets
2854 /* TARGET can't be NULL if SUBTARGETS is 0. */
2855 || (reload_completed && !reg_mentioned_p (target, source)))
2857 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2861 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2863 emit_constant_insn (cond,
2864 gen_rtx_SET (VOIDmode, sub,
2866 emit_constant_insn (cond,
2867 gen_rtx_SET (VOIDmode, target,
2868 gen_rtx_fmt_ee (code, mode,
2879 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2880 and the remainder 0s, e.g. 0xfff00000)
2881 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2883 This can be done in 2 instructions by using shifts with mov or mvn.
2885 e.g. for x = x | 0xfff00000 we generate:
2887 mvn r0, r0, asl #12
2888 mvn r0, r0, lsr #12 */
2889 if (set_sign_bit_copies > 8
2890 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2894 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2895 rtx shift = GEN_INT (set_sign_bit_copies);
2899 gen_rtx_SET (VOIDmode, sub,
2901 gen_rtx_ASHIFT (mode,
2906 gen_rtx_SET (VOIDmode, target,
2908 gen_rtx_LSHIFTRT (mode, sub,
2915 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2917 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2919 For e.g. r0 = r0 | 0xfff we generate:
2921 mvn r0, r0, lsr #12
2922 mvn r0, r0, asl #12 */
2924 if (set_zero_bit_copies > 8
2925 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2929 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2930 rtx shift = GEN_INT (set_zero_bit_copies);
2934 gen_rtx_SET (VOIDmode, sub,
2936 gen_rtx_LSHIFTRT (mode,
2941 gen_rtx_SET (VOIDmode, target,
2943 gen_rtx_ASHIFT (mode, sub,
2949 /* This will never be reached for Thumb-2 because orn is a valid
2950 instruction. This is for Thumb-1 and the ARM 32-bit cases.
2952 x = y | constant (such that ~constant is a valid constant)
2954 x = ~(~y & ~constant).
2956 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2960 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2961 emit_constant_insn (cond,
2962 gen_rtx_SET (VOIDmode, sub,
2963 gen_rtx_NOT (mode, source)));
2964 source = sub;
2965 if (subtargets)
2966 sub = gen_reg_rtx (mode);
2967 emit_constant_insn (cond,
2968 gen_rtx_SET (VOIDmode, sub,
2969 gen_rtx_AND (mode, source,
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, target,
2973 gen_rtx_NOT (mode, sub)));
2980 /* See if two shifts will do 2 or more insns' worth of work. */
2981 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2983 HOST_WIDE_INT shift_mask = ((0xffffffff
2984 << (32 - clear_sign_bit_copies))
2987 if ((remainder | shift_mask) != 0xffffffff)
2991 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2992 insns = arm_gen_constant (AND, mode, cond,
2993 remainder | shift_mask,
2994 new_src, source, subtargets, 1);
2999 rtx targ = subtargets ? NULL_RTX : target;
3000 insns = arm_gen_constant (AND, mode, cond,
3001 remainder | shift_mask,
3002 targ, source, subtargets, 0);
3008 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3009 rtx shift = GEN_INT (clear_sign_bit_copies);
3011 emit_insn (gen_ashlsi3 (new_src, source, shift));
3012 emit_insn (gen_lshrsi3 (target, new_src, shift));
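/* For example, x &= 0x0000ffff has sixteen clear sign-bit copies and
   is emitted as the two-instruction sequence
   mov r0, r0, asl #16
   mov r0, r0, lsr #16 */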
3018 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3020 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3022 if ((remainder | shift_mask) != 0xffffffff)
3026 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3028 insns = arm_gen_constant (AND, mode, cond,
3029 remainder | shift_mask,
3030 new_src, source, subtargets, 1);
3035 rtx targ = subtargets ? NULL_RTX : target;
3037 insns = arm_gen_constant (AND, mode, cond,
3038 remainder | shift_mask,
3039 targ, source, subtargets, 0);
3045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3046 rtx shift = GEN_INT (clear_zero_bit_copies);
3048 emit_insn (gen_lshrsi3 (new_src, source, shift));
3049 emit_insn (gen_ashlsi3 (target, new_src, shift));
3061 for (i = 0; i < 32; i++)
3062 if (remainder & (1 << i))
3063 num_bits_set++;
3065 if ((code == AND)
3066 || (code != IOR && can_invert && num_bits_set > 16))
3067 remainder ^= 0xffffffff;
3068 else if (code == PLUS && num_bits_set > 16)
3069 remainder = (-remainder) & 0xffffffff;
3071 /* For XOR, if more than half the bits are set and there's a sequence
3072 of more than 8 consecutive ones in the pattern then we can XOR by the
3073 inverted constant and then invert the final result; this may save an
3074 instruction and might also lead to the final mvn being merged with
3075 some other operation. */
3076 else if (code == XOR && num_bits_set > 16
3077 && (count_insns_for_constant (remainder ^ 0xffffffff,
3079 (remainder ^ 0xffffffff))
3080 < count_insns_for_constant (remainder,
3081 find_best_start (remainder))))
3083 remainder ^= 0xffffffff;
3092 /* Now try to find a way of doing the job in either two or three
3093 instructions.
3094 We start by looking for the largest block of zeros that are aligned on
3095 a 2-bit boundary, we then fill up the temps, wrapping around to the
3096 top of the word when we drop off the bottom.
3097 In the worst case this code should produce no more than four insns.
3098 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3099 best place to start. */
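/* Worked example: in ARM mode 0x12345678 splits into the rotated
   8-bit chunks 0x12000000, 0x00340000, 0x00005600 and 0x00000078,
   i.e. one MOV followed by three ORRs. */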
3101 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3102 the same. */
3104 /* Now start emitting the insns. */
3105 i = find_best_start (remainder);
3112 if (remainder & (3 << (i - 2)))
3117 temp1 = remainder & ((0x0ff << end)
3118 | ((i < end) ? (0xff >> (32 - end)) : 0));
3119 remainder &= ~temp1;
3123 rtx new_src, temp1_rtx;
3125 if (code == SET || code == MINUS)
3127 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3128 if (can_invert && code != MINUS)
3133 if ((final_invert || remainder) && subtargets)
3134 new_src = gen_reg_rtx (mode);
3139 else if (can_negate)
3143 temp1 = trunc_int_for_mode (temp1, mode);
3144 temp1_rtx = GEN_INT (temp1);
3148 else if (code == MINUS)
3149 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3151 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3153 emit_constant_insn (cond,
3154 gen_rtx_SET (VOIDmode, new_src,
3164 else if (code == MINUS)
3170 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3171 shifts. */
3180 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3181 gen_rtx_NOT (mode, source)));
3188 /* Canonicalize a comparison so that we are more likely to recognize it.
3189 This can be done for a few constant compares, where we can make the
3190 immediate value easier to load. */
3193 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3196 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3197 unsigned HOST_WIDE_INT maxval;
3198 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3208 if (i != maxval
3209 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3211 *op1 = GEN_INT (i + 1);
3212 return code == GT ? GE : LT;
3218 if (i != ~maxval
3219 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3221 *op1 = GEN_INT (i - 1);
3222 return code == GE ? GT : LE;
3228 if (i != ~((unsigned HOST_WIDE_INT) 0)
3229 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3231 *op1 = GEN_INT (i + 1);
3232 return code == GTU ? GEU : LTU;
3238 if (i != 0
3239 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3241 *op1 = GEN_INT (i - 1);
3242 return code == GEU ? GTU : LEU;
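/* For example, (x > 0xfeff) becomes (x >= 0xff00): 0xfeff is not a
   valid ARM immediate, but 0xff00 (0xff shifted left by eight) is,
   so the canonicalized comparison needs only a single CMP. */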
3254 /* Define how to find the value returned by a function. */
3257 arm_function_value (const_tree type, const_tree func,
3258 bool outgoing ATTRIBUTE_UNUSED)
3260 enum machine_mode mode;
3261 int unsignedp ATTRIBUTE_UNUSED;
3262 rtx r ATTRIBUTE_UNUSED;
3264 mode = TYPE_MODE (type);
3266 if (TARGET_AAPCS_BASED)
3267 return aapcs_allocate_return_reg (mode, type, func);
3269 /* Promote integer types. */
3270 if (INTEGRAL_TYPE_P (type))
3271 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3273 /* Promotes small structs returned in a register to full-word size
3274 for big-endian AAPCS. */
3275 if (arm_return_in_msb (type))
3277 HOST_WIDE_INT size = int_size_in_bytes (type);
3278 if (size % UNITS_PER_WORD != 0)
3280 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3281 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3285 return LIBCALL_VALUE (mode);
3289 libcall_eq (const void *p1, const void *p2)
3291 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3295 libcall_hash (const void *p1)
3297 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3301 add_libcall (htab_t htab, rtx libcall)
3303 *htab_find_slot (htab, libcall, INSERT) = libcall;
3307 arm_libcall_uses_aapcs_base (const_rtx libcall)
3309 static bool init_done = false;
3310 static htab_t libcall_htab;
3312 if (!init_done)
3314 init_done = true;
3316 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3317 NULL);
3318 add_libcall (libcall_htab,
3319 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3320 add_libcall (libcall_htab,
3321 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3322 add_libcall (libcall_htab,
3323 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3324 add_libcall (libcall_htab,
3325 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3327 add_libcall (libcall_htab,
3328 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3329 add_libcall (libcall_htab,
3330 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3331 add_libcall (libcall_htab,
3332 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3333 add_libcall (libcall_htab,
3334 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3336 add_libcall (libcall_htab,
3337 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3338 add_libcall (libcall_htab,
3339 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3340 add_libcall (libcall_htab,
3341 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3342 add_libcall (libcall_htab,
3343 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3344 add_libcall (libcall_htab,
3345 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3346 add_libcall (libcall_htab,
3347 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3350 return libcall && htab_find (libcall_htab, libcall) != NULL;
3354 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3356 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3357 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3359 /* The following libcalls return their result in integer registers,
3360 even though they return a floating point value. */
3361 if (arm_libcall_uses_aapcs_base (libcall))
3362 return gen_rtx_REG (mode, ARG_REGISTER(1));
3366 return LIBCALL_VALUE (mode);
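/* For example, on an AAPCS-based target the AEABI helper __aeabi_i2d
   (int to double, registered via sfloat_optab above) returns its
   result in r0/r1 even under -mfloat-abi=hard, because it follows the
   base PCS rather than the VFP variant. */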
3369 /* Determine the amount of memory needed to store the possible return
3370 registers of an untyped call. */
3372 arm_apply_result_size (void)
3378 if (TARGET_HARD_FLOAT_ABI)
3384 if (TARGET_MAVERICK)
3387 if (TARGET_IWMMXT_ABI)
3394 /* Decide whether TYPE should be returned in memory (true)
3395 or in a register (false). FNTYPE is the type of the function making
3396 the call. */
3397 static bool
3398 arm_return_in_memory (const_tree type, const_tree fntype)
3402 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3404 if (TARGET_AAPCS_BASED)
3406 /* Simple, non-aggregate types (i.e. not including vectors and
3407 complex) are always returned in a register (or registers).
3408 We don't care about which register here, so we can short-cut
3409 some of the detail. */
3410 if (!AGGREGATE_TYPE_P (type)
3411 && TREE_CODE (type) != VECTOR_TYPE
3412 && TREE_CODE (type) != COMPLEX_TYPE)
3415 /* Any return value that is no larger than one word can be
3416 returned in r0. */
3417 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3418 return false;
3420 /* Check any available co-processors to see if they accept the
3421 type as a register candidate (VFP, for example, can return
3422 some aggregates in consecutive registers). These aren't
3423 available if the call is variadic. */
3424 if (aapcs_select_return_coproc (type, fntype) >= 0)
3425 return false;
3427 /* Vector values should be returned using ARM registers, not
3428 memory (unless they're over 16 bytes, which will break since
3429 we only have four call-clobbered registers to play with). */
3430 if (TREE_CODE (type) == VECTOR_TYPE)
3431 return (size < 0 || size > (4 * UNITS_PER_WORD));
3433 /* The rest go in memory. */
3434 return true;
3437 if (TREE_CODE (type) == VECTOR_TYPE)
3438 return (size < 0 || size > (4 * UNITS_PER_WORD));
3440 if (!AGGREGATE_TYPE_P (type) &&
3441 (TREE_CODE (type) != VECTOR_TYPE))
3442 /* All simple types are returned in registers. */
3443 return false;
3445 if (arm_abi != ARM_ABI_APCS)
3447 /* ATPCS and later return aggregate types in memory only if they are
3448 larger than a word (or are variable size). */
3449 return (size < 0 || size > UNITS_PER_WORD);
3452 /* For the arm-wince targets we choose to be compatible with Microsoft's
3453 ARM and Thumb compilers, which always return aggregates in memory. */
3454 #ifndef ARM_WINCE
3455 /* All structures/unions bigger than one word are returned in memory.
3456 Also catch the case where int_size_in_bytes returns -1. In this case
3457 the aggregate is either huge or of variable size, and in either case
3458 we will want to return it via memory and not in a register. */
3459 if (size < 0 || size > UNITS_PER_WORD)
3460 return true;
3462 if (TREE_CODE (type) == RECORD_TYPE)
3464 tree field;
3466 /* For a struct the APCS says that we only return in a register
3467 if the type is 'integer like' and every addressable element
3468 has an offset of zero. For practical purposes this means
3469 that the structure can have at most one non bit-field element
3470 and that this element must be the first one in the structure. */
3472 /* Find the first field, ignoring non FIELD_DECL things which will
3473 have been created by C++. */
3474 for (field = TYPE_FIELDS (type);
3475 field && TREE_CODE (field) != FIELD_DECL;
3476 field = TREE_CHAIN (field))
3479 if (field == NULL)
3480 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3482 /* Check that the first field is valid for returning in a register. */
3484 /* ... Floats are not allowed */
3485 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3486 return true;
3488 /* ... Aggregates that are not themselves valid for returning in
3489 a register are not allowed. */
3490 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3491 return true;
3493 /* Now check the remaining fields, if any. Only bitfields are allowed,
3494 since they are not addressable. */
3495 for (field = TREE_CHAIN (field);
3497 field = TREE_CHAIN (field))
3499 if (TREE_CODE (field) != FIELD_DECL)
3500 continue;
3502 if (!DECL_BIT_FIELD_TYPE (field))
3503 return true;
3506 return false;
3509 if (TREE_CODE (type) == UNION_TYPE)
3511 tree field;
3513 /* Unions can be returned in registers if every element is
3514 integral, or can be returned in an integer register. */
3515 for (field = TYPE_FIELDS (type);
3517 field = TREE_CHAIN (field))
3519 if (TREE_CODE (field) != FIELD_DECL)
3520 continue;
3522 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3523 return true;
3525 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3526 return true;
3529 return false;
3531 #endif /* not ARM_WINCE */
3533 /* Return all other types in memory. */
3534 return true;
3537 /* Indicate whether or not words of a double are in big-endian order. */
3540 arm_float_words_big_endian (void)
3542 if (TARGET_MAVERICK)
3543 return 0;
3545 /* For FPA, float words are always big-endian. For VFP, float words
3546 follow the memory system mode. */
3548 if (TARGET_FPA)
3549 return 1;
3551 if (TARGET_VFP)
3554 return (TARGET_BIG_END ? 1 : 0);
3556 return 1;
3559 const struct pcs_attribute_arg
3560 {
3561 const char *arg;
3562 enum arm_pcs value;
3563 } pcs_attribute_args[] =
3565 {"aapcs", ARM_PCS_AAPCS},
3566 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3568 /* We could recognize these, but changes would be needed elsewhere
3569 to implement them. */
3570 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3571 {"atpcs", ARM_PCS_ATPCS},
3572 {"apcs", ARM_PCS_APCS},
3574 {NULL, ARM_PCS_UNKNOWN}
3578 arm_pcs_from_attribute (tree attr)
3580 const struct pcs_attribute_arg *ptr;
3581 const char *arg;
3583 /* Get the value of the argument. */
3584 if (TREE_VALUE (attr) == NULL_TREE
3585 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3586 return ARM_PCS_UNKNOWN;
3588 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3590 /* Check it against the list of known arguments. */
3591 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3592 if (streq (arg, ptr->arg))
3593 return ptr->value;
3595 /* An unrecognized PCS variant. */
3596 return ARM_PCS_UNKNOWN;
3599 /* Get the PCS variant to use for this call. TYPE is the function's type
3600 specification, DECL is the specific declaration. DECL may be null if
3601 the call could be indirect or if this is a library call. */
3603 arm_get_pcs_model (const_tree type, const_tree decl)
3605 bool user_convention = false;
3606 enum arm_pcs user_pcs = arm_pcs_default;
3607 tree attr;
3611 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3612 if (attr)
3614 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3615 user_convention = true;
3618 if (TARGET_AAPCS_BASED)
3620 /* Detect varargs functions. These always use the base rules
3621 (no argument is ever a candidate for a co-processor
3622 register). */
3623 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3624 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3625 != void_type_node));
3627 if (user_convention)
3629 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3630 sorry ("Non-AAPCS derived PCS variant");
3631 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3632 error ("Variadic functions must use the base AAPCS variant");
3636 return ARM_PCS_AAPCS;
3637 else if (user_convention)
3639 else if (decl && flag_unit_at_a_time)
3641 /* Local functions never leak outside this compilation unit,
3642 so we are free to use whatever conventions are
3643 appropriate. */
3644 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3645 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3647 return ARM_PCS_AAPCS_LOCAL;
3650 else if (user_convention && user_pcs != arm_pcs_default)
3651 sorry ("PCS variant");
3653 /* For everything else we use the target's default. */
3654 return arm_pcs_default;
3659 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3660 const_tree fntype ATTRIBUTE_UNUSED,
3661 rtx libcall ATTRIBUTE_UNUSED,
3662 const_tree fndecl ATTRIBUTE_UNUSED)
3664 /* Record the unallocated VFP registers. */
3665 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3666 pcum->aapcs_vfp_reg_alloc = 0;
3669 /* Walk down the type tree of TYPE counting consecutive base elements.
3670 If *MODEP is VOIDmode, then set it to the first valid floating point
3671 type. If a non-floating point type is found, or if a floating point
3672 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3673 otherwise return the count in the sub-tree. */
3675 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3677 enum machine_mode mode;
3680 switch (TREE_CODE (type))
3683 mode = TYPE_MODE (type);
3684 if (mode != DFmode && mode != SFmode)
3687 if (*modep == VOIDmode)
3688 *modep = mode;
3696 mode = TYPE_MODE (TREE_TYPE (type));
3697 if (mode != DFmode && mode != SFmode)
3700 if (*modep == VOIDmode)
3701 *modep = mode;
3709 /* Use V2SImode and V4SImode as representatives of all 64-bit
3710 and 128-bit vector types, whether or not those modes are
3711 supported with the present options. */
3712 size = int_size_in_bytes (type);
3725 if (*modep == VOIDmode)
3726 *modep = mode;
3728 /* Vector modes are considered to be opaque: two vectors are
3729 equivalent for the purposes of being homogeneous aggregates
3730 if they are the same size. */
3739 tree index = TYPE_DOMAIN (type);
3741 /* Can't handle incomplete types. */
3742 if (!COMPLETE_TYPE_P(type))
3743 return -1;
3745 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3746 if (count == -1
3748 || !TYPE_MAX_VALUE (index)
3749 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3750 || !TYPE_MIN_VALUE (index)
3751 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3752 || count < 0)
3753 return -1;
3755 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3756 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3758 /* There must be no padding. */
3759 if (!host_integerp (TYPE_SIZE (type), 1)
3760 || (tree_low_cst (TYPE_SIZE (type), 1)
3761 != count * GET_MODE_BITSIZE (*modep)))
3762 return -1;
3764 return count;
3773 /* Can't handle incomplete types. */
3774 if (!COMPLETE_TYPE_P(type))
3775 return -1;
3777 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3779 if (TREE_CODE (field) != FIELD_DECL)
3780 continue;
3782 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3788 /* There must be no padding. */
3789 if (!host_integerp (TYPE_SIZE (type), 1)
3790 || (tree_low_cst (TYPE_SIZE (type), 1)
3791 != count * GET_MODE_BITSIZE (*modep)))
3792 return -1;
3794 return count;
3798 case QUAL_UNION_TYPE:
3800 /* These aren't very interesting except in a degenerate case. */
3805 /* Can't handle incomplete types. */
3806 if (!COMPLETE_TYPE_P(type))
3807 return -1;
3809 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3811 if (TREE_CODE (field) != FIELD_DECL)
3812 continue;
3814 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3817 count = count > sub_count ? count : sub_count;
3820 /* There must be no padding. */
3821 if (!host_integerp (TYPE_SIZE (type), 1)
3822 || (tree_low_cst (TYPE_SIZE (type), 1)
3823 != count * GET_MODE_BITSIZE (*modep)))
3824 return -1;
3826 return count;
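/* For example, struct { float x, y, z; } yields a count of 3 with
   *MODEP == SFmode and is thus a homogeneous aggregate eligible for
   three consecutive VFP registers, whereas struct { float f; int i; }
   fails the walk and falls back to the core-register rules. */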
3836 /* Return true if PCS_VARIANT should use VFP registers. */
3838 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3840 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3843 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3846 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3847 (TARGET_VFP_DOUBLE || !is_double));
3851 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3852 enum machine_mode mode, const_tree type,
3853 enum machine_mode *base_mode, int *count)
3855 enum machine_mode new_mode = VOIDmode;
3857 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3858 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3859 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3861 *count = 1;
3862 new_mode = mode;
3864 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3866 *count = 2;
3867 new_mode = (mode == DCmode ? DFmode : SFmode);
3869 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3871 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3873 if (ag_count > 0 && ag_count <= 4)
3874 *count = ag_count;
3875 else
3876 return false;
3882 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3883 return false;
3885 *base_mode = new_mode;
3886 return true;
3890 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3891 enum machine_mode mode, const_tree type)
3893 int count ATTRIBUTE_UNUSED;
3894 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3896 if (!use_vfp_abi (pcs_variant, false))
3897 return false;
3898 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3899 &ag_mode, &count);
3903 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3906 if (!use_vfp_abi (pcum->pcs_variant, false))
3907 return false;
3909 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3910 &pcum->aapcs_vfp_rmode,
3911 &pcum->aapcs_vfp_rcount);
3915 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3916 const_tree type ATTRIBUTE_UNUSED)
3918 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3919 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3922 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3923 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3925 pcum->aapcs_vfp_reg_alloc = mask << regno;
3926 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3929 int rcount = pcum->aapcs_vfp_rcount;
3931 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3935 /* Avoid using unsupported vector modes. */
3936 if (rmode == V2SImode)
3938 else if (rmode == V4SImode)
3945 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3946 for (i = 0; i < rcount; i++)
3948 rtx tmp = gen_rtx_REG (rmode,
3949 FIRST_VFP_REGNUM + regno + i * rshift);
3950 tmp = gen_rtx_EXPR_LIST
3951 (VOIDmode, tmp,
3952 GEN_INT (i * GET_MODE_SIZE (rmode)));
3953 XVECEXP (par, 0, i) = tmp;
3956 pcum->aapcs_reg = par;
3958 else
3959 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3960 return true;
3962 return false;
3966 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3967 enum machine_mode mode,
3968 const_tree type ATTRIBUTE_UNUSED)
3970 if (!use_vfp_abi (pcs_variant, false))
3971 return NULL;
3973 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3976 enum machine_mode ag_mode;
3981 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3982 &ag_mode, &count);
3986 if (ag_mode == V2SImode)
3988 else if (ag_mode == V4SImode)
3994 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3995 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3996 for (i = 0; i < count; i++)
3998 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3999 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4000 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4001 XVECEXP (par, 0, i) = tmp;
4004 return par;
4007 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4011 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4012 enum machine_mode mode ATTRIBUTE_UNUSED,
4013 const_tree type ATTRIBUTE_UNUSED)
4015 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4016 pcum->aapcs_vfp_reg_alloc = 0;
4020 #define AAPCS_CP(X) \
4022 aapcs_ ## X ## _cum_init, \
4023 aapcs_ ## X ## _is_call_candidate, \
4024 aapcs_ ## X ## _allocate, \
4025 aapcs_ ## X ## _is_return_candidate, \
4026 aapcs_ ## X ## _allocate_return_reg, \
4027 aapcs_ ## X ## _advance \
4030 /* Table of co-processors that can be used to pass arguments in
4031 registers. Ideally no argument should be a candidate for more than
4032 one co-processor table entry, but the table is processed in order
4033 and stops after the first match. If that entry then fails to put
4034 the argument into a co-processor register, the argument will go on
4035 the stack. */
4038 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4039 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4041 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4042 BLKmode) is a candidate for this co-processor's registers; this
4043 function should ignore any position-dependent state in
4044 CUMULATIVE_ARGS and only use call-type dependent information. */
4045 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4047 /* Return true if the argument does get a co-processor register; it
4048 should set aapcs_reg to an RTX of the register allocated as is
4049 required for a return from FUNCTION_ARG. */
4050 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4052 /* Return true if a result of mode MODE (or type TYPE if MODE is
4053 BLKmode) can be returned in this co-processor's registers. */
4054 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4056 /* Allocate and return an RTX element to hold the return type of a
4057 call; this routine must not fail and will only be called if
4058 is_return_candidate returned true with the same parameters. */
4059 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4061 /* Finish processing this argument and prepare to start processing
4062 the next one. */
4063 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4064 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4072 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4077 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4078 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4079 return i;
4081 return -1;
4085 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4087 /* We aren't passed a decl, so we can't check that a call is local.
4088 However, it isn't clear that that would be a win anyway, since it
4089 might limit some tail-calling opportunities. */
4090 enum arm_pcs pcs_variant;
4094 const_tree fndecl = NULL_TREE;
4096 if (TREE_CODE (fntype) == FUNCTION_DECL)
4099 fntype = TREE_TYPE (fntype);
4102 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4105 pcs_variant = arm_pcs_default;
4107 if (pcs_variant != ARM_PCS_AAPCS)
4111 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4112 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4113 TYPE_MODE (type),
4114 type))
4115 return i;
4117 return -1;
4121 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4124 /* We aren't passed a decl, so we can't check that a call is local.
4125 However, it isn't clear that that would be a win anyway, since it
4126 might limit some tail-calling opportunities. */
4127 enum arm_pcs pcs_variant;
4128 int unsignedp ATTRIBUTE_UNUSED;
4132 const_tree fndecl = NULL_TREE;
4134 if (TREE_CODE (fntype) == FUNCTION_DECL)
4137 fntype = TREE_TYPE (fntype);
4140 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4143 pcs_variant = arm_pcs_default;
4145 /* Promote integer types. */
4146 if (type && INTEGRAL_TYPE_P (type))
4147 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4149 if (pcs_variant != ARM_PCS_AAPCS)
4153 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4154 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4155 type))
4156 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4157 mode, type);
4160 /* Promotes small structs returned in a register to full-word size
4161 for big-endian AAPCS. */
4162 if (type && arm_return_in_msb (type))
4164 HOST_WIDE_INT size = int_size_in_bytes (type);
4165 if (size % UNITS_PER_WORD != 0)
4167 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4168 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4172 return gen_rtx_REG (mode, R0_REGNUM);
4176 aapcs_libcall_value (enum machine_mode mode)
4178 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4181 /* Lay out a function argument using the AAPCS rules. The rule
4182 numbers referred to here are those in the AAPCS. */
4184 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4185 tree type, int named)
4190 /* We only need to do this once per argument. */
4191 if (pcum->aapcs_arg_processed)
4194 pcum->aapcs_arg_processed = true;
4196 /* Special case: if named is false then we are handling an incoming
4197 anonymous argument which is on the stack. */
4201 /* Is this a potential co-processor register candidate? */
4202 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4204 int slot = aapcs_select_call_coproc (pcum, mode, type);
4205 pcum->aapcs_cprc_slot = slot;
4207 /* We don't have to apply any of the rules from part B of the
4208 preparation phase; these are handled elsewhere in the
4209 compiler. */
4211 if (slot >= 0)
4213 /* A co-processor register candidate goes either in its own
4214 class of registers or on the stack. */
4215 if (!pcum->aapcs_cprc_failed[slot])
4217 /* C1.cp - Try to allocate the argument to co-processor
4219 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4220 return;
4222 /* C2.cp - Put the argument on the stack and note that we
4223 can't assign any more candidates in this slot. We also
4224 need to note that we have allocated stack space, so that
4225 we won't later try to split a non-cprc candidate between
4226 core registers and the stack. */
4227 pcum->aapcs_cprc_failed[slot] = true;
4228 pcum->can_split = false;
4231 /* We didn't get a register, so this argument goes on the
4232 stack. */
4233 gcc_assert (pcum->can_split == false);
4238 /* C3 - For double-word aligned arguments, round the NCRN up to the
4239 next even number. */
4240 ncrn = pcum->aapcs_ncrn;
4241 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4244 nregs = ARM_NUM_REGS2(mode, type);
4246 /* Sigh, this test should really assert that nregs > 0, but a GCC
4247 extension allows empty structs and then gives them empty size; it
4248 then allows such a structure to be passed by value. For some of
4249 the code below we have to pretend that such an argument has
4250 non-zero size so that we 'locate' it correctly either in
4251 registers or on the stack. */
4252 gcc_assert (nregs >= 0);
4254 nregs2 = nregs ? nregs : 1;
4256 /* C4 - Argument fits entirely in core registers. */
4257 if (ncrn + nregs2 <= NUM_ARG_REGS)
4259 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4260 pcum->aapcs_next_ncrn = ncrn + nregs;
4264 /* C5 - Some core registers left and there are no arguments already
4265 on the stack: split this argument between the remaining core
4266 registers and the stack. */
4267 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4269 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4270 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4271 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
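/* Illustrative case for C5: a 12-byte struct arriving with ncrn == 2
   needs three words, so two go in r2/r3 (aapcs_partial becomes 8
   bytes) and the final word is placed on the stack.  */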
4275 /* C6 - NCRN is set to 4. */
4276 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4278 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
4282 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4283 for a call to a function whose data type is FNTYPE.
4284 For a library call, FNTYPE is NULL. */
4286 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4288 tree fndecl ATTRIBUTE_UNUSED)
4290 /* Long call handling. */
4292 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4294 pcum->pcs_variant = arm_pcs_default;
4296 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4298 if (arm_libcall_uses_aapcs_base (libname))
4299 pcum->pcs_variant = ARM_PCS_AAPCS;
4301 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4302 pcum->aapcs_reg = NULL_RTX;
4303 pcum->aapcs_partial = 0;
4304 pcum->aapcs_arg_processed = false;
4305 pcum->aapcs_cprc_slot = -1;
4306 pcum->can_split = true;
4308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4312 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4314 pcum->aapcs_cprc_failed[i] = false;
4315 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4323 /* On the ARM, the offset starts at 0. */
4325 pcum->iwmmxt_nregs = 0;
4326 pcum->can_split = true;
4328 /* Varargs vectors are treated the same as long long.
4329 named_count avoids having to change the way arm handles 'named'. */
4330 pcum->named_count = 0;
4333 if (TARGET_REALLY_IWMMXT && fntype)
4337 for (fn_arg = TYPE_ARG_TYPES (fntype);
4339 fn_arg = TREE_CHAIN (fn_arg))
4340 pcum->named_count += 1;
4342 if (! pcum->named_count)
4343 pcum->named_count = INT_MAX;
4348 /* Return true if mode/type need doubleword alignment. */
4350 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4352 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4353 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
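/* E.g. on AAPCS targets DImode has 64-bit alignment, which exceeds
   the 32-bit PARM_BOUNDARY, so this answers true; SImode does not.  */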
4357 /* Determine where to put an argument to a function.
4358 Value is zero to push the argument on the stack,
4359 or a hard register in which to store the argument.
4361 MODE is the argument's machine mode.
4362 TYPE is the data type of the argument (as a tree).
4363 This is null for libcalls where that information may
4365 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4366 the preceding args and about the function being called.
4367 NAMED is nonzero if this argument is a named parameter
4368 (otherwise it is an extra parameter matching an ellipsis). */
4371 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4372 tree type, int named)
4376 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4377 a call insn (op3 of a call_value insn). */
4378 if (mode == VOIDmode)
4381 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4383 aapcs_layout_arg (pcum, mode, type, named);
4384 return pcum->aapcs_reg;
4387 /* Varargs vectors are treated the same as long long.
4388 named_count avoids having to change the way arm handles 'named'. */
4389 if (TARGET_IWMMXT_ABI
4390 && arm_vector_mode_supported_p (mode)
4391 && pcum->named_count > pcum->nargs + 1)
4393 if (pcum->iwmmxt_nregs <= 9)
4394 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4397 pcum->can_split = false;
4402 /* Put doubleword aligned quantities in even register pairs. */
4404 && ARM_DOUBLEWORD_ALIGN
4405 && arm_needs_doubleword_align (mode, type))
4408 if (mode == VOIDmode)
4409 /* Pick an arbitrary value for operand 2 of the call insn. */
4412 /* Only allow splitting an arg between regs and memory if all preceding
4413 args were allocated to regs. For args passed by reference we only count
4414 the reference pointer. */
4415 if (pcum->can_split)
4418 nregs = ARM_NUM_REGS2 (mode, type);
4420 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4423 return gen_rtx_REG (mode, pcum->nregs);
4427 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4428 tree type, bool named)
4430 int nregs = pcum->nregs;
4432 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4434 aapcs_layout_arg (pcum, mode, type, named);
4435 return pcum->aapcs_partial;
4438 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4441 if (NUM_ARG_REGS > nregs
4442 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4444 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
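/* Worked example for this path: with three argument words already
   used (nregs == 3), a DImode argument occupies r3 plus one stack
   word, so 4 bytes are reported as passed in registers.  */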
4450 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4451 tree type, bool named)
4453 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4455 aapcs_layout_arg (pcum, mode, type, named);
4457 if (pcum->aapcs_cprc_slot >= 0)
4459 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4461 pcum->aapcs_cprc_slot = -1;
4464 /* Generic stuff. */
4465 pcum->aapcs_arg_processed = false;
4466 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4467 pcum->aapcs_reg = NULL_RTX;
4468 pcum->aapcs_partial = 0;
4473 if (arm_vector_mode_supported_p (mode)
4474 && pcum->named_count > pcum->nargs
4475 && TARGET_IWMMXT_ABI)
4476 pcum->iwmmxt_nregs += 1;
4478 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4482 /* Variable sized types are passed by reference. This is a GCC
4483 extension to the ARM ABI. */
4486 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4487 enum machine_mode mode ATTRIBUTE_UNUSED,
4488 const_tree type, bool named ATTRIBUTE_UNUSED)
4490 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
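/* A TYPE_SIZE that is not an INTEGER_CST arises, for instance, from
   GNU C's variable-length array extension; a hypothetical sketch:

     void g (int n)
     {
       struct s { char buf[n]; } v;
       h (v);
     }

   here V's type has a run-time size, so V is passed to h as a
   pointer to a caller-made copy rather than by value.  */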
4493 /* Encode the current state of the #pragma [no_]long_calls. */
4496 OFF, /* No #pragma [no_]long_calls is in effect. */
4497 LONG, /* #pragma long_calls is in effect. */
4498 SHORT /* #pragma no_long_calls is in effect. */
4501 static arm_pragma_enum arm_pragma_long_calls = OFF;
4504 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4506 arm_pragma_long_calls = LONG;
4510 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4512 arm_pragma_long_calls = SHORT;
4516 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4518 arm_pragma_long_calls = OFF;
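/* A typical usage pattern for these pragmas is

     #pragma long_calls
     void far_away (void);
     #pragma long_calls_off

   with arm_set_default_type_attributes() below attaching the
   long_call attribute to declarations seen while the pragma is in
   effect.  */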
4521 /* Handle an attribute requiring a FUNCTION_DECL;
4522 arguments as in struct attribute_spec.handler. */
4524 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4525 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4527 if (TREE_CODE (*node) != FUNCTION_DECL)
4529 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4531 *no_add_attrs = true;
4537 /* Handle an "interrupt" or "isr" attribute;
4538 arguments as in struct attribute_spec.handler. */
4540 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4545 if (TREE_CODE (*node) != FUNCTION_DECL)
4547 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4549 *no_add_attrs = true;
4551 /* FIXME: the argument if any is checked for type attributes;
4552 should it be checked for decl ones? */
4556 if (TREE_CODE (*node) == FUNCTION_TYPE
4557 || TREE_CODE (*node) == METHOD_TYPE)
4559 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4561 warning (OPT_Wattributes, "%qE attribute ignored",
4563 *no_add_attrs = true;
4566 else if (TREE_CODE (*node) == POINTER_TYPE
4567 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4568 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4569 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4571 *node = build_variant_type_copy (*node);
4572 TREE_TYPE (*node) = build_type_attribute_variant
4574 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4575 *no_add_attrs = true;
4579 /* Possibly pass this attribute on from the type to a decl. */
4580 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4581 | (int) ATTR_FLAG_FUNCTION_NEXT
4582 | (int) ATTR_FLAG_ARRAY_NEXT))
4584 *no_add_attrs = true;
4585 return tree_cons (name, args, NULL_TREE);
4589 warning (OPT_Wattributes, "%qE attribute ignored",
4598 /* Handle a "pcs" attribute; arguments as in struct
4599 attribute_spec.handler. */
4601 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4604 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4606 warning (OPT_Wattributes, "%qE attribute ignored", name);
4607 *no_add_attrs = true;
4612 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4613 /* Handle the "notshared" attribute. This attribute is another way of
4614 requesting hidden visibility. ARM's compiler supports
4615 "__declspec(notshared)"; we support the same thing via an
4619 arm_handle_notshared_attribute (tree *node,
4620 tree name ATTRIBUTE_UNUSED,
4621 tree args ATTRIBUTE_UNUSED,
4622 int flags ATTRIBUTE_UNUSED,
4625 tree decl = TYPE_NAME (*node);
4629 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4630 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4631 *no_add_attrs = false;
4637 /* Return 0 if the attributes for two types are incompatible, 1 if they
4638 are compatible, and 2 if they are nearly compatible (which causes a
4639 warning to be generated). */
4641 arm_comp_type_attributes (const_tree type1, const_tree type2)
4645 /* Check for mismatch of non-default calling convention. */
4646 if (TREE_CODE (type1) != FUNCTION_TYPE)
4649 /* Check for mismatched call attributes. */
4650 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4651 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4652 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4653 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4655 /* Only bother to check if an attribute is defined. */
4656 if (l1 | l2 | s1 | s2)
4658 /* If one type has an attribute, the other must have the same attribute. */
4659 if ((l1 != l2) || (s1 != s2))
4662 /* Disallow mixed attributes. */
4663 if ((l1 & s2) || (l2 & s1))
4667 /* Check for mismatched ISR attribute. */
4668 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4670 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4671 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4673 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4680 /* Assigns default attributes to a newly defined type. This is used to
4681 set short_call/long_call attributes for function types of
4682 functions defined inside corresponding #pragma scopes. */
4684 arm_set_default_type_attributes (tree type)
4686 /* Add __attribute__ ((long_call)) to all functions when inside
4687 #pragma long_calls, or __attribute__ ((short_call)) when
4688 inside #pragma no_long_calls. */
4689 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4691 tree type_attr_list, attr_name;
4692 type_attr_list = TYPE_ATTRIBUTES (type);
4694 if (arm_pragma_long_calls == LONG)
4695 attr_name = get_identifier ("long_call");
4696 else if (arm_pragma_long_calls == SHORT)
4697 attr_name = get_identifier ("short_call");
4701 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4702 TYPE_ATTRIBUTES (type) = type_attr_list;
4706 /* Return true if DECL is known to be linked into section SECTION. */
4709 arm_function_in_section_p (tree decl, section *section)
4711 /* We can only be certain about functions defined in the same
4712 compilation unit. */
4713 if (!TREE_STATIC (decl))
4716 /* Make sure that SYMBOL always binds to the definition in this
4717 compilation unit. */
4718 if (!targetm.binds_local_p (decl))
4721 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4722 if (!DECL_SECTION_NAME (decl))
4724 /* Make sure that we will not create a unique section for DECL. */
4725 if (flag_function_sections || DECL_ONE_ONLY (decl))
4729 return function_section (decl) == section;
4732 /* Return nonzero if a 32-bit "long_call" should be generated for
4733 a call from the current function to DECL. We generate a long_call
4736 a. has an __attribute__((long_call))
4737 or b. is within the scope of a #pragma long_calls
4738 or c. the -mlong-calls command line switch has been specified
4740 However we do not generate a long call if the function:
4742 d. has an __attribute__ ((short_call))
4743 or e. is inside the scope of a #pragma no_long_calls
4744 or f. is defined in the same section as the current function. */
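/* For example, given

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   calls to far_func load the full address into a register and branch
   indirectly, while near_func keeps a plain BL even under
   -mlong-calls.  */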
4747 arm_is_long_call_p (tree decl)
4752 return TARGET_LONG_CALLS;
4754 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4755 if (lookup_attribute ("short_call", attrs))
4758 /* For "f", be conservative, and only cater for cases in which the
4759 whole of the current function is placed in the same section. */
4760 if (!flag_reorder_blocks_and_partition
4761 && TREE_CODE (decl) == FUNCTION_DECL
4762 && arm_function_in_section_p (decl, current_function_section ()))
4765 if (lookup_attribute ("long_call", attrs))
4768 return TARGET_LONG_CALLS;
4771 /* Return nonzero if it is ok to make a tail-call to DECL. */
4773 arm_function_ok_for_sibcall (tree decl, tree exp)
4775 unsigned long func_type;
4777 if (cfun->machine->sibcall_blocked)
4780 /* Never tailcall something for which we have no decl, or if we
4781 are in Thumb mode. */
4782 if (decl == NULL || TARGET_THUMB)
4785 /* The PIC register is live on entry to VxWorks PLT entries, so we
4786 must make the call before restoring the PIC register. */
4787 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4790 /* Cannot tail-call to long calls, since these are out of range of
4791 a branch instruction. */
4792 if (arm_is_long_call_p (decl))
4795 /* If we are interworking and the function is not declared static
4796 then we can't tail-call it unless we know that it exists in this
4797 compilation unit (since it might be a Thumb routine). */
4798 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4801 func_type = arm_current_func_type ();
4802 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4803 if (IS_INTERRUPT (func_type))
4806 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4808 /* Check that the return value locations are the same. For
4809 example that we aren't returning a value from the sibling in
4810 a VFP register but then need to transfer it to a core
4814 a = arm_function_value (TREE_TYPE (exp), decl, false);
4815 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4817 if (!rtx_equal_p (a, b))
4821 /* Never tailcall if function may be called with a misaligned SP. */
4822 if (IS_STACKALIGN (func_type))
4825 /* Everything else is ok. */
4830 /* Addressing mode support functions. */
4832 /* Return nonzero if X is a legitimate immediate operand when compiling
4833 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4835 legitimate_pic_operand_p (rtx x)
4837 if (GET_CODE (x) == SYMBOL_REF
4838 || (GET_CODE (x) == CONST
4839 && GET_CODE (XEXP (x, 0)) == PLUS
4840 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4846 /* Record that the current function needs a PIC register. Initialize
4847 cfun->machine->pic_reg if we have not already done so. */
4850 require_pic_register (void)
4852 /* A lot of the logic here is made obscure by the fact that this
4853 routine gets called as part of the rtx cost estimation process.
4854 We don't want those calls to affect any assumptions about the real
4855 function; and further, we can't call entry_of_function() until we
4856 start the real expansion process. */
4857 if (!crtl->uses_pic_offset_table)
4859 gcc_assert (can_create_pseudo_p ());
4860 if (arm_pic_register != INVALID_REGNUM)
4862 if (!cfun->machine->pic_reg)
4863 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4865 /* Play games to avoid marking the function as needing pic
4866 if we are being called as part of the cost-estimation
4868 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4869 crtl->uses_pic_offset_table = 1;
4875 if (!cfun->machine->pic_reg)
4876 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4878 /* Play games to avoid marking the function as needing pic
4879 if we are being called as part of the cost-estimation
4881 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4883 crtl->uses_pic_offset_table = 1;
4886 arm_load_pic_register (0UL);
4890 /* We can be called during expansion of PHI nodes, where
4891 we can't yet emit instructions directly in the final
4892 insn stream. Queue the insns on the entry edge, they will
4893 be committed after everything else is expanded. */
4894 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4901 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4903 if (GET_CODE (orig) == SYMBOL_REF
4904 || GET_CODE (orig) == LABEL_REF)
4906 rtx pic_ref, address;
4910 /* If this function doesn't have a pic register, create one now. */
4911 require_pic_register ();
4915 gcc_assert (can_create_pseudo_p ());
4916 reg = gen_reg_rtx (Pmode);
4922 address = gen_reg_rtx (Pmode);
4927 emit_insn (gen_pic_load_addr_32bit (address, orig));
4928 else /* TARGET_THUMB1 */
4929 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4931 /* VxWorks does not impose a fixed gap between segments; the run-time
4932 gap can be different from the object-file gap. We therefore can't
4933 use GOTOFF unless we are absolutely sure that the symbol is in the
4934 same segment as the GOT. Unfortunately, the flexibility of linker
4935 scripts means that we can't be sure of that in general, so assume
4936 that GOTOFF is never valid on VxWorks. */
4937 if ((GET_CODE (orig) == LABEL_REF
4938 || (GET_CODE (orig) == SYMBOL_REF &&
4939 SYMBOL_REF_LOCAL_P (orig)))
4941 && !TARGET_VXWORKS_RTP)
4942 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4945 pic_ref = gen_const_mem (Pmode,
4946 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4950 insn = emit_move_insn (reg, pic_ref);
4952 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4954 set_unique_reg_note (insn, REG_EQUAL, orig);
4958 else if (GET_CODE (orig) == CONST)
4962 if (GET_CODE (XEXP (orig, 0)) == PLUS
4963 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4966 /* Handle the case where we have: const (UNSPEC_TLS). */
4967 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4968 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4971 /* Handle the case where we have:
4972 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4974 if (GET_CODE (XEXP (orig, 0)) == PLUS
4975 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4976 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4978 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4984 gcc_assert (can_create_pseudo_p ());
4985 reg = gen_reg_rtx (Pmode);
4988 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4990 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4991 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4992 base == reg ? 0 : reg);
4994 if (GET_CODE (offset) == CONST_INT)
4996 /* The base register doesn't really matter; we only want to
4997 test the index for the appropriate mode. */
4998 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5000 gcc_assert (can_create_pseudo_p ());
5001 offset = force_reg (Pmode, offset);
5004 if (GET_CODE (offset) == CONST_INT)
5005 return plus_constant (base, INTVAL (offset));
5008 if (GET_MODE_SIZE (mode) > 4
5009 && (GET_MODE_CLASS (mode) == MODE_INT
5010 || TARGET_SOFT_FLOAT))
5012 emit_insn (gen_addsi3 (reg, base, offset));
5016 return gen_rtx_PLUS (Pmode, base, offset);
5023 /* Find a spare register to use during the prolog of a function. */
5026 thumb_find_work_register (unsigned long pushed_regs_mask)
5030 /* Check the argument registers first as these are call-used. The
5031 register allocation order means that sometimes r3 might be used
5032 but earlier argument registers might not, so check them all. */
5033 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5034 if (!df_regs_ever_live_p (reg))
5037 /* Before going on to check the call-saved registers we can try a couple
5038 more ways of deducing that r3 is available. The first is when we are
5039 pushing anonymous arguments onto the stack and we have fewer than 4
5040 registers' worth of fixed arguments (*). In this case r3 will be part of
5041 the variable argument list and so we can be sure that it will be
5042 pushed right at the start of the function. Hence it will be available
5043 for the rest of the prologue.
5044 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5045 if (cfun->machine->uses_anonymous_args
5046 && crtl->args.pretend_args_size > 0)
5047 return LAST_ARG_REGNUM;
5049 /* The other case is when we have fixed arguments but fewer than 4
5050 registers' worth. In this case r3 might be used in the body of the function, but
5051 it is not being used to convey an argument into the function. In theory
5052 we could just check crtl->args.size to see how many bytes are
5053 being passed in argument registers, but it seems that it is unreliable.
5054 Sometimes it will have the value 0 when in fact arguments are being
5055 passed. (See testcase execute/20021111-1.c for an example). So we also
5056 check the args_info.nregs field as well. The problem with this field is
5057 that it makes no allowances for arguments that are passed to the
5058 function but which are not used. Hence we could miss an opportunity
5059 when a function has an unused argument in r3. But it is better to be
5060 safe than sorry. */
5061 if (! cfun->machine->uses_anonymous_args
5062 && crtl->args.size >= 0
5063 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5064 && crtl->args.info.nregs < 4)
5065 return LAST_ARG_REGNUM;
5067 /* Otherwise look for a call-saved register that is going to be pushed. */
5068 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5069 if (pushed_regs_mask & (1 << reg))
5074 /* Thumb-2 can use high regs. */
5075 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5076 if (pushed_regs_mask & (1 << reg))
5079 /* Something went wrong - thumb_compute_save_reg_mask()
5080 should have arranged for a suitable register to be pushed. */
5084 static GTY(()) int pic_labelno;
5086 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5090 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5092 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5094 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5097 gcc_assert (flag_pic);
5099 pic_reg = cfun->machine->pic_reg;
5100 if (TARGET_VXWORKS_RTP)
5102 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5103 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5104 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5106 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5108 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5109 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5113 /* We use an UNSPEC rather than a LABEL_REF because this label
5114 never appears in the code stream. */
5116 labelno = GEN_INT (pic_labelno++);
5117 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5118 l1 = gen_rtx_CONST (VOIDmode, l1);
5120 /* On the ARM the PC register contains 'dot + 8' at the time of the
5121 addition; on the Thumb it is 'dot + 4'. */
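/* The net effect is the classic PIC base computation, sketched here
   for ARM state with arbitrary register and label names:

       ldr     r9, .LPIC_OFF
     .LPIC0:
       add     r9, pc, r9      @ pc reads as .LPIC0 + 8 here

   which is why 8 (or 4 for Thumb) is folded into the constant.  */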
5122 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5123 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5125 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5129 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5131 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5133 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5135 else /* TARGET_THUMB1 */
5137 if (arm_pic_register != INVALID_REGNUM
5138 && REGNO (pic_reg) > LAST_LO_REGNUM)
5140 /* We will have pushed the pic register, so we should always be
5141 able to find a work register. */
5142 pic_tmp = gen_rtx_REG (SImode,
5143 thumb_find_work_register (saved_regs));
5144 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5145 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5148 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5149 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5153 /* Need to emit this whether or not we obey regdecls,
5154 since setjmp/longjmp can cause life info to screw up. */
5159 /* Return nonzero if X is valid as an ARM state addressing register. */
5161 arm_address_register_rtx_p (rtx x, int strict_p)
5165 if (GET_CODE (x) != REG)
5171 return ARM_REGNO_OK_FOR_BASE_P (regno);
5173 return (regno <= LAST_ARM_REGNUM
5174 || regno >= FIRST_PSEUDO_REGISTER
5175 || regno == FRAME_POINTER_REGNUM
5176 || regno == ARG_POINTER_REGNUM);
5179 /* Return TRUE if this rtx is the difference of a symbol and a label,
5180 and will reduce to a PC-relative relocation in the object file.
5181 Expressions like this can be left alone when generating PIC, rather
5182 than forced through the GOT. */
5184 pcrel_constant_p (rtx x)
5186 if (GET_CODE (x) == MINUS)
5187 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5192 /* Return nonzero if X is a valid ARM state address operand. */
5194 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5198 enum rtx_code code = GET_CODE (x);
5200 if (arm_address_register_rtx_p (x, strict_p))
5203 use_ldrd = (TARGET_LDRD
5205 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5207 if (code == POST_INC || code == PRE_DEC
5208 || ((code == PRE_INC || code == POST_DEC)
5209 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5210 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5212 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5213 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5214 && GET_CODE (XEXP (x, 1)) == PLUS
5215 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5217 rtx addend = XEXP (XEXP (x, 1), 1);
5219 /* Don't allow ldrd post increment by register because it's hard
5220 to fixup invalid register choices. */
5222 && GET_CODE (x) == POST_MODIFY
5223 && GET_CODE (addend) == REG)
5226 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5227 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5230 /* After reload constants split into minipools will have addresses
5231 from a LABEL_REF. */
5232 else if (reload_completed
5233 && (code == LABEL_REF
5235 && GET_CODE (XEXP (x, 0)) == PLUS
5236 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5240 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5243 else if (code == PLUS)
5245 rtx xop0 = XEXP (x, 0);
5246 rtx xop1 = XEXP (x, 1);
5248 return ((arm_address_register_rtx_p (xop0, strict_p)
5249 && GET_CODE(xop1) == CONST_INT
5250 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5251 || (arm_address_register_rtx_p (xop1, strict_p)
5252 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5256 /* Reload currently can't handle MINUS, so disable this for now */
5257 else if (GET_CODE (x) == MINUS)
5259 rtx xop0 = XEXP (x, 0);
5260 rtx xop1 = XEXP (x, 1);
5262 return (arm_address_register_rtx_p (xop0, strict_p)
5263 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5267 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5268 && code == SYMBOL_REF
5269 && CONSTANT_POOL_ADDRESS_P (x)
5271 && symbol_mentioned_p (get_pool_constant (x))
5272 && ! pcrel_constant_p (get_pool_constant (x))))
5278 /* Return nonzero if X is a valid Thumb-2 address operand. */
5280 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5283 enum rtx_code code = GET_CODE (x);
5285 if (arm_address_register_rtx_p (x, strict_p))
5288 use_ldrd = (TARGET_LDRD
5290 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5292 if (code == POST_INC || code == PRE_DEC
5293 || ((code == PRE_INC || code == POST_DEC)
5294 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5295 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5297 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5298 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5299 && GET_CODE (XEXP (x, 1)) == PLUS
5300 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5302 /* Thumb-2 only has autoincrement by constant. */
5303 rtx addend = XEXP (XEXP (x, 1), 1);
5304 HOST_WIDE_INT offset;
5306 if (GET_CODE (addend) != CONST_INT)
5309 offset = INTVAL(addend);
5310 if (GET_MODE_SIZE (mode) <= 4)
5311 return (offset > -256 && offset < 256);
5313 return (use_ldrd && offset > -1024 && offset < 1024
5314 && (offset & 3) == 0);
5317 /* After reload constants split into minipools will have addresses
5318 from a LABEL_REF. */
5319 else if (reload_completed
5320 && (code == LABEL_REF
5322 && GET_CODE (XEXP (x, 0)) == PLUS
5323 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5324 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5327 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5330 else if (code == PLUS)
5332 rtx xop0 = XEXP (x, 0);
5333 rtx xop1 = XEXP (x, 1);
5335 return ((arm_address_register_rtx_p (xop0, strict_p)
5336 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5337 || (arm_address_register_rtx_p (xop1, strict_p)
5338 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5341 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5342 && code == SYMBOL_REF
5343 && CONSTANT_POOL_ADDRESS_P (x)
5345 && symbol_mentioned_p (get_pool_constant (x))
5346 && ! pcrel_constant_p (get_pool_constant (x))))
5352 /* Return nonzero if INDEX is valid for an address index operand in
5355 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5358 HOST_WIDE_INT range;
5359 enum rtx_code code = GET_CODE (index);
5361 /* Standard coprocessor addressing modes. */
5362 if (TARGET_HARD_FLOAT
5363 && (TARGET_FPA || TARGET_MAVERICK)
5364 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5365 || (TARGET_MAVERICK && mode == DImode)))
5366 return (code == CONST_INT && INTVAL (index) < 1024
5367 && INTVAL (index) > -1024
5368 && (INTVAL (index) & 3) == 0);
5371 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5372 return (code == CONST_INT
5373 && INTVAL (index) < 1016
5374 && INTVAL (index) > -1024
5375 && (INTVAL (index) & 3) == 0);
5377 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5378 return (code == CONST_INT
5379 && INTVAL (index) < 1024
5380 && INTVAL (index) > -1024
5381 && (INTVAL (index) & 3) == 0);
5383 if (arm_address_register_rtx_p (index, strict_p)
5384 && (GET_MODE_SIZE (mode) <= 4))
5387 if (mode == DImode || mode == DFmode)
5389 if (code == CONST_INT)
5391 HOST_WIDE_INT val = INTVAL (index);
5394 return val > -256 && val < 256;
5396 return val > -4096 && val < 4092;
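/* The upper bound of 4091 keeps the implicit second word access at
   offset+4 within the 12-bit ldr/str immediate range when the value
   must be moved as two words rather than with ldrd.  */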
5399 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5402 if (GET_MODE_SIZE (mode) <= 4
5406 || (mode == QImode && outer == SIGN_EXTEND))))
5410 rtx xiop0 = XEXP (index, 0);
5411 rtx xiop1 = XEXP (index, 1);
5413 return ((arm_address_register_rtx_p (xiop0, strict_p)
5414 && power_of_two_operand (xiop1, SImode))
5415 || (arm_address_register_rtx_p (xiop1, strict_p)
5416 && power_of_two_operand (xiop0, SImode)));
5418 else if (code == LSHIFTRT || code == ASHIFTRT
5419 || code == ASHIFT || code == ROTATERT)
5421 rtx op = XEXP (index, 1);
5423 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5424 && GET_CODE (op) == CONST_INT
5426 && INTVAL (op) <= 31);
5430 /* For ARM v4 we may be doing a sign-extend operation during the
5436 || (outer == SIGN_EXTEND && mode == QImode))
5442 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5444 return (code == CONST_INT
5445 && INTVAL (index) < range
5446 && INTVAL (index) > -range);
5449 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5450 index operand, i.e. 1, 2, 4 or 8. */
5452 thumb2_index_mul_operand (rtx op)
5456 if (GET_CODE(op) != CONST_INT)
5460 return (val == 1 || val == 2 || val == 4 || val == 8);
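/* These are exactly the scales expressible in a Thumb-2 register
   index such as [r1, r2, lsl #2]; e.g. a scale of 4 indexes a word
   array directly (register names illustrative).  */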
5463 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5465 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5467 enum rtx_code code = GET_CODE (index);
5469 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5470 /* Standard coprocessor addressing modes. */
5471 if (TARGET_HARD_FLOAT
5472 && (TARGET_FPA || TARGET_MAVERICK)
5473 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5474 || (TARGET_MAVERICK && mode == DImode)))
5475 return (code == CONST_INT && INTVAL (index) < 1024
5476 && INTVAL (index) > -1024
5477 && (INTVAL (index) & 3) == 0);
5479 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5481 /* For DImode assume values will usually live in core regs
5482 and only allow LDRD addressing modes. */
5483 if (!TARGET_LDRD || mode != DImode)
5484 return (code == CONST_INT
5485 && INTVAL (index) < 1024
5486 && INTVAL (index) > -1024
5487 && (INTVAL (index) & 3) == 0);
5491 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5492 return (code == CONST_INT
5493 && INTVAL (index) < 1016
5494 && INTVAL (index) > -1024
5495 && (INTVAL (index) & 3) == 0);
5497 if (arm_address_register_rtx_p (index, strict_p)
5498 && (GET_MODE_SIZE (mode) <= 4))
5501 if (mode == DImode || mode == DFmode)
5503 if (code == CONST_INT)
5505 HOST_WIDE_INT val = INTVAL (index);
5506 /* ??? Can we assume ldrd for thumb2? */
5507 /* Thumb-2 ldrd only has reg+const addressing modes. */
5508 /* ldrd supports offsets of +-1020.
5509 However the ldr fallback does not. */
5510 return val > -256 && val < 256 && (val & 3) == 0;
5518 rtx xiop0 = XEXP (index, 0);
5519 rtx xiop1 = XEXP (index, 1);
5521 return ((arm_address_register_rtx_p (xiop0, strict_p)
5522 && thumb2_index_mul_operand (xiop1))
5523 || (arm_address_register_rtx_p (xiop1, strict_p)
5524 && thumb2_index_mul_operand (xiop0)));
5526 else if (code == ASHIFT)
5528 rtx op = XEXP (index, 1);
5530 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5531 && GET_CODE (op) == CONST_INT
5533 && INTVAL (op) <= 3);
5536 return (code == CONST_INT
5537 && INTVAL (index) < 4096
5538 && INTVAL (index) > -256);
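/* I.e. the asymmetric Thumb-2 immediate window: a 12-bit positive
   offset (up to 4095) but only an 8-bit negative one (down to
   -255).  */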
5541 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5543 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5547 if (GET_CODE (x) != REG)
5553 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5555 return (regno <= LAST_LO_REGNUM
5556 || regno > LAST_VIRTUAL_REGISTER
5557 || regno == FRAME_POINTER_REGNUM
5558 || (GET_MODE_SIZE (mode) >= 4
5559 && (regno == STACK_POINTER_REGNUM
5560 || regno >= FIRST_PSEUDO_REGISTER
5561 || x == hard_frame_pointer_rtx
5562 || x == arg_pointer_rtx)));
5565 /* Return nonzero if x is a legitimate index register. This is the case
5566 for any base register that can access a QImode object. */
5568 thumb1_index_register_rtx_p (rtx x, int strict_p)
5570 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5573 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5575 The AP may be eliminated to either the SP or the FP, so we use the
5576 least common denominator, e.g. SImode, and offsets from 0 to 64.
5578 ??? Verify whether the above is the right approach.
5580 ??? Also, the FP may be eliminated to the SP, so perhaps that
5581 needs special handling also.
5583 ??? Look at how the mips16 port solves this problem. It probably uses
5584 better ways to solve some of these problems.
5586 Although it is not incorrect, we don't accept QImode and HImode
5587 addresses based on the frame pointer or arg pointer until the
5588 reload pass starts. This is so that eliminating such addresses
5589 into stack based ones won't produce impossible code. */
5591 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5593 /* ??? Not clear if this is right. Experiment. */
5594 if (GET_MODE_SIZE (mode) < 4
5595 && !(reload_in_progress || reload_completed)
5596 && (reg_mentioned_p (frame_pointer_rtx, x)
5597 || reg_mentioned_p (arg_pointer_rtx, x)
5598 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5599 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5600 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5601 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5604 /* Accept any base register. SP only in SImode or larger. */
5605 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5608 /* This is PC relative data before arm_reorg runs. */
5609 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5610 && GET_CODE (x) == SYMBOL_REF
5611 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5614 /* This is PC relative data after arm_reorg runs. */
5615 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5617 && (GET_CODE (x) == LABEL_REF
5618 || (GET_CODE (x) == CONST
5619 && GET_CODE (XEXP (x, 0)) == PLUS
5620 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5621 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5624 /* Post-inc indexing only supported for SImode and larger. */
5625 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5626 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5629 else if (GET_CODE (x) == PLUS)
5631 /* REG+REG address can be any two index registers. */
5632 /* We disallow FRAME+REG addressing since we know that FRAME
5633 will be replaced with STACK, and SP relative addressing only
5634 permits SP+OFFSET. */
5635 if (GET_MODE_SIZE (mode) <= 4
5636 && XEXP (x, 0) != frame_pointer_rtx
5637 && XEXP (x, 1) != frame_pointer_rtx
5638 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5639 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5642 /* REG+const has 5-7 bit offset for non-SP registers. */
5643 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5644 || XEXP (x, 0) == arg_pointer_rtx)
5645 && GET_CODE (XEXP (x, 1)) == CONST_INT
5646 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5649 /* REG+const has 10-bit offset for SP, but only SImode and
5650 larger is supported. */
5651 /* ??? Should probably check for DI/DFmode overflow here
5652 just like GO_IF_LEGITIMATE_OFFSET does. */
5653 else if (GET_CODE (XEXP (x, 0)) == REG
5654 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5655 && GET_MODE_SIZE (mode) >= 4
5656 && GET_CODE (XEXP (x, 1)) == CONST_INT
5657 && INTVAL (XEXP (x, 1)) >= 0
5658 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5659 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5662 else if (GET_CODE (XEXP (x, 0)) == REG
5663 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5664 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5665 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5666 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5667 && GET_MODE_SIZE (mode) >= 4
5668 && GET_CODE (XEXP (x, 1)) == CONST_INT
5669 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5673 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5674 && GET_MODE_SIZE (mode) == 4
5675 && GET_CODE (x) == SYMBOL_REF
5676 && CONSTANT_POOL_ADDRESS_P (x)
5678 && symbol_mentioned_p (get_pool_constant (x))
5679 && ! pcrel_constant_p (get_pool_constant (x))))
5685 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5686 instruction of mode MODE. */
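/* In other words the classic Thumb-1 immediate ranges: ldrb/strb
   take #0-31, ldrh/strh take even offsets #0-62, and ldr/str take
   word-aligned offsets up to #124.  */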
5688 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5690 switch (GET_MODE_SIZE (mode))
5693 return val >= 0 && val < 32;
5696 return val >= 0 && val < 64 && (val & 1) == 0;
5700 && (val + GET_MODE_SIZE (mode)) <= 128
5706 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5709 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5710 else if (TARGET_THUMB2)
5711 return thumb2_legitimate_address_p (mode, x, strict_p);
5712 else /* if (TARGET_THUMB1) */
5713 return thumb1_legitimate_address_p (mode, x, strict_p);
5716 /* Build the SYMBOL_REF for __tls_get_addr. */
5718 static GTY(()) rtx tls_get_addr_libfunc;
5721 get_tls_get_addr (void)
5723 if (!tls_get_addr_libfunc)
5724 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5725 return tls_get_addr_libfunc;
5729 arm_load_tp (rtx target)
5732 target = gen_reg_rtx (SImode);
5736 /* Can return in any reg. */
5737 emit_insn (gen_load_tp_hard (target));
5741 /* Always returned in r0. Immediately copy the result into a pseudo,
5742 otherwise other uses of r0 (e.g. setting up function arguments) may
5743 clobber the value. */
5747 emit_insn (gen_load_tp_soft ());
5749 tmp = gen_rtx_REG (SImode, 0);
5750 emit_move_insn (target, tmp);
5756 load_tls_operand (rtx x, rtx reg)
5760 if (reg == NULL_RTX)
5761 reg = gen_reg_rtx (SImode);
5763 tmp = gen_rtx_CONST (SImode, x);
5765 emit_move_insn (reg, tmp);
5771 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5773 rtx insns, label, labelno, sum;
5777 labelno = GEN_INT (pic_labelno++);
5778 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5779 label = gen_rtx_CONST (VOIDmode, label);
5781 sum = gen_rtx_UNSPEC (Pmode,
5782 gen_rtvec (4, x, GEN_INT (reloc), label,
5783 GEN_INT (TARGET_ARM ? 8 : 4)),
5785 reg = load_tls_operand (sum, reg);
5788 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5789 else if (TARGET_THUMB2)
5790 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5791 else /* TARGET_THUMB1 */
5792 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5794 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5795 Pmode, 1, reg, Pmode);
5797 insns = get_insns ();
5804 legitimize_tls_address (rtx x, rtx reg)
5806 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5807 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
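/* The cases below implement the four standard TLS access models:
   global-dynamic calls __tls_get_addr on a GOT entry, local-dynamic
   shares one __tls_get_addr result per module and adds a DTPOFF
   addend, initial-exec loads a TPOFF value through the GOT, and
   local-exec folds the TPOFF constant in directly.  */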
5811 case TLS_MODEL_GLOBAL_DYNAMIC:
5812 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5813 dest = gen_reg_rtx (Pmode);
5814 emit_libcall_block (insns, dest, ret, x);
5817 case TLS_MODEL_LOCAL_DYNAMIC:
5818 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5820 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5821 share the LDM result with other LD model accesses. */
5822 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5824 dest = gen_reg_rtx (Pmode);
5825 emit_libcall_block (insns, dest, ret, eqv);
5827 /* Load the addend. */
5828 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5830 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5831 return gen_rtx_PLUS (Pmode, dest, addend);
5833 case TLS_MODEL_INITIAL_EXEC:
5834 labelno = GEN_INT (pic_labelno++);
5835 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5836 label = gen_rtx_CONST (VOIDmode, label);
5837 sum = gen_rtx_UNSPEC (Pmode,
5838 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5839 GEN_INT (TARGET_ARM ? 8 : 4)),
5841 reg = load_tls_operand (sum, reg);
5844 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5845 else if (TARGET_THUMB2)
5846 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5849 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5850 emit_move_insn (reg, gen_const_mem (SImode, reg));
5853 tp = arm_load_tp (NULL_RTX);
5855 return gen_rtx_PLUS (Pmode, tp, reg);
5857 case TLS_MODEL_LOCAL_EXEC:
5858 tp = arm_load_tp (NULL_RTX);
5860 reg = gen_rtx_UNSPEC (Pmode,
5861 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5863 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5865 return gen_rtx_PLUS (Pmode, tp, reg);
5872 /* Try machine-dependent ways of modifying an illegitimate address
5873 to be legitimate. If we find one, return the new, valid address. */
5875 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5879 /* TODO: legitimize_address for Thumb2. */
5882 return thumb_legitimize_address (x, orig_x, mode);
5885 if (arm_tls_symbol_p (x))
5886 return legitimize_tls_address (x, NULL_RTX);
5888 if (GET_CODE (x) == PLUS)
5890 rtx xop0 = XEXP (x, 0);
5891 rtx xop1 = XEXP (x, 1);
5893 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5894 xop0 = force_reg (SImode, xop0);
5896 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5897 xop1 = force_reg (SImode, xop1);
5899 if (ARM_BASE_REGISTER_RTX_P (xop0)
5900 && GET_CODE (xop1) == CONST_INT)
5902 HOST_WIDE_INT n, low_n;
5906 /* VFP addressing modes actually allow greater offsets, but for
5907 now we just stick with the lowest common denominator. */
5909 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5921 low_n = ((mode) == TImode ? 0
5922 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5926 base_reg = gen_reg_rtx (SImode);
5927 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5928 emit_move_insn (base_reg, val);
5929 x = plus_constant (base_reg, low_n);
5931 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5932 x = gen_rtx_PLUS (SImode, xop0, xop1);
5935 /* XXX We don't allow MINUS any more -- see comment in
5936 arm_legitimate_address_outer_p (). */
5937 else if (GET_CODE (x) == MINUS)
5939 rtx xop0 = XEXP (x, 0);
5940 rtx xop1 = XEXP (x, 1);
5942 if (CONSTANT_P (xop0))
5943 xop0 = force_reg (SImode, xop0);
5945 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5946 xop1 = force_reg (SImode, xop1);
5948 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5949 x = gen_rtx_MINUS (SImode, xop0, xop1);
5952 /* Make sure to take full advantage of the pre-indexed addressing mode
5953 with absolute addresses which often allows for the base register to
5954 be factorized for multiple adjacent memory references, and it might
5955 even allow for the mini pool to be avoided entirely. */
5956 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5959 HOST_WIDE_INT mask, base, index;
5962 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5963 use an 8-bit index. So let's use a 12-bit index for SImode only and
5964 hope that arm_gen_constant will enable ldrb to use more bits. */
5965 bits = (mode == SImode) ? 12 : 8;
5966 mask = (1 << bits) - 1;
5967 base = INTVAL (x) & ~mask;
5968 index = INTVAL (x) & mask;
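/* E.g. for an SImode access to the absolute address 0x12345678:
   bits == 12, so base == 0x12345000 and index == 0x678; the base
   can then be materialized once and shared by neighbouring
   accesses.  */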
5969 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5971 /* It'll most probably be more efficient to generate the base
5972 with more bits set and use a negative index instead. */
5976 base_reg = force_reg (SImode, GEN_INT (base));
5977 x = plus_constant (base_reg, index);
5982 /* We need to find and carefully transform any SYMBOL and LABEL
5983 references; so go back to the original address expression. */
5984 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5986 if (new_x != orig_x)
5994 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5995 to be legitimate. If we find one, return the new, valid address. */
5997 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5999 if (arm_tls_symbol_p (x))
6000 return legitimize_tls_address (x, NULL_RTX);
6002 if (GET_CODE (x) == PLUS
6003 && GET_CODE (XEXP (x, 1)) == CONST_INT
6004 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6005 || INTVAL (XEXP (x, 1)) < 0))
6007 rtx xop0 = XEXP (x, 0);
6008 rtx xop1 = XEXP (x, 1);
6009 HOST_WIDE_INT offset = INTVAL (xop1);
6011 /* Try to fold the offset into a biasing of the base register and
6012 then offsetting that. Don't do this when optimizing for space
6013 since it can cause too many CSEs. */
6014 if (optimize_size && offset >= 0
6015 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6017 HOST_WIDE_INT delta;
6020 delta = offset - (256 - GET_MODE_SIZE (mode));
6021 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6022 delta = 31 * GET_MODE_SIZE (mode);
6024 delta = offset & (~31 * GET_MODE_SIZE (mode));
6026 xop0 = force_operand (plus_constant (xop0, offset - delta),
6028 x = plus_constant (xop0, delta);
6030 else if (offset < 0 && offset > -256)
6031 /* Small negative offsets are best done with a subtract before the
6032 dereference; forcing these into a register normally takes two
6034 x = force_operand (x, NULL_RTX);
6037 /* For the remaining cases, force the constant into a register. */
6038 xop1 = force_reg (SImode, xop1);
6039 x = gen_rtx_PLUS (SImode, xop0, xop1);
6042 else if (GET_CODE (x) == PLUS
6043 && s_register_operand (XEXP (x, 1), SImode)
6044 && !s_register_operand (XEXP (x, 0), SImode))
6046 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6048 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6053 /* We need to find and carefully transform any SYMBOL and LABEL
6054 references; so go back to the original address expression. */
6055 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6057 if (new_x != orig_x)
6065 thumb_legitimize_reload_address (rtx *x_p,
6066 enum machine_mode mode,
6067 int opnum, int type,
6068 int ind_levels ATTRIBUTE_UNUSED)
6072 if (GET_CODE (x) == PLUS
6073 && GET_MODE_SIZE (mode) < 4
6074 && REG_P (XEXP (x, 0))
6075 && XEXP (x, 0) == stack_pointer_rtx
6076 && GET_CODE (XEXP (x, 1)) == CONST_INT
6077 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6082 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6083 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6087 /* If both registers are hi-regs, then it's better to reload the
6088 entire expression rather than each register individually. That
6089 only requires one reload register rather than two. */
6090 if (GET_CODE (x) == PLUS
6091 && REG_P (XEXP (x, 0))
6092 && REG_P (XEXP (x, 1))
6093 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6094 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6099 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6100 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6107 /* Test for various thread-local symbols. */
6109 /* Return TRUE if X is a thread-local symbol. */
6112 arm_tls_symbol_p (rtx x)
6114 if (! TARGET_HAVE_TLS)
6117 if (GET_CODE (x) != SYMBOL_REF)
6120 return SYMBOL_REF_TLS_MODEL (x) != 0;
6123 /* Helper for arm_tls_referenced_p. */
6126 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6128 if (GET_CODE (*x) == SYMBOL_REF)
6129 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6131 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6132 TLS offsets, not real symbol references. */
6133 if (GET_CODE (*x) == UNSPEC
6134 && XINT (*x, 1) == UNSPEC_TLS)
6140 /* Return TRUE if X contains any TLS symbol references. */
6143 arm_tls_referenced_p (rtx x)
6145 if (! TARGET_HAVE_TLS)
6148 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6151 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6154 arm_cannot_force_const_mem (rtx x)
6158 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6160 split_const (x, &base, &offset);
6161 if (GET_CODE (base) == SYMBOL_REF
6162 && !offset_within_block_p (base, INTVAL (offset)))
6165 return arm_tls_referenced_p (x);
6168 #define REG_OR_SUBREG_REG(X) \
6169 (GET_CODE (X) == REG \
6170 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6172 #define REG_OR_SUBREG_RTX(X) \
6173 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6175 #ifndef COSTS_N_INSNS
6176 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6179 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6181 enum machine_mode mode = GET_MODE (x);
6194 return COSTS_N_INSNS (1);
6197 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6200 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6207 return COSTS_N_INSNS (2) + cycles;
6209 return COSTS_N_INSNS (1) + 16;
6212 return (COSTS_N_INSNS (1)
6213 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6214 + GET_CODE (SET_DEST (x)) == MEM));
6219 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6221 if (thumb_shiftable_const (INTVAL (x)))
6222 return COSTS_N_INSNS (2);
6223 return COSTS_N_INSNS (3);
6225 else if ((outer == PLUS || outer == COMPARE)
6226 && INTVAL (x) < 256 && INTVAL (x) > -256)
6228 else if ((outer == IOR || outer == XOR || outer == AND)
6229 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6230 return COSTS_N_INSNS (1);
6231 else if (outer == AND)
6234 /* This duplicates the tests in the andsi3 expander. */
6235 for (i = 9; i <= 31; i++)
6236 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6237 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6238 return COSTS_N_INSNS (2);
6240 else if (outer == ASHIFT || outer == ASHIFTRT
6241 || outer == LSHIFTRT)
6243 return COSTS_N_INSNS (2);
6249 return COSTS_N_INSNS (3);
6267 /* XXX another guess. */
6268 /* Memory costs quite a lot for the first word, but subsequent words
6269 load at the equivalent of a single insn each. */
6270 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6271 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6276 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6281 /* XXX still guessing. */
6282 switch (GET_MODE (XEXP (x, 0)))
6285 return (1 + (mode == DImode ? 4 : 0)
6286 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6289 return (4 + (mode == DImode ? 4 : 0)
6290 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6293 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6305 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6307 enum machine_mode mode = GET_MODE (x);
6308 enum rtx_code subcode;
6310 enum rtx_code code = GET_CODE (x);
6317 /* Memory costs quite a lot for the first word, but subsequent words
6318 load at the equivalent of a single insn each. */
6319 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
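/* E.g. an SImode access is costed at 3 insns and a DImode one at 4,
   reflecting the extra word transferred.  */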
6326 if (TARGET_HARD_FLOAT && mode == SFmode)
6327 *total = COSTS_N_INSNS (2);
6328 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6329 *total = COSTS_N_INSNS (4);
6331 *total = COSTS_N_INSNS (20);
6335 if (GET_CODE (XEXP (x, 1)) == REG)
6336 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6337 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6338 *total = rtx_cost (XEXP (x, 1), code, speed);
6344 *total += COSTS_N_INSNS (4);
6349 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6350 *total += rtx_cost (XEXP (x, 0), code, speed);
6353 *total += COSTS_N_INSNS (3);
6357 *total += COSTS_N_INSNS (1);
6358 /* Increase the cost of complex shifts because they aren't any faster,
6359 and reduce dual issue opportunities. */
6360 if (arm_tune_cortex_a9
6361 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6369 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6371 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6372 *total = COSTS_N_INSNS (1);
6374 *total = COSTS_N_INSNS (20);
6377 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6378 /* Thumb2 does not have RSB, so all arguments must be
6379 registers (subtracting a constant is canonicalized as
6380 addition of the negated constant). */
6386 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6387 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6388 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6390 *total += rtx_cost (XEXP (x, 1), code, speed);
6394 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6395 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6397 *total += rtx_cost (XEXP (x, 0), code, speed);
6404 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6406 if (TARGET_HARD_FLOAT
6408 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6410 *total = COSTS_N_INSNS (1);
6411 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6412 && arm_const_double_rtx (XEXP (x, 0)))
6414 *total += rtx_cost (XEXP (x, 1), code, speed);
6418 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6419 && arm_const_double_rtx (XEXP (x, 1)))
6421 *total += rtx_cost (XEXP (x, 0), code, speed);
6427 *total = COSTS_N_INSNS (20);
6431 *total = COSTS_N_INSNS (1);
6432 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6433 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6435 *total += rtx_cost (XEXP (x, 1), code, speed);
6439 subcode = GET_CODE (XEXP (x, 1));
6440 if (subcode == ASHIFT || subcode == ASHIFTRT
6441 || subcode == LSHIFTRT
6442 || subcode == ROTATE || subcode == ROTATERT)
6444 *total += rtx_cost (XEXP (x, 0), code, speed);
6445 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6449 /* A shift as a part of RSB costs no more than RSB itself. */
6450 if (GET_CODE (XEXP (x, 0)) == MULT
6451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6453 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6454 *total += rtx_cost (XEXP (x, 1), code, speed);
6459 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6461 *total += rtx_cost (XEXP (x, 0), code, speed);
6462 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6466 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6467 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6469 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6470 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6471 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6472 *total += COSTS_N_INSNS (1);
6480 if (code == PLUS && arm_arch6 && mode == SImode
6481 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6482 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6484 *total = COSTS_N_INSNS (1);
6485 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6487 *total += rtx_cost (XEXP (x, 1), code, speed);
6491 /* MLA: All arguments must be registers. We filter out
6492 multiplication by a power of two, so that we fall down into
6493 the code below.  */
6494 if (GET_CODE (XEXP (x, 0)) == MULT
6495 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6497 /* The cost comes from the cost of the multiply. */
6501 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6503 if (TARGET_HARD_FLOAT
6505 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6507 *total = COSTS_N_INSNS (1);
6508 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6509 && arm_const_double_rtx (XEXP (x, 1)))
6511 *total += rtx_cost (XEXP (x, 0), code, speed);
6518 *total = COSTS_N_INSNS (20);
6522 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6523 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6525 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6526 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6527 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6528 *total += COSTS_N_INSNS (1);
6534 case AND: case XOR: case IOR:
6537 /* Normally the frame registers will be split into reg+const during
6538 reload, so it is a bad idea to combine them with other instructions,
6539 since then they might not be moved outside of loops.  As a compromise
6540 we allow integration with ops that have a constant as their second
6541 operand.  */
6542 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6543 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6544 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6545 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6546 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6551 *total += COSTS_N_INSNS (2);
6552 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6553 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6555 *total += rtx_cost (XEXP (x, 0), code, speed);
6562 *total += COSTS_N_INSNS (1);
6563 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6564 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6566 *total += rtx_cost (XEXP (x, 0), code, speed);
6569 subcode = GET_CODE (XEXP (x, 0));
6570 if (subcode == ASHIFT || subcode == ASHIFTRT
6571 || subcode == LSHIFTRT
6572 || subcode == ROTATE || subcode == ROTATERT)
6574 *total += rtx_cost (XEXP (x, 1), code, speed);
6575 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6580 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6582 *total += rtx_cost (XEXP (x, 1), code, speed);
6583 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6587 if (subcode == UMIN || subcode == UMAX
6588 || subcode == SMIN || subcode == SMAX)
6590 *total = COSTS_N_INSNS (3);
6597 /* This should have been handled by the CPU specific routines. */
6601 if (arm_arch3m && mode == SImode
6602 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6603 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6604 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6605 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6607 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6609 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6612 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6616 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6618 if (TARGET_HARD_FLOAT
6620 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6622 *total = COSTS_N_INSNS (1);
6625 *total = COSTS_N_INSNS (2);
6631 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6632 if (mode == SImode && code == NOT)
6634 subcode = GET_CODE (XEXP (x, 0));
6635 if (subcode == ASHIFT || subcode == ASHIFTRT
6636 || subcode == LSHIFTRT
6637 || subcode == ROTATE || subcode == ROTATERT
6639 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6641 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6642 /* Register shifts cost an extra cycle. */
6643 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6644 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6653 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6655 *total = COSTS_N_INSNS (4);
6659 operand = XEXP (x, 0);
6661 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6662 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6663 && GET_CODE (XEXP (operand, 0)) == REG
6664 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6665 *total += COSTS_N_INSNS (1);
6666 *total += (rtx_cost (XEXP (x, 1), code, speed)
6667 + rtx_cost (XEXP (x, 2), code, speed));
6671 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6673 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6679 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6680 && mode == SImode && XEXP (x, 1) == const0_rtx)
6682 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6688 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6689 && mode == SImode && XEXP (x, 1) == const0_rtx)
6691 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6711 /* SCC insns.  If the comparison has already been performed, they
6712 cost 2 instructions.  Otherwise they need an additional comparison
6713 before them.  */
6714 *total = COSTS_N_INSNS (2);
6715 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6722 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6728 *total += COSTS_N_INSNS (1);
6729 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6730 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6732 *total += rtx_cost (XEXP (x, 0), code, speed);
6736 subcode = GET_CODE (XEXP (x, 0));
6737 if (subcode == ASHIFT || subcode == ASHIFTRT
6738 || subcode == LSHIFTRT
6739 || subcode == ROTATE || subcode == ROTATERT)
6741 *total += rtx_cost (XEXP (x, 1), code, speed);
6742 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6747 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6749 *total += rtx_cost (XEXP (x, 1), code, speed);
6750 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6760 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6761 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6762 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6763 *total += rtx_cost (XEXP (x, 1), code, speed);
6767 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6769 if (TARGET_HARD_FLOAT
6771 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6773 *total = COSTS_N_INSNS (1);
6776 *total = COSTS_N_INSNS (20);
6779 *total = COSTS_N_INSNS (1);
6781 *total += COSTS_N_INSNS (3);
6785 if (GET_MODE_CLASS (mode) == MODE_INT)
6789 *total += COSTS_N_INSNS (1);
6791 if (GET_MODE (XEXP (x, 0)) != SImode)
6795 if (GET_CODE (XEXP (x, 0)) != MEM)
6796 *total += COSTS_N_INSNS (1);
6798 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6799 *total += COSTS_N_INSNS (2);
6808 if (GET_MODE_CLASS (mode) == MODE_INT)
6811 *total += COSTS_N_INSNS (1);
6813 if (GET_MODE (XEXP (x, 0)) != SImode)
6817 if (GET_CODE (XEXP (x, 0)) != MEM)
6818 *total += COSTS_N_INSNS (1);
6820 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6821 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6828 switch (GET_MODE (XEXP (x, 0)))
6835 *total = COSTS_N_INSNS (1);
6845 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6849 if (const_ok_for_arm (INTVAL (x))
6850 || const_ok_for_arm (~INTVAL (x)))
6851 *total = COSTS_N_INSNS (1);
6853 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6854 INTVAL (x), NULL_RTX,
6861 *total = COSTS_N_INSNS (3);
6865 *total = COSTS_N_INSNS (1);
6869 *total = COSTS_N_INSNS (1);
6870 *total += rtx_cost (XEXP (x, 0), code, speed);
6874 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6875 && (mode == SFmode || !TARGET_VFP_SINGLE))
6876 *total = COSTS_N_INSNS (1);
6878 *total = COSTS_N_INSNS (4);
6882 *total = COSTS_N_INSNS (4);
6887 /* RTX costs when optimizing for size. */
6889 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6892 enum machine_mode mode = GET_MODE (x);
6895 /* XXX TBD. For now, use the standard costs. */
6896 *total = thumb1_rtx_costs (x, code, outer_code);
6900 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6904 /* A memory access costs 1 insn if the mode is small or the address is
6905 a single register; otherwise it costs one insn per word.  */
6906 if (REG_P (XEXP (x, 0)))
6907 *total = COSTS_N_INSNS (1);
6909 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6916 /* Needs a libcall, so it costs about this. */
6917 *total = COSTS_N_INSNS (2);
6921 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6923 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6931 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6933 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6936 else if (mode == SImode)
6938 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6939 /* Slightly disparage register shifts, but not by much. */
6940 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6941 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6945 /* Needs a libcall. */
6946 *total = COSTS_N_INSNS (2);
6950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6951 && (mode == SFmode || !TARGET_VFP_SINGLE))
6953 *total = COSTS_N_INSNS (1);
6959 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6960 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6962 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6963 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6964 || subcode1 == ROTATE || subcode1 == ROTATERT
6965 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6966 || subcode1 == ASHIFTRT)
6968 /* It's just the cost of the two operands. */
6973 *total = COSTS_N_INSNS (1);
6977 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6981 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6982 && (mode == SFmode || !TARGET_VFP_SINGLE))
6984 *total = COSTS_N_INSNS (1);
6988 /* A shift as a part of ADD costs nothing. */
6989 if (GET_CODE (XEXP (x, 0)) == MULT
6990 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6992 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6994 *total += rtx_cost (XEXP (x, 1), code, false);
6999 case AND: case XOR: case IOR:
7002 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7004 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7005 || subcode == LSHIFTRT || subcode == ASHIFTRT
7006 || (code == AND && subcode == NOT))
7008 /* It's just the cost of the two operands. */
7014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7022 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7023 && (mode == SFmode || !TARGET_VFP_SINGLE))
7025 *total = COSTS_N_INSNS (1);
7031 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7040 if (cc_register (XEXP (x, 0), VOIDmode))
7043 *total = COSTS_N_INSNS (1);
7047 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7048 && (mode == SFmode || !TARGET_VFP_SINGLE))
7049 *total = COSTS_N_INSNS (1);
7051 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7056 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7058 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7059 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7062 *total += COSTS_N_INSNS (1);
7067 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7069 switch (GET_MODE (XEXP (x, 0)))
7072 *total += COSTS_N_INSNS (1);
7076 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7082 *total += COSTS_N_INSNS (2);
7087 *total += COSTS_N_INSNS (1);
7092 if (const_ok_for_arm (INTVAL (x)))
7093 /* A multiplication by a constant requires another instruction
7094 to load the constant to a register. */
7095 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7097 else if (const_ok_for_arm (~INTVAL (x)))
7098 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7099 else if (const_ok_for_arm (-INTVAL (x)))
7101 if (outer_code == COMPARE || outer_code == PLUS
7102 || outer_code == MINUS)
7105 *total = COSTS_N_INSNS (1);
7108 *total = COSTS_N_INSNS (2);
7114 *total = COSTS_N_INSNS (2);
7118 *total = COSTS_N_INSNS (4);
7123 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7124 cost of these slightly. */
7125 *total = COSTS_N_INSNS (1) + 1;
7129 if (mode != VOIDmode)
7130 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7132 *total = COSTS_N_INSNS (4); /* Who knows?  */
7137 /* RTX costs.  When optimizing for speed, dispatch to the cost function
of the core being tuned for; otherwise use the size costs above.  */
7139 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7143 return arm_size_rtx_costs (x, (enum rtx_code) code,
7144 (enum rtx_code) outer_code, total);
7146 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7147 (enum rtx_code) outer_code,
7151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7152 supported on any "slowmul" cores, so it can be ignored. */
7155 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7156 int *total, bool speed)
7158 enum machine_mode mode = GET_MODE (x);
7162 *total = thumb1_rtx_costs (x, code, outer_code);
7169 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7172 *total = COSTS_N_INSNS (20);
7176 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7178 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7179 & (unsigned HOST_WIDE_INT) 0xffffffff);
7180 int cost, const_ok = const_ok_for_arm (i);
7181 int j, booth_unit_size;
7183 /* Tune as appropriate. */
7184 cost = const_ok ? 4 : 8;
7185 booth_unit_size = 2;
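/* The loop below charges one extra unit per 2-bit Booth step still
   needed to retire the constant: e.g. a multiplier occupying only the
   low 8 bits takes four steps, one using all 32 bits takes sixteen.  */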
7186 for (j = 0; i && j < 32; j += booth_unit_size)
7188 i >>= booth_unit_size;
7192 *total = COSTS_N_INSNS (cost);
7193 *total += rtx_cost (XEXP (x, 0), code, speed);
7197 *total = COSTS_N_INSNS (20);
7201 return arm_rtx_costs_1 (x, outer_code, total, speed);
7206 /* RTX cost for cores with a fast multiply unit (M variants). */
7209 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7210 int *total, bool speed)
7212 enum machine_mode mode = GET_MODE (x);
7216 *total = thumb1_rtx_costs (x, code, outer_code);
7220 /* ??? Should Thumb-2 use different costs?  */
7224 /* There is no point basing this on the tuning, since it is always the
7225 fast variant if it exists at all. */
7227 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7228 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7229 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7231 *total = COSTS_N_INSNS (2);
7238 *total = COSTS_N_INSNS (5);
7242 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7244 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7245 & (unsigned HOST_WIDE_INT) 0xffffffff);
7246 int cost, const_ok = const_ok_for_arm (i);
7247 int j, booth_unit_size;
7249 /* Tune as appropriate. */
7250 cost = const_ok ? 4 : 8;
7251 booth_unit_size = 8;
7252 for (j = 0; i && j < 32; j += booth_unit_size)
7254 i >>= booth_unit_size;
7258 *total = COSTS_N_INSNS (cost);
7264 *total = COSTS_N_INSNS (4);
7268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7270 if (TARGET_HARD_FLOAT
7272 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7274 *total = COSTS_N_INSNS (1);
7279 /* Requires a lib call.  */
7280 *total = COSTS_N_INSNS (20);
7284 return arm_rtx_costs_1 (x, outer_code, total, speed);
7289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7290 so it can be ignored. */
7293 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7295 enum machine_mode mode = GET_MODE (x);
7299 *total = thumb1_rtx_costs (x, code, outer_code);
7306 if (GET_CODE (XEXP (x, 0)) != MULT)
7307 return arm_rtx_costs_1 (x, outer_code, total, speed);
7309 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7310 will stall until the multiplication is complete. */
7311 *total = COSTS_N_INSNS (3);
7315 /* There is no point basing this on the tuning, since it is always the
7316 fast variant if it exists at all. */
7318 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7319 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7320 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7322 *total = COSTS_N_INSNS (2);
7329 *total = COSTS_N_INSNS (5);
7333 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7335 /* If operand 1 is a constant we can more accurately
7336 calculate the cost of the multiply. The multiplier can
7337 retire 15 bits on the first cycle and a further 12 on the
7338 second. We do, of course, have to load the constant into
7339 a register first. */
7340 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7341 /* There's a general overhead of one cycle. */
7343 unsigned HOST_WIDE_INT masked_const;
7348 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7350 masked_const = i & 0xffff8000;
7351 if (masked_const != 0)
7354 masked_const = i & 0xf8000000;
7355 if (masked_const != 0)
7358 *total = COSTS_N_INSNS (cost);
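/* I.e. the cost grows in two steps: once the constant needs any of
   bits 15..31 and again once it needs any of bits 27..31, reflecting
   the 15-then-12 bit retirement rate described above.  */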
7364 *total = COSTS_N_INSNS (3);
7368 /* Requires a lib call.  */
7369 *total = COSTS_N_INSNS (20);
7373 return arm_rtx_costs_1 (x, outer_code, total, speed);
7378 /* RTX costs for 9e (and later) cores. */
7381 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7382 int *total, bool speed)
7384 enum machine_mode mode = GET_MODE (x);
7391 *total = COSTS_N_INSNS (3);
7395 *total = thumb1_rtx_costs (x, code, outer_code);
7403 /* There is no point basing this on the tuning, since it is always the
7404 fast variant if it exists at all. */
7406 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7407 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7408 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7410 *total = COSTS_N_INSNS (2);
7417 *total = COSTS_N_INSNS (5);
7423 *total = COSTS_N_INSNS (2);
7427 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7429 if (TARGET_HARD_FLOAT
7431 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7433 *total = COSTS_N_INSNS (1);
7438 *total = COSTS_N_INSNS (20);
7442 return arm_rtx_costs_1 (x, outer_code, total, speed);
7445 /* All address computations that can be done are free, but rtx cost returns
7446 the same for practically all of them.  So we weight the different types
7447 of address here in preference order (most preferred first):
7448 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
7450 arm_arm_address_cost (rtx x)
7452 enum rtx_code c = GET_CODE (x);
7454 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7456 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7461 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7464 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7474 arm_thumb_address_cost (rtx x)
7476 enum rtx_code c = GET_CODE (x);
7481 && GET_CODE (XEXP (x, 0)) == REG
7482 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7489 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7491 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7495 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7499 /* Some true dependencies can have a higher cost depending
7500 on precisely how certain input operands are used. */
7502 && REG_NOTE_KIND (link) == 0
7503 && recog_memoized (insn) >= 0
7504 && recog_memoized (dep) >= 0)
7506 int shift_opnum = get_attr_shift (insn);
7507 enum attr_type attr_type = get_attr_type (dep);
7509 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7510 operand for INSN. If we have a shifted input operand and the
7511 instruction we depend on is another ALU instruction, then we may
7512 have to account for an additional stall. */
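/* For example, an ADD with a shifted operand, such as
   "add r0, r1, r2, lsl #4", that consumes the result of the
   immediately preceding ALU instruction may stall for an extra
   cycle on these cores.  */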
7513 if (shift_opnum != 0
7514 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7516 rtx shifted_operand;
7519 /* Get the shifted operand. */
7520 extract_insn (insn);
7521 shifted_operand = recog_data.operand[shift_opnum];
7523 /* Iterate over all the operands in DEP.  If we write an operand
7524 that overlaps with SHIFTED_OPERAND, then we have to increase the
7525 cost of this dependency.  */
7527 preprocess_constraints ();
7528 for (opno = 0; opno < recog_data.n_operands; opno++)
7530 /* We can ignore strict inputs. */
7531 if (recog_data.operand_type[opno] == OP_IN)
7534 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7541 /* XXX This is not strictly true for the FPA. */
7542 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7543 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7546 /* Call insns don't incur a stall, even if they follow a load. */
7547 if (REG_NOTE_KIND (link) == 0
7548 && GET_CODE (insn) == CALL_INSN)
7551 if ((i_pat = single_set (insn)) != NULL
7552 && GET_CODE (SET_SRC (i_pat)) == MEM
7553 && (d_pat = single_set (dep)) != NULL
7554 && GET_CODE (SET_DEST (d_pat)) == MEM)
7556 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7557 /* This is a load after a store; there is no conflict if the load reads
7558 from a cached area. Assume that loads from the stack, and from the
7559 constant pool are cached, and that others will miss. This is a
7562 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7563 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7564 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7565 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7572 static int fp_consts_inited = 0;
7574 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7575 static const char * const strings_fp[8] =
7578 "4", "5", "0.5", "10"
7581 static REAL_VALUE_TYPE values_fp[8];
7584 init_fp_table (void)
7590 fp_consts_inited = 1;
7592 fp_consts_inited = 8;
7594 for (i = 0; i < fp_consts_inited; i++)
7596 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7601 /* Return TRUE if rtx X is a valid immediate FP constant. */
7603 arm_const_double_rtx (rtx x)
7608 if (!fp_consts_inited)
7611 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7612 if (REAL_VALUE_MINUS_ZERO (r))
7615 for (i = 0; i < fp_consts_inited; i++)
7616 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7622 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant.  */
7624 neg_const_double_rtx_ok_for_fpa (rtx x)
7629 if (!fp_consts_inited)
7632 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7633 r = REAL_VALUE_NEGATE (r);
7634 if (REAL_VALUE_MINUS_ZERO (r))
7637 for (i = 0; i < 8; i++)
7638 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7645 /* VFPv3 has a fairly wide range of representable immediates, formed from
7646 "quarter-precision" floating-point values. These can be evaluated using this
7647 formula (with ^ for exponentiation):
7649 -1^s * n * 2^-r
7651 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7652 16 <= n <= 31 and 0 <= r <= 7.
7654 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7656 - A (most-significant) is the sign bit.
7657 - BCD are the exponent (encoded as r XOR 3).
7658 - EFGH are the mantissa (encoded as n - 16).  */
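/* A minimal standalone sketch (not part of the compiler) showing how
   the ABCDEFGH index described above decodes back to its value.  The
   helper name is hypothetical and purely illustrative; e.g. 1.0
   (n == 16, r == 4) corresponds to index 0x70.  */
#if 0
#include <math.h>

static double
vfp3_decode_index (int imm8)
{
  int sign = (imm8 >> 7) & 1;	    /* A: the sign bit 's'.  */
  int r = ((imm8 >> 4) & 7) ^ 3;    /* BCD: the exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;	    /* EFGH: the mantissa, stored as n - 16.  */

  /* -1^s * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7.  */
  return (sign ? -1.0 : 1.0) * ldexp ((double) n, -r);
}
#endif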
7661 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7662 fconst[sd] instruction, or -1 if X isn't suitable. */
7664 vfp3_const_double_index (rtx x)
7666 REAL_VALUE_TYPE r, m;
7668 unsigned HOST_WIDE_INT mantissa, mant_hi;
7669 unsigned HOST_WIDE_INT mask;
7670 HOST_WIDE_INT m1, m2;
7671 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7673 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7678 /* We can't represent these things, so detect them first. */
7679 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7682 /* Extract sign, exponent and mantissa. */
7683 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7684 r = REAL_VALUE_ABS (r);
7685 exponent = REAL_EXP (&r);
7686 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7687 highest (sign) bit, with a fixed binary point at bit point_pos.
7688 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7689 bits for the mantissa, this may fail (low bits would be lost). */
7690 real_ldexp (&m, &r, point_pos - exponent);
7691 REAL_VALUE_TO_INT (&m1, &m2, m);
7695 /* If there are bits set in the low part of the mantissa, we can't
7696 represent this value. */
7700 /* Now make it so that mantissa contains the most-significant bits, and move
7701 the point_pos to indicate that the least-significant bits have been
7702 discarded.  */
7703 point_pos -= HOST_BITS_PER_WIDE_INT;
7706 /* We can permit four significant bits of mantissa only, plus a high bit
7707 which is always 1. */
7708 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7709 if ((mantissa & mask) != 0)
7712 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7713 mantissa >>= point_pos - 5;
7715 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7716 floating-point immediate zero with Neon using an integer-zero load, but
7717 that case is handled elsewhere.) */
7721 gcc_assert (mantissa >= 16 && mantissa <= 31);
7723 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7724 normalized significands are in the range [1, 2). (Our mantissa is shifted
7725 left 4 places at this point relative to normalized IEEE754 values). GCC
7726 internally uses [0.5, 1) (see real.c), so the exponent returned from
7727 REAL_EXP must be altered. */
7728 exponent = 5 - exponent;
7730 if (exponent < 0 || exponent > 7)
7733 /* Sign, mantissa and exponent are now in the correct form to plug into the
7734 formula described in the comment above. */
7735 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7738 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7740 vfp3_const_double_rtx (rtx x)
7745 return vfp3_const_double_index (x) != -1;
7748 /* Recognize immediates which can be used in various Neon instructions. Legal
7749 immediates are described by the following table (for VMVN variants, the
7750 bitwise inverse of the constant shown is recognized. In either case, VMOV
7751 is output and the correct instruction to use for a given constant is chosen
7752 by the assembler). The constant shown is replicated across all elements of
7753 the destination vector.
7755 insn elems variant constant (binary)
7756 ---- ----- ------- -----------------
7757 vmov i32 0 00000000 00000000 00000000 abcdefgh
7758 vmov i32 1 00000000 00000000 abcdefgh 00000000
7759 vmov i32 2 00000000 abcdefgh 00000000 00000000
7760 vmov i32 3 abcdefgh 00000000 00000000 00000000
7761 vmov i16 4 00000000 abcdefgh
7762 vmov i16 5 abcdefgh 00000000
7763 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7764 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7765 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7766 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7767 vmvn i16 10 00000000 abcdefgh
7768 vmvn i16 11 abcdefgh 00000000
7769 vmov i32 12 00000000 00000000 abcdefgh 11111111
7770 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7771 vmov i32 14 00000000 abcdefgh 11111111 11111111
7772 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7773 vmov i8 16 abcdefgh
7774 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7775 eeeeeeee ffffffff gggggggg hhhhhhhh
7776 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7778 For case 18, B = !b. Representable values are exactly those accepted by
7779 vfp3_const_double_index, but are output as floating-point numbers rather
7780 than integers.
7782 Variants 0-5 (inclusive) may also be used as immediates for the second
7783 operand of VORR/VBIC instructions.
7785 The INVERSE argument causes the bitwise inverse of the given operand to be
7786 recognized instead (used for recognizing legal immediates for the VAND/VORN
7787 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7788 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7789 output, rather than the real insns vbic/vorr).
7791 INVERSE makes no difference to the recognition of float vectors.
7793 The return value is the variant of immediate as shown in the above table, or
7794 -1 if the given value doesn't match any of the listed patterns.
7797 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7798 rtx *modconst, int *elementwidth)
7800 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7802 for (i = 0; i < idx; i += (STRIDE)) \
7807 immtype = (CLASS); \
7808 elsize = (ELSIZE); \
7812 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7813 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7814 unsigned char bytes[16];
7815 int immtype = -1, matches;
7816 unsigned int invmask = inverse ? 0xff : 0;
7818 /* Vectors of float constants. */
7819 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7821 rtx el0 = CONST_VECTOR_ELT (op, 0);
7824 if (!vfp3_const_double_rtx (el0))
7827 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7829 for (i = 1; i < n_elts; i++)
7831 rtx elt = CONST_VECTOR_ELT (op, i);
7834 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7836 if (!REAL_VALUES_EQUAL (r0, re))
7841 *modconst = CONST_VECTOR_ELT (op, 0);
7849 /* Splat vector constant out into a byte vector. */
7850 for (i = 0; i < n_elts; i++)
7852 rtx el = CONST_VECTOR_ELT (op, i);
7853 unsigned HOST_WIDE_INT elpart;
7854 unsigned int part, parts;
7856 if (GET_CODE (el) == CONST_INT)
7858 elpart = INTVAL (el);
7861 else if (GET_CODE (el) == CONST_DOUBLE)
7863 elpart = CONST_DOUBLE_LOW (el);
7869 for (part = 0; part < parts; part++)
7872 for (byte = 0; byte < innersize; byte++)
7874 bytes[idx++] = (elpart & 0xff) ^ invmask;
7875 elpart >>= BITS_PER_UNIT;
7877 if (GET_CODE (el) == CONST_DOUBLE)
7878 elpart = CONST_DOUBLE_HIGH (el);
7883 gcc_assert (idx == GET_MODE_SIZE (mode));
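/* For example, a V8HI constant with every element 0xff00 has now been
   splatted, least-significant byte first, to the byte sequence
   {0x00, 0xff, 0x00, 0xff, ...}, which matches variant 5 below.  */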
7887 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7888 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7890 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7891 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7893 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7894 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7896 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7897 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7899 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7901 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7903 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7904 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7906 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7907 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7909 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7910 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7912 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7913 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7915 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7917 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7919 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7920 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7922 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7923 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7925 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7926 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7928 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7929 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7931 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7933 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7934 && bytes[i] == bytes[(i + 8) % idx]);
7942 *elementwidth = elsize;
7946 unsigned HOST_WIDE_INT imm = 0;
7948 /* Un-invert bytes of recognized vector, if necessary. */
7950 for (i = 0; i < idx; i++)
7951 bytes[i] ^= invmask;
7955 /* FIXME: Broken on 32-bit H_W_I hosts. */
7956 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7958 for (i = 0; i < 8; i++)
7959 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7960 << (i * BITS_PER_UNIT);
7962 *modconst = GEN_INT (imm);
7966 unsigned HOST_WIDE_INT imm = 0;
7968 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7969 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7971 *modconst = GEN_INT (imm);
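/* A minimal standalone sketch (not part of the compiler) of the
   variant-0 test the CHECK macro expands to above: every four-byte
   group must look like {abcdefgh, 0, 0, 0}, so a V4SI constant with
   all elements 0x000000ab matches.  The helper name is hypothetical.  */
#if 0
static int
is_vmov_i32_variant0 (const unsigned char *bytes, unsigned int nbytes)
{
  unsigned int i;

  for (i = 0; i < nbytes; i += 4)
    if (bytes[i] != bytes[0] || bytes[i + 1] != 0
	|| bytes[i + 2] != 0 || bytes[i + 3] != 0)
      return 0;

  return 1;
}
#endif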
7979 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7980 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7981 float elements), and a modified constant (whatever should be output for a
7982 VMOV) in *MODCONST. */
7985 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7986 rtx *modconst, int *elementwidth)
7990 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7996 *modconst = tmpconst;
7999 *elementwidth = tmpwidth;
8004 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8005 the immediate is valid, write a constant suitable for using as an operand
8006 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8007 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8010 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8011 rtx *modconst, int *elementwidth)
8015 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8017 if (retval < 0 || retval > 5)
8021 *modconst = tmpconst;
8024 *elementwidth = tmpwidth;
8029 /* Return a string suitable for output of Neon immediate logic operation
8030 MNEM.  */
8033 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8034 int inverse, int quad)
8036 int width, is_valid;
8037 static char templ[40];
8039 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8041 gcc_assert (is_valid != 0);
8044 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8046 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8051 /* Output a sequence of pairwise operations to implement a reduction.
8052 NOTE: We do "too much work" here, because pairwise operations work on two
8053 registers-worth of operands in one go.  Unfortunately we don't think we
8054 can exploit those extra calculations to do the full operation in fewer steps.
8055 Although all vector elements of the result but the first are ignored, we
8056 actually calculate the same result in each of the elements. An alternative
8057 such as initially loading a vector with zero to use as each of the second
8058 operands would use up an additional register and take an extra instruction,
8059 for no particular gain. */
8062 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8063 rtx (*reduc) (rtx, rtx, rtx))
8065 enum machine_mode inner = GET_MODE_INNER (mode);
8066 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
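/* For example, summing a V4SI vector takes two pairwise steps:
   {a,b,c,d} -> {a+b, c+d, a+b, c+d} -> {a+b+c+d, ...}; only the
   final step writes to OP0.  */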
8069 for (i = parts / 2; i >= 1; i /= 2)
8071 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8072 emit_insn (reduc (dest, tmpsum, tmpsum));
8077 /* If VALS is a vector constant that can be loaded into a register
8078 using VDUP, generate instructions to do so and return an RTX to
8079 assign to the register. Otherwise return NULL_RTX. */
8082 neon_vdup_constant (rtx vals)
8084 enum machine_mode mode = GET_MODE (vals);
8085 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8086 int n_elts = GET_MODE_NUNITS (mode);
8087 bool all_same = true;
8091 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8094 for (i = 0; i < n_elts; ++i)
8096 x = XVECEXP (vals, 0, i);
8097 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8102 /* The elements are not all the same. We could handle repeating
8103 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8104 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8105 vdup.i16).  */
8108 /* We can load this constant by using VDUP and a constant in a
8109 single ARM register.  This will be cheaper than a vector
8110 load.  */
8112 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8113 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8117 /* Generate code to load VALS, which is a PARALLEL containing only
8118 constants (for vec_init) or CONST_VECTOR, efficiently into a
8119 register. Returns an RTX to copy into the register, or NULL_RTX
8120 for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
8123 neon_make_constant (rtx vals)
8125 enum machine_mode mode = GET_MODE (vals);
8127 rtx const_vec = NULL_RTX;
8128 int n_elts = GET_MODE_NUNITS (mode);
8132 if (GET_CODE (vals) == CONST_VECTOR)
8134 else if (GET_CODE (vals) == PARALLEL)
8136 /* A CONST_VECTOR must contain only CONST_INTs and
8137 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8138 Only store valid constants in a CONST_VECTOR. */
8139 for (i = 0; i < n_elts; ++i)
8141 rtx x = XVECEXP (vals, 0, i);
8142 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8145 if (n_const == n_elts)
8146 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8151 if (const_vec != NULL
8152 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8153 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8155 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8156 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8157 pipeline cycle; creating the constant takes one or two ARM
8158 pipeline cycles.  */
8160 else if (const_vec != NULL_RTX)
8161 /* Load from constant pool. On Cortex-A8 this takes two cycles
8162 (for either double or quad vectors).  We cannot take advantage
8163 of single-cycle VLD1 because we need a PC-relative addressing
8164 mode.  */
8167 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8168 We cannot construct an initializer.  */
8172 /* Initialize vector TARGET to VALS. */
8175 neon_expand_vector_init (rtx target, rtx vals)
8177 enum machine_mode mode = GET_MODE (target);
8178 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8179 int n_elts = GET_MODE_NUNITS (mode);
8180 int n_var = 0, one_var = -1;
8181 bool all_same = true;
8185 for (i = 0; i < n_elts; ++i)
8187 x = XVECEXP (vals, 0, i);
8188 if (!CONSTANT_P (x))
8189 ++n_var, one_var = i;
8191 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8197 rtx constant = neon_make_constant (vals);
8198 if (constant != NULL_RTX)
8200 emit_move_insn (target, constant);
8205 /* Splat a single non-constant element if we can. */
8206 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8208 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8209 emit_insn (gen_rtx_SET (VOIDmode, target,
8210 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8215 /* One field is non-constant. Load constant then overwrite varying
8216 field. This is more efficient than using the stack. */
8219 rtx copy = copy_rtx (vals);
8222 /* Load constant part of vector, substitute neighboring value for
8223 varying element.  */
8224 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8225 neon_expand_vector_init (target, copy);
8227 /* Insert variable. */
8228 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8229 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8230 emit_insn (gen_rtx_SET (VOIDmode, target,
8231 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8235 /* Construct the vector in memory one field at a time
8236 and load the whole vector. */
8237 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8238 for (i = 0; i < n_elts; i++)
8239 emit_move_insn (adjust_address_nv (mem, inner_mode,
8240 i * GET_MODE_SIZE (inner_mode)),
8241 XVECEXP (vals, 0, i));
8242 emit_move_insn (target, mem);
8245 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8246 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8247 reported source locations are bogus. */
8250 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8255 gcc_assert (GET_CODE (operand) == CONST_INT);
8257 lane = INTVAL (operand);
8259 if (lane < low || lane >= high)
8263 /* Bounds-check lanes. */
8266 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8268 bounds_check (operand, low, high, "lane out of range");
8271 /* Bounds-check constants. */
8274 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8276 bounds_check (operand, low, high, "constant out of range");
8280 neon_element_bits (enum machine_mode mode)
8283 return GET_MODE_BITSIZE (mode);
8285 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8289 /* Predicates for `match_operand' and `match_operator'. */
8291 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8293 cirrus_memory_offset (rtx op)
8295 /* Reject eliminable registers. */
8296 if (! (reload_in_progress || reload_completed)
8297 && ( reg_mentioned_p (frame_pointer_rtx, op)
8298 || reg_mentioned_p (arg_pointer_rtx, op)
8299 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8300 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8301 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8302 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8305 if (GET_CODE (op) == MEM)
8311 /* Match: (mem (reg)). */
8312 if (GET_CODE (ind) == REG)
8318 if (GET_CODE (ind) == PLUS
8319 && GET_CODE (XEXP (ind, 0)) == REG
8320 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8321 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8328 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8329 WB is true if full writeback address modes are allowed and is false
8330 if limited writeback address modes (POST_INC and PRE_DEC) are
8331 allowed.  */
8334 arm_coproc_mem_operand (rtx op, bool wb)
8338 /* Reject eliminable registers. */
8339 if (! (reload_in_progress || reload_completed)
8340 && ( reg_mentioned_p (frame_pointer_rtx, op)
8341 || reg_mentioned_p (arg_pointer_rtx, op)
8342 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8343 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8344 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8345 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8348 /* Constants are converted into offsets from labels. */
8349 if (GET_CODE (op) != MEM)
8354 if (reload_completed
8355 && (GET_CODE (ind) == LABEL_REF
8356 || (GET_CODE (ind) == CONST
8357 && GET_CODE (XEXP (ind, 0)) == PLUS
8358 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8359 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8362 /* Match: (mem (reg)). */
8363 if (GET_CODE (ind) == REG)
8364 return arm_address_register_rtx_p (ind, 0);
8366 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
8367 acceptable in any case (subject to verification by
8368 arm_address_register_rtx_p). We need WB to be true to accept
8369 PRE_INC and POST_DEC. */
8370 if (GET_CODE (ind) == POST_INC
8371 || GET_CODE (ind) == PRE_DEC
8373 && (GET_CODE (ind) == PRE_INC
8374 || GET_CODE (ind) == POST_DEC)))
8375 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8378 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8379 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8380 && GET_CODE (XEXP (ind, 1)) == PLUS
8381 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8382 ind = XEXP (ind, 1);
8387 if (GET_CODE (ind) == PLUS
8388 && GET_CODE (XEXP (ind, 0)) == REG
8389 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8390 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8391 && INTVAL (XEXP (ind, 1)) > -1024
8392 && INTVAL (XEXP (ind, 1)) < 1024
8393 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8399 /* Return TRUE if OP is a memory operand which we can load or store a vector
8400 to/from. TYPE is one of the following values:
8401 0 - Vector load/store (vldr)
8402 1 - Core registers (ldm)
8403 2 - Element/structure loads (vld1)
8406 neon_vector_mem_operand (rtx op, int type)
8410 /* Reject eliminable registers. */
8411 if (! (reload_in_progress || reload_completed)
8412 && ( reg_mentioned_p (frame_pointer_rtx, op)
8413 || reg_mentioned_p (arg_pointer_rtx, op)
8414 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8415 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8416 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8417 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8420 /* Constants are converted into offsets from labels. */
8421 if (GET_CODE (op) != MEM)
8426 if (reload_completed
8427 && (GET_CODE (ind) == LABEL_REF
8428 || (GET_CODE (ind) == CONST
8429 && GET_CODE (XEXP (ind, 0)) == PLUS
8430 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8431 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8434 /* Match: (mem (reg)). */
8435 if (GET_CODE (ind) == REG)
8436 return arm_address_register_rtx_p (ind, 0);
8438 /* Allow post-increment with Neon registers. */
8439 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8440 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8442 /* FIXME: vld1 allows register post-modify. */
8448 && GET_CODE (ind) == PLUS
8449 && GET_CODE (XEXP (ind, 0)) == REG
8450 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8451 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8452 && INTVAL (XEXP (ind, 1)) > -1024
8453 && INTVAL (XEXP (ind, 1)) < 1016
8454 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8460 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8461 type.  */
8463 neon_struct_mem_operand (rtx op)
8467 /* Reject eliminable registers. */
8468 if (! (reload_in_progress || reload_completed)
8469 && ( reg_mentioned_p (frame_pointer_rtx, op)
8470 || reg_mentioned_p (arg_pointer_rtx, op)
8471 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8472 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8473 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8474 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8477 /* Constants are converted into offsets from labels. */
8478 if (GET_CODE (op) != MEM)
8483 if (reload_completed
8484 && (GET_CODE (ind) == LABEL_REF
8485 || (GET_CODE (ind) == CONST
8486 && GET_CODE (XEXP (ind, 0)) == PLUS
8487 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8488 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8491 /* Match: (mem (reg)). */
8492 if (GET_CODE (ind) == REG)
8493 return arm_address_register_rtx_p (ind, 0);
8498 /* Return true if X is a register that will be eliminated later on. */
8500 arm_eliminable_register (rtx x)
8502 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8503 || REGNO (x) == ARG_POINTER_REGNUM
8504 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8505 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8508 /* Return GENERAL_REGS if a scratch register is required to reload X
8509 to/from coprocessor registers.  Otherwise return NO_REGS.  */
8512 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8516 if (!TARGET_NEON_FP16)
8517 return GENERAL_REGS;
8518 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8520 return GENERAL_REGS;
8524 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8525 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8526 && neon_vector_mem_operand (x, 0))
8529 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8532 return GENERAL_REGS;
8535 /* Values which must be returned in the most-significant end of the return
8536 register.  */
8539 arm_return_in_msb (const_tree valtype)
8541 return (TARGET_AAPCS_BASED
8543 && (AGGREGATE_TYPE_P (valtype)
8544 || TREE_CODE (valtype) == COMPLEX_TYPE));
8547 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8548 Used by the Cirrus Maverick code, which has to work around
8549 a hardware bug triggered by such instructions.  */
8551 arm_memory_load_p (rtx insn)
8553 rtx body, lhs, rhs;
8555 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8558 body = PATTERN (insn);
8560 if (GET_CODE (body) != SET)
8563 lhs = XEXP (body, 0);
8564 rhs = XEXP (body, 1);
8566 lhs = REG_OR_SUBREG_RTX (lhs);
8568 /* If the destination is not a general purpose
8569 register we do not have to worry. */
8570 if (GET_CODE (lhs) != REG
8571 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8574 /* As well as loads from memory we also have to react
8575 to loads of invalid constants which will be turned
8576 into loads from the minipool. */
8577 return (GET_CODE (rhs) == MEM
8578 || GET_CODE (rhs) == SYMBOL_REF
8579 || note_invalid_constants (insn, -1, false));
8582 /* Return TRUE if INSN is a Cirrus instruction. */
8584 arm_cirrus_insn_p (rtx insn)
8586 enum attr_cirrus attr;
8588 /* get_attr cannot accept USE or CLOBBER. */
8590 || GET_CODE (insn) != INSN
8591 || GET_CODE (PATTERN (insn)) == USE
8592 || GET_CODE (PATTERN (insn)) == CLOBBER)
8595 attr = get_attr_cirrus (insn);
8597 return attr != CIRRUS_NOT;
8600 /* Cirrus reorg for invalid instruction combinations. */
8602 cirrus_reorg (rtx first)
8604 enum attr_cirrus attr;
8605 rtx body = PATTERN (first);
8609 /* Any branch must be followed by two non-Cirrus instructions.  */
8610 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8613 t = next_nonnote_insn (first);
8615 if (arm_cirrus_insn_p (t))
8618 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8622 emit_insn_after (gen_nop (), first);
8627 /* (float (blah)) is in parallel with a clobber. */
8628 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8629 body = XVECEXP (body, 0, 0);
8631 if (GET_CODE (body) == SET)
8633 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8635 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8636 be followed by a non-Cirrus insn.  */
8637 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8639 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8640 emit_insn_after (gen_nop (), first);
8644 else if (arm_memory_load_p (first))
8646 unsigned int arm_regno;
8648 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8649 ldr/cfmv64hr combination where the Rd field is the same
8650 in both instructions must be split with a non-Cirrus
8651 insn.  Example:
8653 ldr r0, blah
8654 nop
8655 cfmvsr mvf0, r0.  */
8657 /* Get ARM register number for ldr insn.  */
8658 if (GET_CODE (lhs) == REG)
8659 arm_regno = REGNO (lhs);
8662 gcc_assert (GET_CODE (rhs) == REG);
8663 arm_regno = REGNO (rhs);
8667 first = next_nonnote_insn (first);
8669 if (! arm_cirrus_insn_p (first))
8672 body = PATTERN (first);
8674 /* (float (blah)) is in parallel with a clobber. */
8675 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8676 body = XVECEXP (body, 0, 0);
8678 if (GET_CODE (body) == FLOAT)
8679 body = XEXP (body, 0);
8681 if (get_attr_cirrus (first) == CIRRUS_MOVE
8682 && GET_CODE (XEXP (body, 1)) == REG
8683 && arm_regno == REGNO (XEXP (body, 1)))
8684 emit_insn_after (gen_nop (), first);
8690 /* get_attr cannot accept USE or CLOBBER. */
8692 || GET_CODE (first) != INSN
8693 || GET_CODE (PATTERN (first)) == USE
8694 || GET_CODE (PATTERN (first)) == CLOBBER)
8697 attr = get_attr_cirrus (first);
8699 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8700 must be followed by a non-coprocessor instruction. */
8701 if (attr == CIRRUS_COMPARE)
8705 t = next_nonnote_insn (first);
8707 if (arm_cirrus_insn_p (t))
8710 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8714 emit_insn_after (gen_nop (), first);
8720 /* Return TRUE if X references a SYMBOL_REF. */
8722 symbol_mentioned_p (rtx x)
8727 if (GET_CODE (x) == SYMBOL_REF)
8730 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8731 are constant offsets, not symbols. */
8732 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8735 fmt = GET_RTX_FORMAT (GET_CODE (x));
8737 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8743 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8744 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8747 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8754 /* Return TRUE if X references a LABEL_REF. */
8756 label_mentioned_p (rtx x)
8761 if (GET_CODE (x) == LABEL_REF)
8764 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8765 instruction, but they are constant offsets, not symbols. */
8766 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8769 fmt = GET_RTX_FORMAT (GET_CODE (x));
8770 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8776 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8777 if (label_mentioned_p (XVECEXP (x, i, j)))
8780 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8788 tls_mentioned_p (rtx x)
8790 switch (GET_CODE (x))
8793 return tls_mentioned_p (XEXP (x, 0));
8796 if (XINT (x, 1) == UNSPEC_TLS)
8804 /* Must not copy any rtx that uses a pc-relative address. */
8807 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
8809 if (GET_CODE (*x) == UNSPEC
8810 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8816 arm_cannot_copy_insn_p (rtx insn)
8818 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8824 enum rtx_code code = GET_CODE (x);
8841 /* Return 1 if memory locations are adjacent. */
8843 adjacent_mem_locations (rtx a, rtx b)
8845 /* We don't guarantee to preserve the order of these memory refs. */
8846 if (volatile_refs_p (a) || volatile_refs_p (b))
8849 if ((GET_CODE (XEXP (a, 0)) == REG
8850 || (GET_CODE (XEXP (a, 0)) == PLUS
8851 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8852 && (GET_CODE (XEXP (b, 0)) == REG
8853 || (GET_CODE (XEXP (b, 0)) == PLUS
8854 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8856 HOST_WIDE_INT val0 = 0, val1 = 0;
8860 if (GET_CODE (XEXP (a, 0)) == PLUS)
8862 reg0 = XEXP (XEXP (a, 0), 0);
8863 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8868 if (GET_CODE (XEXP (b, 0)) == PLUS)
8870 reg1 = XEXP (XEXP (b, 0), 0);
8871 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8876 /* Don't accept any offset that will require multiple
8877 instructions to handle, since this would cause the
8878 arith_adjacentmem pattern to output an overlong sequence. */
8879 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8882 /* Don't allow an eliminable register: register elimination can make
8883 the offset too large. */
8884 if (arm_eliminable_register (reg0))
8887 val_diff = val1 - val0;
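/* For example, [r4, #4] and [r4, #8] share a base register and give
   val_diff == 4, so the two words are adjacent.  */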
8891 /* If the target has load delay slots, then there's no benefit
8892 to using an ldm instruction unless the offset is zero and
8893 we are optimizing for size. */
8894 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8895 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8896 && (val_diff == 4 || val_diff == -4));
8899 return ((REGNO (reg0) == REGNO (reg1))
8900 && (val_diff == 4 || val_diff == -4));
8907 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8908 HOST_WIDE_INT *load_offset)
8910 int unsorted_regs[4];
8911 HOST_WIDE_INT unsorted_offsets[4];
8916 /* Can only handle 2, 3, or 4 insns at present,
8917 though could be easily extended if required. */
8918 gcc_assert (nops >= 2 && nops <= 4);
8920 memset (order, 0, 4 * sizeof (int));
8922 /* Loop over the operands and check that the memory references are
8923 suitable (i.e. immediate offsets from the same base register). At
8924 the same time, extract the target register, and the memory
8925 offsets.  */
8926 for (i = 0; i < nops; i++)
8931 /* Convert a subreg of a mem into the mem itself. */
8932 if (GET_CODE (operands[nops + i]) == SUBREG)
8933 operands[nops + i] = alter_subreg (operands + (nops + i));
8935 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8937 /* Don't reorder volatile memory references; it doesn't seem worth
8938 looking for the case where the order is ok anyway. */
8939 if (MEM_VOLATILE_P (operands[nops + i]))
8942 offset = const0_rtx;
8944 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8945 || (GET_CODE (reg) == SUBREG
8946 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8947 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8948 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8950 || (GET_CODE (reg) == SUBREG
8951 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8952 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8957 base_reg = REGNO (reg);
8958 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8959 ? REGNO (operands[i])
8960 : REGNO (SUBREG_REG (operands[i])));
8965 if (base_reg != (int) REGNO (reg))
8966 /* Not addressed from the same base register. */
8969 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8970 ? REGNO (operands[i])
8971 : REGNO (SUBREG_REG (operands[i])));
8972 if (unsorted_regs[i] < unsorted_regs[order[0]])
8976 /* If it isn't an integer register, or if it overwrites the
8977 base register but isn't the last insn in the list, then
8978 we can't do this. */
8979 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8980 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8983 unsorted_offsets[i] = INTVAL (offset);
8986 /* Not a suitable memory address. */
8990 /* All the useful information has now been extracted from the
8991 operands into unsorted_regs and unsorted_offsets; additionally,
8992 order[0] has been set to the lowest numbered register in the
8993 list. Sort the registers into order, and check that the memory
8994 offsets are ascending and adjacent. */
8996 for (i = 1; i < nops; i++)
9000 order[i] = order[i - 1];
9001 for (j = 0; j < nops; j++)
9002 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9003 && (order[i] == order[i - 1]
9004 || unsorted_regs[j] < unsorted_regs[order[i]]))
9007 /* Have we found a suitable register?  If not, one must be used more
9008 than once.  */
9009 if (order[i] == order[i - 1])
9012 /* Are the memory addresses adjacent and ascending?  */
9013 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9021 for (i = 0; i < nops; i++)
9022 regs[i] = unsorted_regs[order[i]];
9024 *load_offset = unsorted_offsets[order[0]];
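/* For example, the pair {ldr r1, [r5, #4]; ldr r0, [r5]} sorts to
   regs = {r0, r1} with ascending offsets {0, 4}; the checks below
   then select ldmia (return value 1).  */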
9027 if (unsorted_offsets[order[0]] == 0)
9028 return 1; /* ldmia */
9030 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9031 return 2; /* ldmib */
9033 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9034 return 3; /* ldmda */
9036 if (unsorted_offsets[order[nops - 1]] == -4)
9037 return 4; /* ldmdb */
9039 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9040 if the offset isn't small enough. The reason 2 ldrs are faster
9041 is because these ARMs are able to do more than one cache access
9042 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9043 whilst the ARM8 has a double bandwidth cache. This means that
9044 these cores can do both an instruction fetch and a data fetch in
9045 a single cycle, so the trick of calculating the address into a
9046 scratch register (one of the result regs) and then doing a load
9047 multiple actually becomes slower (and no smaller in code size).
9048 That is the transformation
9050 ldr rd1, [rbase + offset]
9051 ldr rd2, [rbase + offset + 4]
9055 add rd1, rbase, offset
9056 ldmia rd1, {rd1, rd2}
9058 produces worse code -- '3 cycles + any stalls on rd2' instead of
9059 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9060 access per cycle, the first sequence could never complete in less
9061 than 6 cycles, whereas the ldm sequence would only take 5 and
would make better use of sequential accesses if not hitting the cache.
9065 We cheat here and test 'arm_ld_sched' which we currently know to
9066 only be true for the ARM8, ARM9 and StrongARM. If this ever
9067 changes, then the test below needs to be reworked. */
9068 if (nops == 2 && arm_ld_sched)
/* Can't do it without setting up the offset; only do this if it takes
9072 no more than one insn. */
9073 return (const_ok_for_arm (unsorted_offsets[order[0]])
9074 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
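/* For illustration, a minimal standalone sketch (not compiled in) of the
   classification above: once the offsets are known to be ascending and
   adjacent, the first and last offsets alone select the LDM addressing
   mode.  The enum names here are illustrative only, not GCC's.  */
#if 0
#include <stdio.h>

enum ldm_kind { LDM_NONE, LDM_IA, LDM_IB, LDM_DA, LDM_DB };

static enum ldm_kind
classify_ldm (const long *offsets, int nops, int is_arm_mode)
{
  /* Offsets are assumed ascending and adjacent in steps of 4.  */
  if (offsets[0] == 0)
    return LDM_IA;		/* base points at the first word  */
  if (is_arm_mode && offsets[0] == 4)
    return LDM_IB;		/* base is one word below  */
  if (is_arm_mode && offsets[nops - 1] == 0)
    return LDM_DA;		/* base points at the last word  */
  if (offsets[nops - 1] == -4)
    return LDM_DB;		/* base is one word above  */
  return LDM_NONE;		/* base + offset must be materialized  */
}

int
main (void)
{
  long offs[3] = { -12, -8, -4 };
  printf ("%d\n", (int) classify_ldm (offs, 3, 1));	/* prints 4: ldmdb  */
  return 0;
}
#endif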
9078 emit_ldm_seq (rtx *operands, int nops)
9082 HOST_WIDE_INT offset;
9086 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9089 strcpy (buf, "ldm%(ia%)\t");
9093 strcpy (buf, "ldm%(ib%)\t");
9097 strcpy (buf, "ldm%(da%)\t");
9101 strcpy (buf, "ldm%(db%)\t");
9106 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9107 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9110 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9111 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9113 output_asm_insn (buf, operands);
9115 strcpy (buf, "ldm%(ia%)\t");
9122 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9123 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9125 for (i = 1; i < nops; i++)
9126 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9127 reg_names[regs[i]]);
9129 strcat (buf, "}\t%@ phole ldm");
9131 output_asm_insn (buf, operands);
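/* For a concrete feel: with regs = {r0, r1}, base_reg = r3 and a zero
   starting offset, the code above assembles to something like
   "ldmia r3, {r0, r1} @ phole ldm" once the %(...%) dialect markers and
   the %@ comment escape are expanded (illustrative, not captured output).  */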
9136 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9137 HOST_WIDE_INT * load_offset)
9139 int unsorted_regs[4];
9140 HOST_WIDE_INT unsorted_offsets[4];
9145 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9146 extended if required. */
9147 gcc_assert (nops >= 2 && nops <= 4);
9149 memset (order, 0, 4 * sizeof (int));
9151 /* Loop over the operands and check that the memory references are
9152 suitable (i.e. immediate offsets from the same base register). At
9153 the same time, extract the target register, and the memory
9155 for (i = 0; i < nops; i++)
9160 /* Convert a subreg of a mem into the mem itself. */
9161 if (GET_CODE (operands[nops + i]) == SUBREG)
9162 operands[nops + i] = alter_subreg (operands + (nops + i));
9164 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9166 /* Don't reorder volatile memory references; it doesn't seem worth
9167 looking for the case where the order is ok anyway. */
9168 if (MEM_VOLATILE_P (operands[nops + i]))
9171 offset = const0_rtx;
9173 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9174 || (GET_CODE (reg) == SUBREG
9175 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9176 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
&& ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0)) == REG)
9179 || (GET_CODE (reg) == SUBREG
9180 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
&& (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) == CONST_INT)))
9186 base_reg = REGNO (reg);
9187 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9188 ? REGNO (operands[i])
9189 : REGNO (SUBREG_REG (operands[i])));
9194 if (base_reg != (int) REGNO (reg))
/* Not addressed from the same base register.  */
return 0;
9198 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9199 ? REGNO (operands[i])
9200 : REGNO (SUBREG_REG (operands[i])));
if (unsorted_regs[i] < unsorted_regs[order[0]])
order[0] = i;
9205 /* If it isn't an integer register, then we can't do this. */
9206 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9209 unsorted_offsets[i] = INTVAL (offset);
9212 /* Not a suitable memory address. */
9216 /* All the useful information has now been extracted from the
9217 operands into unsorted_regs and unsorted_offsets; additionally,
9218 order[0] has been set to the lowest numbered register in the
9219 list. Sort the registers into order, and check that the memory
9220 offsets are ascending and adjacent. */
9222 for (i = 1; i < nops; i++)
9226 order[i] = order[i - 1];
9227 for (j = 0; j < nops; j++)
9228 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9229 && (order[i] == order[i - 1]
|| unsorted_regs[j] < unsorted_regs[order[i]]))
order[i] = j;
/* Have we found a suitable register?  If not, one must be used more
than once.  */
if (order[i] == order[i - 1])
return 0;
9238 /* Is the memory address adjacent and ascending? */
if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
return 0;
9247 for (i = 0; i < nops; i++)
9248 regs[i] = unsorted_regs[order[i]];
9250 *load_offset = unsorted_offsets[order[0]];
9253 if (unsorted_offsets[order[0]] == 0)
9254 return 1; /* stmia */
9256 if (unsorted_offsets[order[0]] == 4)
9257 return 2; /* stmib */
9259 if (unsorted_offsets[order[nops - 1]] == 0)
9260 return 3; /* stmda */
9262 if (unsorted_offsets[order[nops - 1]] == -4)
9263 return 4; /* stmdb */
9269 emit_stm_seq (rtx *operands, int nops)
9273 HOST_WIDE_INT offset;
9277 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9280 strcpy (buf, "stm%(ia%)\t");
9284 strcpy (buf, "stm%(ib%)\t");
9288 strcpy (buf, "stm%(da%)\t");
9292 strcpy (buf, "stm%(db%)\t");
9299 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9300 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9302 for (i = 1; i < nops; i++)
9303 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9304 reg_names[regs[i]]);
9306 strcat (buf, "}\t%@ phole stm");
9308 output_asm_insn (buf, operands);
9312 /* Routines for use in generating RTL. */
9315 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9316 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9318 HOST_WIDE_INT offset = *offsetp;
9321 int sign = up ? 1 : -1;
9324 /* XScale has load-store double instructions, but they have stricter
alignment requirements than load-store multiple, so we cannot use them.
9328 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9329 the pipeline until completion.
An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9346 Best case ldr will always win. However, the more ldr instructions
9347 we issue, the less likely we are to be able to schedule them well.
9348 Using ldr instructions also increases code size.
9350 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9351 for counts of 3 or 4 regs. */
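/* A worked instance of the numbers above: for COUNT == 2 an ldm costs
   2 + 2 = 4 cycles and blocks the pipeline, while two ldrs cost 1-3
   cycles each and do not block, so ldr wins; for COUNT == 4 an ldm
   costs 2 + 4 = 6 cycles against 4-12 cycles for four ldrs plus the
   extra code size, so ldm wins.  */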
9352 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9358 for (i = 0; i < count; i++)
9360 addr = plus_constant (from, i * 4 * sign);
9361 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9362 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9368 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9378 result = gen_rtx_PARALLEL (VOIDmode,
9379 rtvec_alloc (count + (write_back ? 1 : 0)));
9382 XVECEXP (result, 0, 0)
9383 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9388 for (j = 0; i < count; i++, j++)
9390 addr = plus_constant (from, j * 4 * sign);
9391 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9392 XVECEXP (result, 0, i)
9393 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9404 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9405 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9407 HOST_WIDE_INT offset = *offsetp;
9410 int sign = up ? 1 : -1;
9413 /* See arm_gen_load_multiple for discussion of
9414 the pros/cons of ldm/stm usage for XScale. */
9415 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9421 for (i = 0; i < count; i++)
9423 addr = plus_constant (to, i * 4 * sign);
9424 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9425 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9431 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9441 result = gen_rtx_PARALLEL (VOIDmode,
9442 rtvec_alloc (count + (write_back ? 1 : 0)));
9445 XVECEXP (result, 0, 0)
9446 = gen_rtx_SET (VOIDmode, to,
9447 plus_constant (to, count * 4 * sign));
9452 for (j = 0; i < count; i++, j++)
9454 addr = plus_constant (to, j * 4 * sign);
9455 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9456 XVECEXP (result, 0, i)
9457 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9468 arm_gen_movmemqi (rtx *operands)
9470 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9471 HOST_WIDE_INT srcoffset, dstoffset;
9473 rtx src, dst, srcbase, dstbase;
9474 rtx part_bytes_reg = NULL;
9477 if (GET_CODE (operands[2]) != CONST_INT
9478 || GET_CODE (operands[3]) != CONST_INT
9479 || INTVAL (operands[2]) > 64
9480 || INTVAL (operands[3]) & 3)
9483 dstbase = operands[0];
9484 srcbase = operands[1];
9486 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9487 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9489 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9490 out_words_to_go = INTVAL (operands[2]) / 4;
9491 last_bytes = INTVAL (operands[2]) & 3;
9492 dstoffset = srcoffset = 0;
9494 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9495 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
for (i = 0; in_words_to_go >= 2; i += 4)
9499 if (in_words_to_go > 4)
9500 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9501 srcbase, &srcoffset));
9503 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9504 FALSE, srcbase, &srcoffset));
9506 if (out_words_to_go)
9508 if (out_words_to_go > 4)
9509 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9510 dstbase, &dstoffset));
9511 else if (out_words_to_go != 1)
9512 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9516 dstbase, &dstoffset));
9519 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9520 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9521 if (last_bytes != 0)
9523 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9529 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9530 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9533 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9534 if (out_words_to_go)
9538 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9539 sreg = copy_to_reg (mem);
9541 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9542 emit_move_insn (mem, sreg);
9545 gcc_assert (!in_words_to_go); /* Sanity check */
9550 gcc_assert (in_words_to_go > 0);
9552 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9553 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9556 gcc_assert (!last_bytes || part_bytes_reg);
9558 if (BYTES_BIG_ENDIAN && last_bytes)
9560 rtx tmp = gen_reg_rtx (SImode);
9562 /* The bytes we want are in the top end of the word. */
9563 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9564 GEN_INT (8 * (4 - last_bytes))));
9565 part_bytes_reg = tmp;
9569 mem = adjust_automodify_address (dstbase, QImode,
9570 plus_constant (dst, last_bytes - 1),
9571 dstoffset + last_bytes - 1);
9572 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9576 tmp = gen_reg_rtx (SImode);
9577 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9578 part_bytes_reg = tmp;
9587 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9588 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9592 rtx tmp = gen_reg_rtx (SImode);
9593 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9594 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9595 part_bytes_reg = tmp;
9602 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9603 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
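/* A standalone sketch (not compiled in) of the byte-count decomposition
   used above: whole words move in ldm/stm groups of up to four registers,
   and the final 1-3 bytes go through halfword/byte stores.  The rounding
   of the input word count is deliberately simplified here.  */
#if 0
#include <stdio.h>

static void
plan_copy (long nbytes)
{
  long words = nbytes / 4;
  long last_bytes = nbytes & 3;

  while (words > 0)
    {
      long chunk = words < 4 ? words : 4;	/* at most 4 regs per ldm/stm  */
      printf ("ldm/stm of %ld word(s)\n", chunk);
      words -= chunk;
    }
  if (last_bytes)
    printf ("%ld trailing byte(s) via strh/strb\n", last_bytes);
}

int
main (void)
{
  plan_copy (23);	/* 4 words, 1 word, then 3 trailing bytes  */
  return 0;
}
#endif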
9610 /* Select a dominance comparison mode if possible for a test of the general
9611 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9612 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9613 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9614 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9615 In all cases OP will be either EQ or NE, but we don't need to know which
9616 here. If we are unable to support a dominance comparison we return
9617 CC mode. This will then fail to match for the RTL expressions that
9618 generate this call. */
9620 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9622 enum rtx_code cond1, cond2;
9625 /* Currently we will probably get the wrong result if the individual
9626 comparisons are not simple. This also ensures that it is safe to
9627 reverse a comparison if necessary. */
if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) != CCmode)
|| (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) != CCmode))
return CCmode;
9634 /* The if_then_else variant of this tests the second condition if the
9635 first passes, but is true if the first fails. Reverse the first
9636 condition to get a true "inclusive-or" expression. */
9637 if (cond_or == DOM_CC_NX_OR_Y)
9638 cond1 = reverse_condition (cond1);
9640 /* If the comparisons are not equal, and one doesn't dominate the other,
9641 then we can't do this. */
if (cond1 != cond2
&& !comparison_dominates_p (cond1, cond2)
&& (swapped = 1, !comparison_dominates_p (cond2, cond1)))
return CCmode;
9649 enum rtx_code temp = cond1;
9657 if (cond_or == DOM_CC_X_AND_Y)
9662 case EQ: return CC_DEQmode;
9663 case LE: return CC_DLEmode;
9664 case LEU: return CC_DLEUmode;
9665 case GE: return CC_DGEmode;
9666 case GEU: return CC_DGEUmode;
9667 default: gcc_unreachable ();
9671 if (cond_or == DOM_CC_X_AND_Y)
9687 if (cond_or == DOM_CC_X_AND_Y)
9703 if (cond_or == DOM_CC_X_AND_Y)
9719 if (cond_or == DOM_CC_X_AND_Y)
/* The remaining cases only occur when both comparisons are the same.  */
9737 gcc_assert (cond1 == cond2);
9741 gcc_assert (cond1 == cond2);
9745 gcc_assert (cond1 == cond2);
9749 gcc_assert (cond1 == cond2);
9753 gcc_assert (cond1 == cond2);
9762 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9764 /* All floating point compares return CCFP if it is an equality
9765 comparison, and CCFPE otherwise. */
9766 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9786 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9795 /* A compare with a shifted operand. Because of canonicalization, the
9796 comparison will have to be swapped when we emit the assembler. */
9797 if (GET_MODE (y) == SImode
9798 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9799 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9800 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9801 || GET_CODE (x) == ROTATERT))
9804 /* This operation is performed swapped, but since we only rely on the Z
9805 flag we don't need an additional mode. */
9806 if (GET_MODE (y) == SImode
9807 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9808 && GET_CODE (x) == NEG
9809 && (op == EQ || op == NE))
9812 /* This is a special case that is used by combine to allow a
9813 comparison of a shifted byte load to be split into a zero-extend
9814 followed by a comparison of the shifted integer (only valid for
9815 equalities and unsigned inequalities). */
9816 if (GET_MODE (x) == SImode
9817 && GET_CODE (x) == ASHIFT
9818 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9819 && GET_CODE (XEXP (x, 0)) == SUBREG
9820 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9821 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9822 && (op == EQ || op == NE
9823 || op == GEU || op == GTU || op == LTU || op == LEU)
9824 && GET_CODE (y) == CONST_INT)
/* A construct for a conditional compare: if the false arm contains
0, then both conditions must be true; otherwise either condition
9829 must be true. Not all conditions are possible, so CCmode is
9830 returned if it can't be done. */
9831 if (GET_CODE (x) == IF_THEN_ELSE
9832 && (XEXP (x, 2) == const0_rtx
9833 || XEXP (x, 2) == const1_rtx)
9834 && COMPARISON_P (XEXP (x, 0))
9835 && COMPARISON_P (XEXP (x, 1)))
9836 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9837 INTVAL (XEXP (x, 2)));
9839 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9840 if (GET_CODE (x) == AND
9841 && COMPARISON_P (XEXP (x, 0))
9842 && COMPARISON_P (XEXP (x, 1)))
9843 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9846 if (GET_CODE (x) == IOR
9847 && COMPARISON_P (XEXP (x, 0))
9848 && COMPARISON_P (XEXP (x, 1)))
9849 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9852 /* An operation (on Thumb) where we want to test for a single bit.
9853 This is done by shifting that bit up into the top bit of a
9854 scratch register; we can then branch on the sign bit. */
if (TARGET_THUMB1
&& GET_MODE (x) == SImode
9857 && (op == EQ || op == NE)
9858 && GET_CODE (x) == ZERO_EXTRACT
9859 && XEXP (x, 1) == const1_rtx)
/* An operation that sets the condition codes as a side-effect: the
V flag is not set correctly, so we can only use comparisons where
this doesn't matter.  (For LT and GE we can use "mi" and "pl"
conditions, respectively.)  */
9866 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
if (GET_MODE (x) == SImode
&& y == const0_rtx
9869 && (op == EQ || op == NE || op == LT || op == GE)
9870 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9871 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9872 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9873 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9874 || GET_CODE (x) == LSHIFTRT
9875 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9876 || GET_CODE (x) == ROTATERT
9877 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9880 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9883 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9884 && GET_CODE (x) == PLUS
9885 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9891 /* X and Y are two things to compare using CODE. Emit the compare insn and
9892 return the rtx for register 0 in the proper mode. FP means this is a
floating point compare: I don't think that it is needed on the ARM.  */
9895 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9897 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9898 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9900 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9905 /* Generate a sequence of insns that will generate the correct return
address mask depending on the physical architecture that the program is running on.  */
9909 arm_gen_return_addr_mask (void)
9911 rtx reg = gen_reg_rtx (Pmode);
9913 emit_insn (gen_return_addr_mask (reg));
9918 arm_reload_in_hi (rtx *operands)
9920 rtx ref = operands[1];
9922 HOST_WIDE_INT offset = 0;
9924 if (GET_CODE (ref) == SUBREG)
9926 offset = SUBREG_BYTE (ref);
9927 ref = SUBREG_REG (ref);
9930 if (GET_CODE (ref) == REG)
9932 /* We have a pseudo which has been spilt onto the stack; there
9933 are two cases here: the first where there is a simple
9934 stack-slot replacement and a second where the stack-slot is
9935 out of range, or is used as a subreg. */
9936 if (reg_equiv_mem[REGNO (ref)])
9938 ref = reg_equiv_mem[REGNO (ref)];
9939 base = find_replacement (&XEXP (ref, 0));
9942 /* The slot is out of range, or was dressed up in a SUBREG. */
9943 base = reg_equiv_address[REGNO (ref)];
9946 base = find_replacement (&XEXP (ref, 0));
9948 /* Handle the case where the address is too complex to be offset by 1. */
9949 if (GET_CODE (base) == MINUS
9950 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9952 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9954 emit_set_insn (base_plus, base);
9957 else if (GET_CODE (base) == PLUS)
9959 /* The addend must be CONST_INT, or we would have dealt with it above. */
9960 HOST_WIDE_INT hi, lo;
9962 offset += INTVAL (XEXP (base, 1));
9963 base = XEXP (base, 0);
/* Rework the address into a legal sequence of insns.  */
/* Valid range for lo is -4095 -> 4095.  */
lo = (offset >= 0
      ? (offset & 0xfff)
      : -((-offset) & 0xfff));

/* Corner case: if lo is the max offset, then we would be out of range
   once we have added the additional 1 below, so bump the msb into the
   pre-loading insn(s).  */
if (lo == 4095)
  lo &= 0x7ff;

hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
       ^ (HOST_WIDE_INT) 0x80000000)
      - (HOST_WIDE_INT) 0x80000000);
9981 gcc_assert (hi + lo == offset);
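/* A worked example of the split above: offset 0x1234 gives lo = 0x234 and
   hi = 0x1000; offset -0x1234 gives lo = -0x234 and hi = -0x1000.  Either
   way hi + lo reconstructs the offset (as asserted), hi is handled by the
   addsi3 below, and lo stays within the +/-4095 range of the byte loads.  */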
9985 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9987 /* Get the base address; addsi3 knows how to handle constants
9988 that require more than one insn. */
9989 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9995 /* Operands[2] may overlap operands[0] (though it won't overlap
operands[1]); that's why we asked for a DImode reg -- so we can
9997 use the bit that does not overlap. */
9998 if (REGNO (operands[2]) == REGNO (operands[0]))
9999 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10001 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10003 emit_insn (gen_zero_extendqisi2 (scratch,
10004 gen_rtx_MEM (QImode,
10005 plus_constant (base,
10007 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10008 gen_rtx_MEM (QImode,
10009 plus_constant (base,
10011 if (!BYTES_BIG_ENDIAN)
10012 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10013 gen_rtx_IOR (SImode,
10016 gen_rtx_SUBREG (SImode, operands[0], 0),
10020 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10021 gen_rtx_IOR (SImode,
10022 gen_rtx_ASHIFT (SImode, scratch,
10024 gen_rtx_SUBREG (SImode, operands[0], 0)));
10027 /* Handle storing a half-word to memory during reload by synthesizing as two
10028 byte stores. Take care not to clobber the input values until after we
10029 have moved them somewhere safe. This code assumes that if the DImode
10030 scratch in operands[2] overlaps either the input value or output address
10031 in some way, then that value must die in this insn (we absolutely need
10032 two scratch registers for some corner cases). */
10034 arm_reload_out_hi (rtx *operands)
10036 rtx ref = operands[0];
10037 rtx outval = operands[1];
10039 HOST_WIDE_INT offset = 0;
10041 if (GET_CODE (ref) == SUBREG)
10043 offset = SUBREG_BYTE (ref);
10044 ref = SUBREG_REG (ref);
10047 if (GET_CODE (ref) == REG)
10049 /* We have a pseudo which has been spilt onto the stack; there
10050 are two cases here: the first where there is a simple
10051 stack-slot replacement and a second where the stack-slot is
10052 out of range, or is used as a subreg. */
10053 if (reg_equiv_mem[REGNO (ref)])
10055 ref = reg_equiv_mem[REGNO (ref)];
10056 base = find_replacement (&XEXP (ref, 0));
10059 /* The slot is out of range, or was dressed up in a SUBREG. */
10060 base = reg_equiv_address[REGNO (ref)];
10063 base = find_replacement (&XEXP (ref, 0));
10065 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10067 /* Handle the case where the address is too complex to be offset by 1. */
10068 if (GET_CODE (base) == MINUS
10069 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10071 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10073 /* Be careful not to destroy OUTVAL. */
10074 if (reg_overlap_mentioned_p (base_plus, outval))
10076 /* Updating base_plus might destroy outval, see if we can
10077 swap the scratch and base_plus. */
10078 if (!reg_overlap_mentioned_p (scratch, outval))
10081 scratch = base_plus;
10086 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10088 /* Be conservative and copy OUTVAL into the scratch now,
10089 this should only be necessary if outval is a subreg
10090 of something larger than a word. */
10091 /* XXX Might this clobber base? I can't see how it can,
10092 since scratch is known to overlap with OUTVAL, and
10093 must be wider than a word. */
10094 emit_insn (gen_movhi (scratch_hi, outval));
10095 outval = scratch_hi;
10099 emit_set_insn (base_plus, base);
10102 else if (GET_CODE (base) == PLUS)
10104 /* The addend must be CONST_INT, or we would have dealt with it above. */
10105 HOST_WIDE_INT hi, lo;
10107 offset += INTVAL (XEXP (base, 1));
10108 base = XEXP (base, 0);
/* Rework the address into a legal sequence of insns.  */
/* Valid range for lo is -4095 -> 4095.  */
lo = (offset >= 0
      ? (offset & 0xfff)
      : -((-offset) & 0xfff));

/* Corner case: if lo is the max offset, then we would be out of range
   once we have added the additional 1 below, so bump the msb into the
   pre-loading insn(s).  */
if (lo == 4095)
  lo &= 0x7ff;

hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
       ^ (HOST_WIDE_INT) 0x80000000)
      - (HOST_WIDE_INT) 0x80000000);
10126 gcc_assert (hi + lo == offset);
10130 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10132 /* Be careful not to destroy OUTVAL. */
10133 if (reg_overlap_mentioned_p (base_plus, outval))
10135 /* Updating base_plus might destroy outval, see if we
10136 can swap the scratch and base_plus. */
10137 if (!reg_overlap_mentioned_p (scratch, outval))
10140 scratch = base_plus;
10145 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10147 /* Be conservative and copy outval into scratch now,
10148 this should only be necessary if outval is a
10149 subreg of something larger than a word. */
10150 /* XXX Might this clobber base? I can't see how it
can, since scratch is known to overlap with OUTVAL, and must be wider than a word.  */
10153 emit_insn (gen_movhi (scratch_hi, outval));
10154 outval = scratch_hi;
10158 /* Get the base address; addsi3 knows how to handle constants
10159 that require more than one insn. */
10160 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10166 if (BYTES_BIG_ENDIAN)
10168 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10169 plus_constant (base, offset + 1)),
10170 gen_lowpart (QImode, outval)));
10171 emit_insn (gen_lshrsi3 (scratch,
10172 gen_rtx_SUBREG (SImode, outval, 0),
10174 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10175 gen_lowpart (QImode, scratch)));
10179 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10180 gen_lowpart (QImode, outval)));
10181 emit_insn (gen_lshrsi3 (scratch,
10182 gen_rtx_SUBREG (SImode, outval, 0),
10184 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10185 plus_constant (base, offset + 1)),
10186 gen_lowpart (QImode, scratch)));
10190 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10191 (padded to the size of a word) should be passed in a register. */
10194 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10196 if (TARGET_AAPCS_BASED)
10197 return must_pass_in_stack_var_size (mode, type);
10199 return must_pass_in_stack_var_size_or_pad (mode, type);
10203 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10204 Return true if an argument passed on the stack should be padded upwards,
10205 i.e. if the least-significant byte has useful data.
10206 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10207 aggregate types are placed in the lowest memory address. */
10210 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10212 if (!TARGET_AAPCS_BASED)
10213 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10215 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10222 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10223 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10224 byte of the register has useful data, and return the opposite if the
10225 most significant byte does.
For AAPCS, small aggregates and small complex types are always padded upwards.  */
10230 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10231 tree type, int first ATTRIBUTE_UNUSED)
10233 if (TARGET_AAPCS_BASED
10234 && BYTES_BIG_ENDIAN
10235 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10236 && int_size_in_bytes (type) <= 4)
10239 /* Otherwise, use default padding. */
10240 return !BYTES_BIG_ENDIAN;
10244 /* Print a symbolic form of X to the debug file, F. */
10246 arm_print_value (FILE *f, rtx x)
10248 switch (GET_CODE (x))
10251 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10255 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10263 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10265 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10266 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10274 fprintf (f, "\"%s\"", XSTR (x, 0));
10278 fprintf (f, "`%s'", XSTR (x, 0));
10282 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10286 arm_print_value (f, XEXP (x, 0));
10290 arm_print_value (f, XEXP (x, 0));
10292 arm_print_value (f, XEXP (x, 1));
10300 fprintf (f, "????");
10305 /* Routines for manipulation of the constant pool. */
/* ARM instructions cannot load a large constant directly into a
10308 register; they have to come from a pc relative load. The constant
10309 must therefore be placed in the addressable range of the pc
10310 relative load. Depending on the precise pc relative load
10311 instruction the range is somewhere between 256 bytes and 4k. This
10312 means that we often have to dump a constant inside a function, and
10313 generate code to branch around it.
10315 It is important to minimize this, since the branches will slow
10316 things down and make the code larger.
10318 Normally we can hide the table after an existing unconditional
10319 branch so that there is no interruption of the flow, but in the
worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
10338 We fix this by performing a scan after scheduling, which notices
10339 which instructions need to have their operands fetched from the
10340 constant table and builds the table.
10342 The algorithm starts by building a table of all the constants that
10343 need fixing up and all the natural barriers in the function (places
10344 where a constant table can be dropped without breaking the flow).
10345 For each fixup we note how far the pc-relative replacement will be
10346 able to reach and the offset of the instruction into the function.
10348 Having built the table we then group the fixes together to form
10349 tables that are as large as possible (subject to addressing
10350 constraints) and emit each table of constants after the last
10351 barrier that is within range of all the instructions in the group.
10352 If a group does not contain a barrier, then we forcibly create one
10353 by inserting a jump instruction into the flow. Once the table has
10354 been inserted, the insns are then modified to reference the
10355 relevant entry in the pool.
10357 Possible enhancements to the algorithm (not implemented) are:
10359 1) For some processors and object formats, there may be benefit in
10360 aligning the pools to the start of cache lines; this alignment
would need to be taken into account when calculating addressability of a pool.  */
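/* A standalone sketch (not compiled in) of the grouping step described
   above: a pool must land at a barrier that every fix in the group can
   still reach.  The structures and numbers are illustrative, not GCC's.  */
#if 0
#include <stdio.h>

struct ex_fix { long address; long max_reach; };

static long
place_pool (const struct ex_fix *fixes, int n, const long *barriers, int nbar)
{
  long limit = fixes[0].address + fixes[0].max_reach;
  long best = -1;
  int i;

  /* The pool address must be reachable from every fix in the group.  */
  for (i = 1; i < n; i++)
    {
      long reach = fixes[i].address + fixes[i].max_reach;
      if (reach < limit)
	limit = reach;
    }

  /* Prefer the last natural barrier that is still within range.  */
  for (i = 0; i < nbar; i++)
    if (barriers[i] > fixes[n - 1].address && barriers[i] <= limit)
      best = barriers[i];

  return best;	/* -1 => force a barrier and jump around it  */
}

int
main (void)
{
  struct ex_fix f[2] = { { 100, 4096 }, { 200, 1020 } };
  long bars[2] = { 600, 2000 };
  printf ("%ld\n", place_pool (f, 2, bars, 2));	/* prints 600  */
  return 0;
}
#endif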
10364 /* These typedefs are located at the start of this file, so that
10365 they can be used in the prototypes there. This comment is to
10366 remind readers of that fact so that the following structures
10367 can be understood more easily.
10369 typedef struct minipool_node Mnode;
10370 typedef struct minipool_fixup Mfix; */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in the table.  */
  rtx value;
  /* The mode of the value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled,
     sizes > 4 also imply an alignment of 8 bytes.  */
  int fix_size;
};
struct minipool_fixup
{
  Mfix * next;
  rtx insn;
  HOST_WIDE_INT address;
  rtx * loc;
  enum machine_mode mode;
  int fix_size;
  rtx value;
  Mnode * minipool;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
};
10412 /* Fixes less than a word need padding out to a word boundary. */
10413 #define MINIPOOL_FIX_SIZE(mode) \
10414 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
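/* So, for example, a QImode or HImode fix still occupies 4 bytes in the
   pool, while a DImode fix occupies 8 and, with ARM_DOUBLEWORD_ALIGN,
   additionally forces 8-byte alignment of the pool.  */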
10416 static Mnode * minipool_vector_head;
10417 static Mnode * minipool_vector_tail;
10418 static rtx minipool_vector_label;
10419 static int minipool_pad;
10421 /* The linked list of all minipool fixes required for this function. */
10422 Mfix * minipool_fix_head;
10423 Mfix * minipool_fix_tail;
10424 /* The fix entry for the current minipool, once it has been placed. */
10425 Mfix * minipool_barrier;
10427 /* Determines if INSN is the start of a jump table. Returns the end
10428 of the TABLE or NULL_RTX. */
10430 is_jump_table (rtx insn)
10434 if (GET_CODE (insn) == JUMP_INSN
10435 && JUMP_LABEL (insn) != NULL
10436 && ((table = next_real_insn (JUMP_LABEL (insn)))
10437 == next_real_insn (insn))
10439 && GET_CODE (table) == JUMP_INSN
10440 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10441 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10447 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10448 #define JUMP_TABLES_IN_TEXT_SECTION 0
10451 static HOST_WIDE_INT
10452 get_jump_table_size (rtx insn)
/* ADDR_VECs only take room if read-only data goes into the text section.  */
10456 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10458 rtx body = PATTERN (insn);
10459 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10460 HOST_WIDE_INT size;
10461 HOST_WIDE_INT modesize;
10463 modesize = GET_MODE_SIZE (GET_MODE (body));
10464 size = modesize * XVECLEN (body, elt);
10468 /* Round up size of TBB table to a halfword boundary. */
10469 size = (size + 1) & ~(HOST_WIDE_INT)1;
10472 /* No padding necessary for TBH. */
10475 /* Add two bytes for alignment on Thumb. */
10480 gcc_unreachable ();
10488 /* Move a minipool fix MP from its current location to before MAX_MP.
10489 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10490 constraints may need updating. */
10492 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10493 HOST_WIDE_INT max_address)
10495 /* The code below assumes these are different. */
10496 gcc_assert (mp != max_mp);
10498 if (max_mp == NULL)
10500 if (max_address < mp->max_address)
10501 mp->max_address = max_address;
10505 if (max_address > max_mp->max_address - mp->fix_size)
10506 mp->max_address = max_mp->max_address - mp->fix_size;
10508 mp->max_address = max_address;
10510 /* Unlink MP from its current position. Since max_mp is non-null,
10511 mp->prev must be non-null. */
10512 mp->prev->next = mp->next;
10513 if (mp->next != NULL)
10514 mp->next->prev = mp->prev;
10516 minipool_vector_tail = mp->prev;
10518 /* Re-insert it before MAX_MP. */
10520 mp->prev = max_mp->prev;
10523 if (mp->prev != NULL)
10524 mp->prev->next = mp;
10526 minipool_vector_head = mp;
10529 /* Save the new entry. */
10532 /* Scan over the preceding entries and adjust their addresses as
10534 while (mp->prev != NULL
10535 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10537 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10544 /* Add a constant to the minipool for a forward reference. Returns the
10545 node added or NULL if the constant will not fit in this pool. */
10547 add_minipool_forward_ref (Mfix *fix)
10549 /* If set, max_mp is the first pool_entry that has a lower
10550 constraint than the one we are trying to add. */
10551 Mnode * max_mp = NULL;
10552 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10555 /* If the minipool starts before the end of FIX->INSN then this FIX
cannot be placed into the current pool.  Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes earlier.  */
10559 if (minipool_vector_head &&
10560 (fix->address + get_attr_length (fix->insn)
10561 >= minipool_vector_head->max_address - fix->fix_size))
10564 /* Scan the pool to see if a constant with the same value has
10565 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already exist.  */
10568 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10570 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10571 && fix->mode == mp->mode
10572 && (GET_CODE (fix->value) != CODE_LABEL
10573 || (CODE_LABEL_NUMBER (fix->value)
10574 == CODE_LABEL_NUMBER (mp->value)))
10575 && rtx_equal_p (fix->value, mp->value))
10577 /* More than one fix references this entry. */
10579 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10582 /* Note the insertion point if necessary. */
10584 && mp->max_address > max_address)
/* If we are inserting an 8-byte aligned quantity and
10588 we have not already found an insertion point, then
10589 make sure that all such 8-byte aligned quantities are
10590 placed at the start of the pool. */
10591 if (ARM_DOUBLEWORD_ALIGN
10593 && fix->fix_size >= 8
10594 && mp->fix_size < 8)
10597 max_address = mp->max_address;
10601 /* The value is not currently in the minipool, so we need to create
10602 a new entry for it. If MAX_MP is NULL, the entry will be put on
10603 the end of the list since the placement is less constrained than
10604 any existing entry. Otherwise, we insert the new fix before
MAX_MP and, if necessary, adjust the constraints on the other
entries.  */
mp = XNEW (Mnode);
10608 mp->fix_size = fix->fix_size;
10609 mp->mode = fix->mode;
10610 mp->value = fix->value;
10612 /* Not yet required for a backwards ref. */
10613 mp->min_address = -65536;
10615 if (max_mp == NULL)
10617 mp->max_address = max_address;
10619 mp->prev = minipool_vector_tail;
10621 if (mp->prev == NULL)
10623 minipool_vector_head = mp;
10624 minipool_vector_label = gen_label_rtx ();
10627 mp->prev->next = mp;
10629 minipool_vector_tail = mp;
10633 if (max_address > max_mp->max_address - mp->fix_size)
10634 mp->max_address = max_mp->max_address - mp->fix_size;
10636 mp->max_address = max_address;
10639 mp->prev = max_mp->prev;
10641 if (mp->prev != NULL)
10642 mp->prev->next = mp;
10644 minipool_vector_head = mp;
10647 /* Save the new entry. */
10650 /* Scan over the preceding entries and adjust their addresses as
10652 while (mp->prev != NULL
10653 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10655 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10663 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10664 HOST_WIDE_INT min_address)
10666 HOST_WIDE_INT offset;
10668 /* The code below assumes these are different. */
10669 gcc_assert (mp != min_mp);
10671 if (min_mp == NULL)
10673 if (min_address > mp->min_address)
10674 mp->min_address = min_address;
10678 /* We will adjust this below if it is too loose. */
10679 mp->min_address = min_address;
10681 /* Unlink MP from its current position. Since min_mp is non-null,
10682 mp->next must be non-null. */
10683 mp->next->prev = mp->prev;
10684 if (mp->prev != NULL)
10685 mp->prev->next = mp->next;
10687 minipool_vector_head = mp->next;
10689 /* Reinsert it after MIN_MP. */
10691 mp->next = min_mp->next;
10693 if (mp->next != NULL)
10694 mp->next->prev = mp;
10696 minipool_vector_tail = mp;
10702 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10704 mp->offset = offset;
10705 if (mp->refcount > 0)
10706 offset += mp->fix_size;
10708 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10709 mp->next->min_address = mp->min_address + mp->fix_size;
10715 /* Add a constant to the minipool for a backward reference. Returns the
10716 node added or NULL if the constant will not fit in this pool.
10718 Note that the code for insertion for a backwards reference can be
10719 somewhat confusing because the calculated offsets for each fix do
not take into account the size of the pool (which is still under construction).  */
10723 add_minipool_backward_ref (Mfix *fix)
10725 /* If set, min_mp is the last pool_entry that has a lower constraint
10726 than the one we are trying to add. */
10727 Mnode *min_mp = NULL;
10728 /* This can be negative, since it is only a constraint. */
10729 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10732 /* If we can't reach the current pool from this insn, or if we can't
10733 insert this entry at the end of the pool without pushing other
10734 fixes out of range, then we don't try. This ensures that we
10735 can't fail later on. */
10736 if (min_address >= minipool_barrier->address
10737 || (minipool_vector_tail->min_address + fix->fix_size
10738 >= minipool_barrier->address))
10741 /* Scan the pool to see if a constant with the same value has
10742 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already exist.  */
10745 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10747 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10748 && fix->mode == mp->mode
10749 && (GET_CODE (fix->value) != CODE_LABEL
10750 || (CODE_LABEL_NUMBER (fix->value)
10751 == CODE_LABEL_NUMBER (mp->value)))
10752 && rtx_equal_p (fix->value, mp->value)
10753 /* Check that there is enough slack to move this entry to the
10754 end of the table (this is conservative). */
10755 && (mp->max_address
10756 > (minipool_barrier->address
10757 + minipool_vector_tail->offset
10758 + minipool_vector_tail->fix_size)))
10761 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10764 if (min_mp != NULL)
10765 mp->min_address += fix->fix_size;
10768 /* Note the insertion point if necessary. */
10769 if (mp->min_address < min_address)
/* For now, we do not allow the insertion of nodes requiring 8-byte
alignment anywhere but at the start of the pool.  */
10773 if (ARM_DOUBLEWORD_ALIGN
10774 && fix->fix_size >= 8 && mp->fix_size < 8)
10779 else if (mp->max_address
10780 < minipool_barrier->address + mp->offset + fix->fix_size)
10782 /* Inserting before this entry would push the fix beyond
10783 its maximum address (which can happen if we have
re-located a forwards fix); force the new fix to come after it.  */
10786 if (ARM_DOUBLEWORD_ALIGN
10787 && fix->fix_size >= 8 && mp->fix_size < 8)
10792 min_address = mp->min_address + fix->fix_size;
10795 /* Do not insert a non-8-byte aligned quantity before 8-byte
10796 aligned quantities. */
10797 else if (ARM_DOUBLEWORD_ALIGN
10798 && fix->fix_size < 8
10799 && mp->fix_size >= 8)
10802 min_address = mp->min_address + fix->fix_size;
/* We need to create a new entry.  */
mp = XNEW (Mnode);
10809 mp->fix_size = fix->fix_size;
10810 mp->mode = fix->mode;
10811 mp->value = fix->value;
10813 mp->max_address = minipool_barrier->address + 65536;
10815 mp->min_address = min_address;
10817 if (min_mp == NULL)
10820 mp->next = minipool_vector_head;
10822 if (mp->next == NULL)
10824 minipool_vector_tail = mp;
10825 minipool_vector_label = gen_label_rtx ();
10828 mp->next->prev = mp;
10830 minipool_vector_head = mp;
10834 mp->next = min_mp->next;
10838 if (mp->next != NULL)
10839 mp->next->prev = mp;
10841 minipool_vector_tail = mp;
10844 /* Save the new entry. */
10852 /* Scan over the following entries and adjust their offsets. */
10853 while (mp->next != NULL)
10855 if (mp->next->min_address < mp->min_address + mp->fix_size)
10856 mp->next->min_address = mp->min_address + mp->fix_size;
10859 mp->next->offset = mp->offset + mp->fix_size;
10861 mp->next->offset = mp->offset;
10870 assign_minipool_offsets (Mfix *barrier)
10872 HOST_WIDE_INT offset = 0;
10875 minipool_barrier = barrier;
10877 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10879 mp->offset = offset;
10881 if (mp->refcount > 0)
10882 offset += mp->fix_size;
/* Output the literal table.  */
10888 dump_minipool (rtx scan)
10894 if (ARM_DOUBLEWORD_ALIGN)
10895 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10896 if (mp->refcount > 0 && mp->fix_size >= 8)
10903 fprintf (dump_file,
10904 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10905 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10907 scan = emit_label_after (gen_label_rtx (), scan);
10908 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10909 scan = emit_label_after (minipool_vector_label, scan);
10911 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10913 if (mp->refcount > 0)
10917 fprintf (dump_file,
10918 ";; Offset %u, min %ld, max %ld ",
10919 (unsigned) mp->offset, (unsigned long) mp->min_address,
10920 (unsigned long) mp->max_address);
10921 arm_print_value (dump_file, mp->value);
10922 fputc ('\n', dump_file);
10925 switch (mp->fix_size)
10927 #ifdef HAVE_consttable_1
10929 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10933 #ifdef HAVE_consttable_2
10935 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10939 #ifdef HAVE_consttable_4
10941 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10945 #ifdef HAVE_consttable_8
10947 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10951 #ifdef HAVE_consttable_16
10953 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10958 gcc_unreachable ();
10966 minipool_vector_head = minipool_vector_tail = NULL;
10967 scan = emit_insn_after (gen_consttable_end (), scan);
10968 scan = emit_barrier_after (scan);
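/* Schematically (illustrative, not captured output), the emitted pool is:

	.align	2		@ or 3 when an 8-byte entry is present
   Lpool:			@ minipool_vector_label
	.word	<value>		@ one consttable entry per live node
	.word	<value>

   followed by the consttable_end marker and a barrier, as above.  */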
10971 /* Return the cost of forcibly inserting a barrier after INSN. */
10973 arm_barrier_cost (rtx insn)
10975 /* Basing the location of the pool on the loop depth is preferable,
10976 but at the moment, the basic block information seems to be
corrupted by this stage of the compilation.  */
10978 int base_cost = 50;
10979 rtx next = next_nonnote_insn (insn);
10981 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10984 switch (GET_CODE (insn))
/* It will always be better to place the table before the label, rather than after it.  */
10996 return base_cost - 10;
10999 return base_cost + 10;
11003 /* Find the best place in the insn stream in the range
11004 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
Create the barrier by inserting a jump and add a new fix entry for it.  */
11008 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11010 HOST_WIDE_INT count = 0;
11012 rtx from = fix->insn;
11013 /* The instruction after which we will insert the jump. */
11014 rtx selected = NULL;
11016 /* The address at which the jump instruction will be placed. */
11017 HOST_WIDE_INT selected_address;
11019 HOST_WIDE_INT max_count = max_address - fix->address;
11020 rtx label = gen_label_rtx ();
11022 selected_cost = arm_barrier_cost (from);
11023 selected_address = fix->address;
11025 while (from && count < max_count)
/* This code shouldn't have been called if there was a natural barrier within range.  */
11032 gcc_assert (GET_CODE (from) != BARRIER);
11034 /* Count the length of this insn. */
11035 count += get_attr_length (from);
11037 /* If there is a jump table, add its length. */
11038 tmp = is_jump_table (from);
11041 count += get_jump_table_size (tmp);
11043 /* Jump tables aren't in a basic block, so base the cost on
11044 the dispatch insn. If we select this location, we will
11045 still put the pool after the table. */
11046 new_cost = arm_barrier_cost (from);
11048 if (count < max_count
11049 && (!selected || new_cost <= selected_cost))
11052 selected_cost = new_cost;
11053 selected_address = fix->address + count;
11056 /* Continue after the dispatch table. */
11057 from = NEXT_INSN (tmp);
11061 new_cost = arm_barrier_cost (from);
11063 if (count < max_count
11064 && (!selected || new_cost <= selected_cost))
11067 selected_cost = new_cost;
11068 selected_address = fix->address + count;
11071 from = NEXT_INSN (from);
11074 /* Make sure that we found a place to insert the jump. */
11075 gcc_assert (selected);
11077 /* Create a new JUMP_INSN that branches around a barrier. */
11078 from = emit_jump_insn_after (gen_jump (label), selected);
11079 JUMP_LABEL (from) = label;
11080 barrier = emit_barrier_after (from);
11081 emit_label_after (label, barrier);
11083 /* Create a minipool barrier entry for the new barrier. */
11084 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11085 new_fix->insn = barrier;
11086 new_fix->address = selected_address;
11087 new_fix->next = fix->next;
11088 fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at ADDRESS.  */
11096 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11098 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
fix->insn = insn;
fix->address = address;
11104 if (minipool_fix_head != NULL)
11105 minipool_fix_tail->next = fix;
11107 minipool_fix_head = fix;
11109 minipool_fix_tail = fix;
11112 /* Record INSN, which will need fixing up to load a value from the
11113 minipool. ADDRESS is the offset of the insn since the start of the
11114 function; LOC is a pointer to the part of the insn which requires
fixing; VALUE is the constant that must be loaded, which is of type MODE.  */
11118 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11119 enum machine_mode mode, rtx value)
11121 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
fix->insn = insn;
fix->address = address;
fix->loc = loc;
fix->mode = mode;
11127 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11128 fix->value = value;
11129 fix->forwards = get_attr_pool_range (insn);
11130 fix->backwards = get_attr_neg_pool_range (insn);
11131 fix->minipool = NULL;
11133 /* If an insn doesn't have a range defined for it, then it isn't
11134 expecting to be reworked by this code. Better to stop now than
11135 to generate duff assembly code. */
11136 gcc_assert (fix->forwards || fix->backwards);
11138 /* If an entry requires 8-byte alignment then assume all constant pools
11139 require 4 bytes of padding. Trying to do this later on a per-pool
11140 basis is awkward because existing pool entries have to be modified. */
11141 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11146 fprintf (dump_file,
11147 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11148 GET_MODE_NAME (mode),
11149 INSN_UID (insn), (unsigned long) address,
11150 -1 * (long)fix->backwards, (long)fix->forwards);
11151 arm_print_value (dump_file, fix->value);
11152 fprintf (dump_file, "\n");
11155 /* Add it to the chain of fixes. */
11158 if (minipool_fix_head != NULL)
11159 minipool_fix_tail->next = fix;
11161 minipool_fix_head = fix;
11163 minipool_fix_tail = fix;
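/* For a concrete feel of the ranges involved: an ARM-mode SImode load
   typically reaches about 4k bytes forwards and slightly less backwards
   (the exact figures come from the pool_range/neg_pool_range attributes
   in arm.md, not from this file), so a fix at address A can be served by
   a pool placed within roughly (A - 4k, A + 4k).  */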
11166 /* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to do it.  */
11170 arm_const_double_inline_cost (rtx val)
11172 rtx lowpart, highpart;
11173 enum machine_mode mode;
11175 mode = GET_MODE (val);
11177 if (mode == VOIDmode)
11180 gcc_assert (GET_MODE_SIZE (mode) == 8);
11182 lowpart = gen_lowpart (SImode, val);
11183 highpart = gen_highpart_mode (SImode, mode, val);
11185 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11186 gcc_assert (GET_CODE (highpart) == CONST_INT);
11188 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11189 NULL_RTX, NULL_RTX, 0, 0)
11190 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11191 NULL_RTX, NULL_RTX, 0, 0));
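/* A worked example: for VAL = 0x0000000100000005 both 32-bit halves are
   valid ARM immediates, so each half costs one insn and the total is 2;
   something like 0x1234567800000000 needs several data-processing insns
   for its high half, raising the returned cost accordingly.  */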
11194 /* Return true if it is worthwhile to split a 64-bit constant into two
11195 32-bit operations. This is the case if optimizing for size, or
11196 if we have load delay slots, or if one 32-bit part can be done with
11197 a single data operation. */
11199 arm_const_double_by_parts (rtx val)
11201 enum machine_mode mode = GET_MODE (val);
11204 if (optimize_size || arm_ld_sched)
11207 if (mode == VOIDmode)
11210 part = gen_highpart_mode (SImode, mode, val);
11212 gcc_assert (GET_CODE (part) == CONST_INT);
11214 if (const_ok_for_arm (INTVAL (part))
11215 || const_ok_for_arm (~INTVAL (part)))
11218 part = gen_lowpart (SImode, val);
11220 gcc_assert (GET_CODE (part) == CONST_INT);
11222 if (const_ok_for_arm (INTVAL (part))
11223 || const_ok_for_arm (~INTVAL (part)))
11229 /* Scan INSN and note any of its operands that need fixing.
11230 If DO_PUSHES is false we do not actually push any of the fixups
11231 needed. The function returns TRUE if any fixups were needed/pushed.
11232 This is used by arm_memory_load_p() which needs to know about loads
11233 of constants that will be converted into minipool loads. */
11235 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11237 bool result = false;
11240 extract_insn (insn);
11242 if (!constrain_operands (1))
11243 fatal_insn_not_found (insn);
11245 if (recog_data.n_alternatives == 0)
/* Fill in recog_op_alt with information about the constraints of this insn.  */
11250 preprocess_constraints ();
11252 for (opno = 0; opno < recog_data.n_operands; opno++)
11254 /* Things we need to fix can only occur in inputs. */
11255 if (recog_data.operand_type[opno] != OP_IN)
11258 /* If this alternative is a memory reference, then any mention
11259 of constants in this alternative is really to fool reload
11260 into allowing us to accept one there. We need to fix them up
11261 now so that we output the right code. */
11262 if (recog_op_alt[opno][which_alternative].memory_ok)
11264 rtx op = recog_data.operand[opno];
11266 if (CONSTANT_P (op))
11269 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11270 recog_data.operand_mode[opno], op);
11273 else if (GET_CODE (op) == MEM
11274 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11275 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11279 rtx cop = avoid_constant_pool_reference (op);
11281 /* Casting the address of something to a mode narrower
11282 than a word can cause avoid_constant_pool_reference()
11283 to return the pool reference itself. That's no good to
us here.  Let's just hope that we can use the
11285 constant pool value directly. */
11287 cop = get_pool_constant (XEXP (op, 0));
11289 push_minipool_fix (insn, address,
11290 recog_data.operand_loc[opno],
11291 recog_data.operand_mode[opno], cop);
/* GCC puts the pool in the wrong place for ARM, since we can only
11303 load addresses a limited distance around the pc. We do some
11304 special munging to move the constant pool values to the correct
11305 point in the code. */
11310 HOST_WIDE_INT address = 0;
11313 minipool_fix_head = minipool_fix_tail = NULL;
11315 /* The first insn must always be a note, or the code below won't
11316 scan it properly. */
11317 insn = get_insns ();
11318 gcc_assert (GET_CODE (insn) == NOTE);
11321 /* Scan all the insns and record the operands that will need fixing. */
11322 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11324 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11325 && (arm_cirrus_insn_p (insn)
11326 || GET_CODE (insn) == JUMP_INSN
11327 || arm_memory_load_p (insn)))
11328 cirrus_reorg (insn);
11330 if (GET_CODE (insn) == BARRIER)
11331 push_minipool_barrier (insn, address);
11332 else if (INSN_P (insn))
11336 note_invalid_constants (insn, address, true);
11337 address += get_attr_length (insn);
11339 /* If the insn is a vector jump, add the size of the table
11340 and skip the table. */
11341 if ((table = is_jump_table (insn)) != NULL)
11343 address += get_jump_table_size (table);
11349 fix = minipool_fix_head;
11351 /* Now scan the fixups and perform the required changes. */
11356 Mfix * last_added_fix;
11357 Mfix * last_barrier = NULL;
11360 /* Skip any further barriers before the next fix. */
11361 while (fix && GET_CODE (fix->insn) == BARRIER)
11364 /* No more fixes. */
11368 last_added_fix = NULL;
11370 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11372 if (GET_CODE (ftmp->insn) == BARRIER)
11374 if (ftmp->address >= minipool_vector_head->max_address)
11377 last_barrier = ftmp;
11379 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11382 last_added_fix = ftmp; /* Keep track of the last fix added. */
11385 /* If we found a barrier, drop back to that; any fixes that we
11386 could have reached but come after the barrier will now go in
11387 the next mini-pool. */
11388 if (last_barrier != NULL)
11390 /* Reduce the refcount for those fixes that won't go into this
11392 for (fdel = last_barrier->next;
11393 fdel && fdel != ftmp;
11396 fdel->minipool->refcount--;
11397 fdel->minipool = NULL;
11400 ftmp = last_barrier;
/* ftmp is the first fix that we can't fit into this pool and
there are no natural barriers that we could use.  Insert a
11406 new barrier in the code somewhere between the previous
11407 fix and this one, and arrange to jump around it. */
11408 HOST_WIDE_INT max_address;
11410 /* The last item on the list of fixes must be a barrier, so
11411 we can never run off the end of the list of fixes without
11412 last_barrier being set. */
11415 max_address = minipool_vector_head->max_address;
11416 /* Check that there isn't another fix that is in range that
11417 we couldn't fit into this pool because the pool was
11418 already too large: we need to put the pool before such an
11419 instruction. The pool itself may come just after the
11420 fix because create_fix_barrier also allows space for a
11421 jump instruction. */
11422 if (ftmp->address < max_address)
11423 max_address = ftmp->address + 1;
11425 last_barrier = create_fix_barrier (last_added_fix, max_address);
11428 assign_minipool_offsets (last_barrier);
11432 if (GET_CODE (ftmp->insn) != BARRIER
11433 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11440 /* Scan over the fixes we have identified for this pool, fixing them
11441 up and adding the constants to the pool itself. */
11442 for (this_fix = fix; this_fix && ftmp != this_fix;
11443 this_fix = this_fix->next)
11444 if (GET_CODE (this_fix->insn) != BARRIER)
11447 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11448 minipool_vector_label),
11449 this_fix->minipool->offset);
11450 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11453 dump_minipool (last_barrier->insn);
11457 /* From now on we must synthesize any constants that we can't handle
11458 directly. This can happen if the RTL gets split during final
11459 instruction generation. */
11460 after_arm_reorg = 1;
11462 /* Free the minipool memory. */
11463 obstack_free (&minipool_obstack, minipool_startobj);
11466 /* Routines to output assembly language. */
11468 /* If the rtx is one of the known FP constants, return its string representation.
11469 In this way we can ensure that valid double constants are generated even
11470 when cross-compiling. */
11472 fp_immediate_constant (rtx x)
11477 if (!fp_consts_inited)
11480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11481 for (i = 0; i < 8; i++)
11482 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11483 return strings_fp[i];
11485 gcc_unreachable ();
11488 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11489 static const char *
11490 fp_const_from_val (REAL_VALUE_TYPE *r)
11494 if (!fp_consts_inited)
11497 for (i = 0; i < 8; i++)
11498 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11499 return strings_fp[i];
11501 gcc_unreachable ();
11504 /* Output the operands of a LDM/STM instruction to STREAM.
11505 MASK is the ARM register set mask of which only bits 0-15 are important.
11506 REG is the base register, either the frame pointer or the stack pointer,
11507 INSTR is the possibly suffixed load or store instruction.
11508 RFE is nonzero if the instruction should also copy spsr to cpsr. */
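/* For example (a sketch; the mask below just names r4, r5 and lr):

     print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM,
		      (1 << 4) | (1 << 5) | (1 << LR_REGNUM), 0);

   would print "ldmfd	sp!, {r4, r5, lr}".  */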
11511 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11512 unsigned long mask, int rfe)
11515 bool not_first = FALSE;
11517 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11518 fputc ('\t', stream);
11519 asm_fprintf (stream, instr, reg);
11520 fputc ('{', stream);
11522 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11523 if (mask & (1 << i))
11526 fprintf (stream, ", ");
11528 asm_fprintf (stream, "%r", i);
11533 fprintf (stream, "}^\n");
11535 fprintf (stream, "}\n");
11539 /* Output a FLDMD instruction to STREAM.
11540 BASE is the register containing the address.
11541 REG and COUNT specify the register range.
11542 Extra registers may be added to avoid hardware bugs.
11544 We output FLDMD even for ARMv5 VFP implementations. Although
11545 FLDMD is technically not supported until ARMv6, it is believed
11546 that all VFP implementations support its use in this context. */
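/* For example (a sketch): vfp_output_fldmd (f, SP_REGNUM, 8, 3) would
   print "fldmfdd	sp!, {d8, d9, d10}".  A count of exactly 2 is
   adjusted on pre-ARMv6 cores by the ARM10 VFPr1 workaround below.  */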
11549 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11553 /* Workaround ARM10 VFPr1 bug. */
11554 if (count == 2 && !arm_arch6)
11561 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11562 load into multiple parts if we have to handle more than 16 registers. */
11565 vfp_output_fldmd (stream, base, reg, 16);
11566 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11570 fputc ('\t', stream);
11571 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11573 for (i = reg; i < reg + count; i++)
11576 fputs (", ", stream);
11577 asm_fprintf (stream, "d%d", i);
11579 fputs ("}\n", stream);
11584 /* Output the assembly for a store multiple. */
11587 vfp_output_fstmd (rtx * operands)
11594 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11595 p = strlen (pattern);
11597 gcc_assert (GET_CODE (operands[1]) == REG);
11599 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11600 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11602 p += sprintf (&pattern[p], ", d%d", base + i);
11604 strcpy (&pattern[p], "}");
11606 output_asm_insn (pattern, operands);
11611 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11612 number of bytes pushed. */
11615 vfp_emit_fstmd (int base_reg, int count)
11622 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11623 register pairs are stored by a store multiple insn. We avoid this
11624 by pushing an extra pair. */
11625 if (count == 2 && !arm_arch6)
11627 if (base_reg == LAST_VFP_REGNUM - 3)
11632 /* FSTMD may not store more than 16 doubleword registers at once. Split
11633 larger stores into multiple parts (up to a maximum of two, in practice). */
11638 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
11640 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11641 saved += vfp_emit_fstmd (base_reg, 16);
11645 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11646 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11648 reg = gen_rtx_REG (DFmode, base_reg);
11651 XVECEXP (par, 0, 0)
11652 = gen_rtx_SET (VOIDmode,
11655 gen_rtx_PRE_MODIFY (Pmode,
11658 (stack_pointer_rtx,
11661 gen_rtx_UNSPEC (BLKmode,
11662 gen_rtvec (1, reg),
11663 UNSPEC_PUSH_MULT));
11665 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11666 plus_constant (stack_pointer_rtx, -(count * 8)));
11667 RTX_FRAME_RELATED_P (tmp) = 1;
11668 XVECEXP (dwarf, 0, 0) = tmp;
11670 tmp = gen_rtx_SET (VOIDmode,
11671 gen_frame_mem (DFmode, stack_pointer_rtx),
11673 RTX_FRAME_RELATED_P (tmp) = 1;
11674 XVECEXP (dwarf, 0, 1) = tmp;
11676 for (i = 1; i < count; i++)
11678 reg = gen_rtx_REG (DFmode, base_reg);
11680 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11682 tmp = gen_rtx_SET (VOIDmode,
11683 gen_frame_mem (DFmode,
11684 plus_constant (stack_pointer_rtx,
11687 RTX_FRAME_RELATED_P (tmp) = 1;
11688 XVECEXP (dwarf, 0, i + 1) = tmp;
11691 par = emit_insn (par);
11692 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11693 RTX_FRAME_RELATED_P (par) = 1;
11698 /* Emit a call instruction with pattern PAT. ADDR is the address of
11699 the call target. */
11702 arm_emit_call_insn (rtx pat, rtx addr)
11706 insn = emit_call_insn (pat);
11708 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11709 If the call might use such an entry, add a use of the PIC register
11710 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11711 if (TARGET_VXWORKS_RTP
11713 && GET_CODE (addr) == SYMBOL_REF
11714 && (SYMBOL_REF_DECL (addr)
11715 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11716 : !SYMBOL_REF_LOCAL_P (addr)))
11718 require_pic_register ();
11719 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11723 /* Output a 'call' insn. */
11725 output_call (rtx *operands)
11727 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11729 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11730 if (REGNO (operands[0]) == LR_REGNUM)
11732 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11733 output_asm_insn ("mov%?\t%0, %|lr", operands);
11736 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11738 if (TARGET_INTERWORK || arm_arch4t)
11739 output_asm_insn ("bx%?\t%0", operands);
11741 output_asm_insn ("mov%?\t%|pc, %0", operands);
11746 /* Output a 'call' insn that is a reference in memory. This is
11747 disabled for ARMv5, where we prefer a blx instead, because otherwise
11748 there's a significant performance overhead. */
11750 output_call_mem (rtx *operands)
11752 gcc_assert (!arm_arch5);
11753 if (TARGET_INTERWORK)
11755 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11756 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11757 output_asm_insn ("bx%?\t%|ip", operands);
11759 else if (regno_use_in (LR_REGNUM, operands[0]))
11761 /* LR is used in the memory address. We load the address in the
11762 first instruction. It's safe to use IP as the target of the
11763 load since the call will kill it anyway. */
11764 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11765 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11767 output_asm_insn ("bx%?\t%|ip", operands);
11769 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11773 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11774 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11781 /* Output a move from ARM registers to an FPA register.
11782 OPERANDS[0] is an FPA register.
11783 OPERANDS[1] is the first register of an ARM register pair. */
11785 output_mov_long_double_fpa_from_arm (rtx *operands)
11787 int arm_reg0 = REGNO (operands[1]);
11790 gcc_assert (arm_reg0 != IP_REGNUM);
11792 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11793 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11794 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11796 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11797 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11802 /* Output a move from an FPA register to ARM registers.
11803 OPERANDS[0] is the first register of an ARM register pair.
11804 OPERANDS[1] is an FPA register. */
11806 output_mov_long_double_arm_from_fpa (rtx *operands)
11808 int arm_reg0 = REGNO (operands[0]);
11811 gcc_assert (arm_reg0 != IP_REGNUM);
11813 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11814 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11815 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11817 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11818 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11822 /* Output a move of a long double from ARM registers to ARM registers.
11823 OPERANDS[0] is the destination.
11824 OPERANDS[1] is the source. */
11826 output_mov_long_double_arm_from_arm (rtx *operands)
11828 /* We have to be careful here because the two might overlap. */
11829 int dest_start = REGNO (operands[0]);
11830 int src_start = REGNO (operands[1]);
11834 if (dest_start < src_start)
11836 for (i = 0; i < 3; i++)
11838 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11839 ops[1] = gen_rtx_REG (SImode, src_start + i);
11840 output_asm_insn ("mov%?\t%0, %1", ops);
11845 for (i = 2; i >= 0; i--)
11847 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11848 ops[1] = gen_rtx_REG (SImode, src_start + i);
11849 output_asm_insn ("mov%?\t%0, %1", ops);
11857 arm_emit_movpair (rtx dest, rtx src)
11859 /* If the src is an immediate, simplify it. */
11860 if (CONST_INT_P (src))
11862 HOST_WIDE_INT val = INTVAL (src);
11863 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11864 if ((val >> 16) & 0x0000ffff)
11865 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11867 GEN_INT ((val >> 16) & 0x0000ffff));
11870 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11871 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
11874 /* Output a move from ARM registers to an FPA register.
11875 OPERANDS[0] is an FPA register.
11876 OPERANDS[1] is the first register of an ARM register pair. */
11878 output_mov_double_fpa_from_arm (rtx *operands)
11880 int arm_reg0 = REGNO (operands[1]);
11883 gcc_assert (arm_reg0 != IP_REGNUM);
11885 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11886 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11887 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11888 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11892 /* Output a move from an FPA register to ARM registers.
11893 OPERANDS[0] is the first register of an ARM register pair.
11894 OPERANDS[1] is an FPA register. */
11896 output_mov_double_arm_from_fpa (rtx *operands)
11898 int arm_reg0 = REGNO (operands[0]);
11901 gcc_assert (arm_reg0 != IP_REGNUM);
11903 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11904 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11905 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11906 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11910 /* Output a move between double words.
11911 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11912 or MEM<-REG and all MEMs must be offsettable addresses. */
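/* For instance (a sketch, not an exhaustive list): a load from a plain
   register address comes out as "ldrd	r0, [r2]" when TARGET_LDRD is in
   effect, or "ldmia	r2, {r0, r1}" otherwise; the auto-increment and
   PLUS cases below choose analogous forms.  */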
11914 output_move_double (rtx *operands)
11916 enum rtx_code code0 = GET_CODE (operands[0]);
11917 enum rtx_code code1 = GET_CODE (operands[1]);
11922 unsigned int reg0 = REGNO (operands[0]);
11924 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11926 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11928 switch (GET_CODE (XEXP (operands[1], 0)))
11932 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11933 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11935 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11939 gcc_assert (TARGET_LDRD);
11940 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11952 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11954 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11958 gcc_assert (TARGET_LDRD);
11959 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11964 /* Auto-increment addressing modes should never have overlapping
11965 base and destination registers, and overlapping index registers
11966 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
11968 otherops[0] = operands[0];
11969 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11970 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11972 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11974 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11976 /* Registers overlap so split out the increment. */
11977 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11978 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11982 /* Use a single insn if we can.
11983 FIXME: IWMMXT allows offsets larger than ldrd can
11984 handle, fix these up with a pair of ldr. */
11986 || GET_CODE (otherops[2]) != CONST_INT
11987 || (INTVAL (otherops[2]) > -256
11988 && INTVAL (otherops[2]) < 256))
11989 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11992 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11993 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11999 /* Use a single insn if we can.
12000 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12001 fix these up with a pair of ldr. */
12003 || GET_CODE (otherops[2]) != CONST_INT
12004 || (INTVAL (otherops[2]) > -256
12005 && INTVAL (otherops[2]) < 256))
12006 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12009 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12010 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12017 /* We might be able to use ldrd %0, %1 here. However the range is
12018 different to ldr/adr, and it is broken on some ARMv7-M
12019 implementations. */
12020 /* Use the second register of the pair to avoid problematic
12022 otherops[1] = operands[1];
12023 output_asm_insn ("adr%?\t%0, %1", otherops);
12024 operands[1] = otherops[0];
12026 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12028 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12031 /* ??? This needs checking for thumb2. */
12033 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12034 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12036 otherops[0] = operands[0];
12037 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12038 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12040 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12042 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12044 switch ((int) INTVAL (otherops[2]))
12047 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12052 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12057 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12061 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12062 operands[1] = otherops[0];
12064 && (GET_CODE (otherops[2]) == REG
12066 || (GET_CODE (otherops[2]) == CONST_INT
12067 && INTVAL (otherops[2]) > -256
12068 && INTVAL (otherops[2]) < 256)))
12070 if (reg_overlap_mentioned_p (operands[0],
12074 /* Swap base and index registers over to
12075 avoid a conflict. */
12077 otherops[1] = otherops[2];
12080 /* If both registers conflict, it will usually
12081 have been fixed by a splitter. */
12082 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12083 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12085 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12086 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12090 otherops[0] = operands[0];
12091 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12096 if (GET_CODE (otherops[2]) == CONST_INT)
12098 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12099 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12101 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12104 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12107 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12110 return "ldr%(d%)\t%0, [%1]";
12112 return "ldm%(ia%)\t%1, %M0";
12116 otherops[1] = adjust_address (operands[1], SImode, 4);
12117 /* Take care of overlapping base/data reg. */
12118 if (reg_mentioned_p (operands[0], operands[1]))
12120 output_asm_insn ("ldr%?\t%0, %1", otherops);
12121 output_asm_insn ("ldr%?\t%0, %1", operands);
12125 output_asm_insn ("ldr%?\t%0, %1", operands);
12126 output_asm_insn ("ldr%?\t%0, %1", otherops);
12133 /* Constraints should ensure this. */
12134 gcc_assert (code0 == MEM && code1 == REG);
12135 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12137 switch (GET_CODE (XEXP (operands[0], 0)))
12141 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12143 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12147 gcc_assert (TARGET_LDRD);
12148 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12153 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12155 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12160 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12162 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12166 gcc_assert (TARGET_LDRD);
12167 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12172 otherops[0] = operands[1];
12173 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12174 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12176 /* IWMMXT allows offsets larger than ldrd can handle,
12177 fix these up with a pair of ldr. */
12179 && GET_CODE (otherops[2]) == CONST_INT
12180 && (INTVAL(otherops[2]) <= -256
12181 || INTVAL(otherops[2]) >= 256))
12183 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12185 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12186 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12190 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12191 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12194 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12195 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12197 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12201 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12202 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12204 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12207 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12213 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12219 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12224 && (GET_CODE (otherops[2]) == REG
12226 || (GET_CODE (otherops[2]) == CONST_INT
12227 && INTVAL (otherops[2]) > -256
12228 && INTVAL (otherops[2]) < 256)))
12230 otherops[0] = operands[1];
12231 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12232 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12238 otherops[0] = adjust_address (operands[0], SImode, 4);
12239 otherops[1] = operands[1];
12240 output_asm_insn ("str%?\t%1, %0", operands);
12241 output_asm_insn ("str%?\t%H1, %0", otherops);
12248 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12249 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12252 output_move_quad (rtx *operands)
12254 if (REG_P (operands[0]))
12256 /* Load, or reg->reg move. */
12258 if (MEM_P (operands[1]))
12260 switch (GET_CODE (XEXP (operands[1], 0)))
12263 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12268 output_asm_insn ("adr%?\t%0, %1", operands);
12269 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12273 gcc_unreachable ();
12281 gcc_assert (REG_P (operands[1]));
12283 dest = REGNO (operands[0]);
12284 src = REGNO (operands[1]);
12286 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
12289 for (i = 0; i < 4; i++)
12291 ops[0] = gen_rtx_REG (SImode, dest + i);
12292 ops[1] = gen_rtx_REG (SImode, src + i);
12293 output_asm_insn ("mov%?\t%0, %1", ops);
12296 for (i = 3; i >= 0; i--)
12298 ops[0] = gen_rtx_REG (SImode, dest + i);
12299 ops[1] = gen_rtx_REG (SImode, src + i);
12300 output_asm_insn ("mov%?\t%0, %1", ops);
12306 gcc_assert (MEM_P (operands[0]));
12307 gcc_assert (REG_P (operands[1]));
12308 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12310 switch (GET_CODE (XEXP (operands[0], 0)))
12313 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12317 gcc_unreachable ();
12324 /* Output a VFP load or store instruction. */
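/* For example (a sketch): a single-precision load from a plain register
   address is emitted as "flds	s0, [r0]", a double-precision one as
   "fldd	d0, [r0]", while the writeback addresses below use the
   fldmias/fldmiad-style multiple forms.  */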
12327 output_move_vfp (rtx *operands)
12329 rtx reg, mem, addr, ops[2];
12330 int load = REG_P (operands[0]);
12331 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12332 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12335 enum machine_mode mode;
12337 reg = operands[!load];
12338 mem = operands[load];
12340 mode = GET_MODE (reg);
12342 gcc_assert (REG_P (reg));
12343 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12344 gcc_assert (mode == SFmode
12348 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12349 gcc_assert (MEM_P (mem));
12351 addr = XEXP (mem, 0);
12353 switch (GET_CODE (addr))
12356 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12357 ops[0] = XEXP (addr, 0);
12362 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12363 ops[0] = XEXP (addr, 0);
12368 templ = "f%s%c%%?\t%%%s0, %%1%s";
12374 sprintf (buff, templ,
12375 load ? "ld" : "st",
12378 integer_p ? "\t%@ int" : "");
12379 output_asm_insn (buff, ops);
12384 /* Output a Neon quad-word load or store, or a load or store for
12385 larger structure modes.
12387 WARNING: The ordering of elements is weird in big-endian mode,
12388 because we use VSTM, as required by the EABI. GCC RTL defines
12389 element ordering based on in-memory order. This can differ
12390 from the architectural ordering of elements within a NEON register.
12391 The intrinsics defined in arm_neon.h use the NEON register element
12392 ordering, not the GCC RTL element ordering.
12394 For example, the in-memory ordering of a big-endian quadword
12395 vector with 16-bit elements when stored from register pair {d0,d1}
12396 will be (lowest address first, d0[N] is NEON register element N):
12398 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12400 When necessary, quadword registers (dN, dN+1) are moved to ARM
12401 registers from rN in the order:
12403 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12405 So that STM/LDM can be used on vectors in ARM registers, and the
12406 same memory layout will result as if VSTM/VLDM were used. */
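/* For illustration (a sketch): a post-incremented quad-word load is
   emitted from the POST_INC case below as something like
   "vldmia	r0!, {d0-d1}", giving the in-memory element order
   described above.  */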
12409 output_move_neon (rtx *operands)
12411 rtx reg, mem, addr, ops[2];
12412 int regno, load = REG_P (operands[0]);
12415 enum machine_mode mode;
12417 reg = operands[!load];
12418 mem = operands[load];
12420 mode = GET_MODE (reg);
12422 gcc_assert (REG_P (reg));
12423 regno = REGNO (reg);
12424 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12425 || NEON_REGNO_OK_FOR_QUAD (regno));
12426 gcc_assert (VALID_NEON_DREG_MODE (mode)
12427 || VALID_NEON_QREG_MODE (mode)
12428 || VALID_NEON_STRUCT_MODE (mode));
12429 gcc_assert (MEM_P (mem));
12431 addr = XEXP (mem, 0);
12433 /* Strip off const from addresses like (const (plus (...))). */
12434 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12435 addr = XEXP (addr, 0);
12437 switch (GET_CODE (addr))
12440 templ = "v%smia%%?\t%%0!, %%h1";
12441 ops[0] = XEXP (addr, 0);
12446 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12447 templ = "v%smdb%%?\t%%0!, %%h1";
12448 ops[0] = XEXP (addr, 0);
12453 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12454 gcc_unreachable ();
12459 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12462 for (i = 0; i < nregs; i++)
12464 /* We're only using DImode here because it's a convenient size. */
12465 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12466 ops[1] = adjust_address (mem, DImode, 8 * i);
12467 if (reg_overlap_mentioned_p (ops[0], mem))
12469 gcc_assert (overlap == -1);
12474 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12475 output_asm_insn (buff, ops);
12480 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12481 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12482 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12483 output_asm_insn (buff, ops);
12490 templ = "v%smia%%?\t%%m0, %%h1";
12495 sprintf (buff, templ, load ? "ld" : "st");
12496 output_asm_insn (buff, ops);
12501 /* Output an ADD r, s, #n where n may be too big for one instruction.
12502 If adding zero to one register, output nothing. */
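/* For example (a sketch, assuming r0/r1 operands): n = 0x408 is not a
   valid immediate, because its eight significant bits sit at an odd
   rotation, so output_multi_immediate below splits it into two
   encodable chunks:

	add	r0, r1, #8
	add	r0, r0, #1024
 */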
12504 output_add_immediate (rtx *operands)
12506 HOST_WIDE_INT n = INTVAL (operands[2]);
12508 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12511 output_multi_immediate (operands,
12512 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12515 output_multi_immediate (operands,
12516 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12523 /* Output a multiple immediate operation.
12524 OPERANDS is the vector of operands referred to in the output patterns.
12525 INSTR1 is the output pattern to use for the first constant.
12526 INSTR2 is the output pattern to use for subsequent constants.
12527 IMMED_OP is the index of the constant slot in OPERANDS.
12528 N is the constant value. */
12529 static const char *
12530 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12531 int immed_op, HOST_WIDE_INT n)
12533 #if HOST_BITS_PER_WIDE_INT > 32
12539 /* Quick and easy output. */
12540 operands[immed_op] = const0_rtx;
12541 output_asm_insn (instr1, operands);
12546 const char * instr = instr1;
12548 /* Note that n is never zero here (which would give no output). */
12549 for (i = 0; i < 32; i += 2)
12553 operands[immed_op] = GEN_INT (n & (255 << i));
12554 output_asm_insn (instr, operands);
12564 /* Return the name of a shifter operation. */
12565 static const char *
12566 arm_shift_nmem (enum rtx_code code)
12571 return ARM_LSL_NAME;
12587 /* Return the appropriate ARM instruction for the operation code.
12588 The returned result should not be overwritten. OP is the rtx of the
12589 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator is a shift. */
12592 arithmetic_instr (rtx op, int shift_first_arg)
12594 switch (GET_CODE (op))
12600 return shift_first_arg ? "rsb" : "sub";
12615 return arm_shift_nmem (GET_CODE (op));
12618 gcc_unreachable ();
12622 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12623 for the operation code. The returned result should not be overwritten.
12624 OP is the rtx code of the shift.
12625 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift. */
12627 static const char *
12628 shift_op (rtx op, HOST_WIDE_INT *amountp)
12631 enum rtx_code code = GET_CODE (op);
12633 switch (GET_CODE (XEXP (op, 1)))
12641 *amountp = INTVAL (XEXP (op, 1));
12645 gcc_unreachable ();
12651 gcc_assert (*amountp != -1);
12652 *amountp = 32 - *amountp;
12655 /* Fall through. */
12661 mnem = arm_shift_nmem (code);
12665 /* We never have to worry about the amount being other than a
12666 power of 2, since this case can never be reloaded from a reg. */
12667 gcc_assert (*amountp != -1);
12668 *amountp = int_log2 (*amountp);
12669 return ARM_LSL_NAME;
12672 gcc_unreachable ();
12675 if (*amountp != -1)
12677 /* This is not 100% correct, but follows from the desire to merge
12678 multiplication by a power of 2 with the recognizer for a
12679 shift. >=32 is not a valid shift for "lsl", so we must try and
12680 output a shift that produces the correct arithmetical result.
12681 Using lsr #32 is identical except for the fact that the carry bit
12682 is not set correctly if we set the flags; but we never use the
12683 carry bit from such an operation, so we can ignore that. */
12684 if (code == ROTATERT)
12685 /* Rotate is just modulo 32. */
12687 else if (*amountp != (*amountp & 31))
12689 if (code == ASHIFT)
12694 /* Shifts of 0 are no-ops. */
12702 /* Obtain the shift count from the POWER of two, e.g. int_log2 (8) == 3. */
12704 static HOST_WIDE_INT
12705 int_log2 (HOST_WIDE_INT power)
12707 HOST_WIDE_INT shift = 0;
12709 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12711 gcc_assert (shift <= 31);
12718 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12719 because /bin/as is horribly restrictive. The judgement about
12720 whether or not each character is 'printable' (and can be output as
12721 is) or not (and must be printed with an octal escape) must be made
12722 with reference to the *host* character set -- the situation is
12723 similar to that discussed in the comments above pp_c_char in
12724 c-pretty-print.c. */
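/* For example (a sketch): output_ascii_pseudo_op (f, (const unsigned
   char *) "a\"b\n", 4) would print

	.ascii	"a\"b\012"

   escaping the quote with a backslash and the newline as octal, and
   starting a fresh .ascii directive whenever a line reaches
   MAX_ASCII_LEN characters.  */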
12726 #define MAX_ASCII_LEN 51
12729 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12732 int len_so_far = 0;
12734 fputs ("\t.ascii\t\"", stream);
12736 for (i = 0; i < len; i++)
12740 if (len_so_far >= MAX_ASCII_LEN)
12742 fputs ("\"\n\t.ascii\t\"", stream);
12748 if (c == '\\' || c == '\"')
12750 putc ('\\', stream);
12758 fprintf (stream, "\\%03o", c);
12763 fputs ("\"\n", stream);
12766 /* Compute the register save mask for registers 0 through 12
12767 inclusive. This code is used by arm_compute_save_reg_mask. */
12769 static unsigned long
12770 arm_compute_save_reg0_reg12_mask (void)
12772 unsigned long func_type = arm_current_func_type ();
12773 unsigned long save_reg_mask = 0;
12776 if (IS_INTERRUPT (func_type))
12778 unsigned int max_reg;
12779 /* Interrupt functions must not corrupt any registers,
12780 even call clobbered ones. If this is a leaf function
12781 we can just examine the registers used by the RTL, but
12782 otherwise we have to assume that whatever function is
12783 called might clobber anything, and so we have to save
12784 all the call-clobbered registers as well. */
12785 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12786 /* FIQ handlers have registers r8 - r12 banked, so
12787 we only need to check r0 - r7. Normal ISRs only
12788 bank r14 and r15, so we must check up to r12.
12789 r13 is the stack pointer which is always preserved,
12790 so we do not need to consider it here. */
12795 for (reg = 0; reg <= max_reg; reg++)
12796 if (df_regs_ever_live_p (reg)
12797 || (! current_function_is_leaf && call_used_regs[reg]))
12798 save_reg_mask |= (1 << reg);
12800 /* Also save the pic base register if necessary. */
12802 && !TARGET_SINGLE_PIC_BASE
12803 && arm_pic_register != INVALID_REGNUM
12804 && crtl->uses_pic_offset_table)
12805 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12807 else if (IS_VOLATILE(func_type))
12809 /* For noreturn functions we historically omitted register saves
12810 altogether. However this really messes up debugging. As a
12811 compromise, save just the frame pointers. Combined with the link
12812 register saved elsewhere, this should be sufficient to get a backtrace. */
12814 if (frame_pointer_needed)
12815 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12816 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12817 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12818 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12819 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12823 /* In the normal case we only need to save those registers
12824 which are call saved and which are used by this function. */
12825 for (reg = 0; reg <= 11; reg++)
12826 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12827 save_reg_mask |= (1 << reg);
12829 /* Handle the frame pointer as a special case. */
12830 if (frame_pointer_needed)
12831 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12833 /* If we aren't loading the PIC register,
12834 don't stack it even though it may be live. */
12836 && !TARGET_SINGLE_PIC_BASE
12837 && arm_pic_register != INVALID_REGNUM
12838 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12839 || crtl->uses_pic_offset_table))
12840 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12842 /* The prologue will copy SP into R0, so save it. */
12843 if (IS_STACKALIGN (func_type))
12844 save_reg_mask |= 1;
12847 /* Save registers so the exception handler can modify them. */
12848 if (crtl->calls_eh_return)
12854 reg = EH_RETURN_DATA_REGNO (i);
12855 if (reg == INVALID_REGNUM)
12857 save_reg_mask |= 1 << reg;
12861 return save_reg_mask;
12865 /* Compute the number of bytes used to store the static chain register on the
12866 stack, above the stack frame. We need to know this accurately to get the
12867 alignment of the rest of the stack frame correct. */
12869 static int
arm_compute_static_chain_stack_bytes (void)
12871 unsigned long func_type = arm_current_func_type ();
12872 int static_chain_stack_bytes = 0;
12874 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
12875 && IS_NESTED (func_type)
12876 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12877 static_chain_stack_bytes = 4;
12879 return static_chain_stack_bytes;
12883 /* Compute a bit mask of which registers need to be
12884 saved on the stack for the current function.
12885 This is used by arm_get_frame_offsets, which may add extra registers. */
12887 static unsigned long
12888 arm_compute_save_reg_mask (void)
12890 unsigned int save_reg_mask = 0;
12891 unsigned long func_type = arm_current_func_type ();
12894 if (IS_NAKED (func_type))
12895 /* This should never really happen. */
12898 /* If we are creating a stack frame, then we must save the frame pointer,
12899 IP (which will hold the old stack pointer), LR and the PC. */
12900 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12902 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12905 | (1 << PC_REGNUM);
12907 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12909 /* Decide if we need to save the link register.
12910 Interrupt routines have their own banked link register,
12911 so they never need to save it.
12912 Otherwise if we do not use the link register we do not need to save
12913 it. If we are pushing other registers onto the stack however, we
12914 can save an instruction in the epilogue by pushing the link register
12915 now and then popping it back into the PC. This incurs extra memory
12916 accesses though, so we only do it when optimizing for size, and only
12917 if we know that we will not need a fancy return sequence. */
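/* E.g. (a sketch): pushing "{r4, lr}" in the prologue lets the epilogue
   finish with a single "ldmfd	sp!, {r4, pc}" instead of reloading LR
   and returning with a separate instruction.  */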
12918 if (df_regs_ever_live_p (LR_REGNUM)
12921 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12922 && !crtl->calls_eh_return))
12923 save_reg_mask |= 1 << LR_REGNUM;
12925 if (cfun->machine->lr_save_eliminated)
12926 save_reg_mask &= ~ (1 << LR_REGNUM);
12928 if (TARGET_REALLY_IWMMXT
12929 && ((bit_count (save_reg_mask)
12930 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12931 arm_compute_static_chain_stack_bytes())
12934 /* The total number of registers that are going to be pushed
12935 onto the stack is odd. We need to ensure that the stack
12936 is 64-bit aligned before we start to save iWMMXt registers,
12937 and also before we start to create locals. (A local variable
12938 might be a double or long long which we will load/store using
12939 an iWMMXt instruction). Therefore we need to push another
12940 ARM register, so that the stack will be 64-bit aligned. We
12941 try to avoid using the arg registers (r0-r3) as they might be
12942 used to pass values in a tail call. */
12943 for (reg = 4; reg <= 12; reg++)
12944 if ((save_reg_mask & (1 << reg)) == 0)
12948 save_reg_mask |= (1 << reg);
12951 cfun->machine->sibcall_blocked = 1;
12952 save_reg_mask |= (1 << 3);
12956 /* We may need to push an additional register for use initializing the
12957 PIC base register. */
12958 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12959 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12961 reg = thumb_find_work_register (1 << 4);
12962 if (!call_used_regs[reg])
12963 save_reg_mask |= (1 << reg);
12966 return save_reg_mask;
12970 /* Compute a bit mask of which registers need to be
12971 saved on the stack for the current function. */
12972 static unsigned long
12973 thumb1_compute_save_reg_mask (void)
12975 unsigned long mask;
12979 for (reg = 0; reg < 12; reg ++)
12980 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12984 && !TARGET_SINGLE_PIC_BASE
12985 && arm_pic_register != INVALID_REGNUM
12986 && crtl->uses_pic_offset_table)
12987 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12989 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12990 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12991 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12993 /* LR will also be pushed if any lo regs are pushed. */
12994 if (mask & 0xff || thumb_force_lr_save ())
12995 mask |= (1 << LR_REGNUM);
12997 /* Make sure we have a low work register if we need one.
12998 We will need one if we are going to push a high register,
12999 but we are not currently intending to push a low register. */
13000 if ((mask & 0xff) == 0
13001 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13003 /* Use thumb_find_work_register to choose which register
13004 we will use. If the register is live then we will
13005 have to push it. Use LAST_LO_REGNUM as our fallback
13006 choice for the register to select. */
13007 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13008 /* Make sure the register returned by thumb_find_work_register is
13009 not part of the return value. */
13010 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13011 reg = LAST_LO_REGNUM;
13013 if (! call_used_regs[reg])
13017 /* The 504 below is 8 bytes less than 512 because there are two possible
13018 alignment words. We can't tell here if they will be present or not so we
13019 have to play it safe and assume that they are. */
13020 if ((CALLER_INTERWORKING_SLOT_SIZE +
13021 ROUND_UP_WORD (get_frame_size ()) +
13022 crtl->outgoing_args_size) >= 504)
13024 /* This is the same as the code in thumb1_expand_prologue() which
13025 determines which register to use for stack decrement. */
13026 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13027 if (mask & (1 << reg))
13030 if (reg > LAST_LO_REGNUM)
13032 /* Make sure we have a register available for stack decrement. */
13033 mask |= 1 << LAST_LO_REGNUM;
13041 /* Return the number of bytes required to save VFP registers. */
13043 arm_get_vfp_saved_size (void)
13045 unsigned int regno;
13050 /* Space for saved VFP registers. */
13051 if (TARGET_HARD_FLOAT && TARGET_VFP)
13054 for (regno = FIRST_VFP_REGNUM;
13055 regno < LAST_VFP_REGNUM;
13058 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13059 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13063 /* Workaround ARM10 VFPr1 bug. */
13064 if (count == 2 && !arm_arch6)
13066 saved += count * 8;
13075 if (count == 2 && !arm_arch6)
13077 saved += count * 8;
13084 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13085 everything bar the final return instruction. */
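/* A typical sequence emitted here (a sketch) is "ldmfd	sp!, {r4, fp, pc}"
   when the return address can be popped straight into the PC, or
   "ldr	lr, [sp], #4" followed by "bx	lr" when a special exit
   sequence is needed.  */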
13087 output_return_instruction (rtx operand, int really_return, int reverse)
13089 char conditional[10];
13092 unsigned long live_regs_mask;
13093 unsigned long func_type;
13094 arm_stack_offsets *offsets;
13096 func_type = arm_current_func_type ();
13098 if (IS_NAKED (func_type))
13101 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13103 /* If this function was declared non-returning, and we have
13104 found a tail call, then we have to trust that the called
13105 function won't return. */
13110 /* Otherwise, trap an attempted return by aborting. */
13112 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13114 assemble_external_libcall (ops[1]);
13115 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13121 gcc_assert (!cfun->calls_alloca || really_return);
13123 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13125 cfun->machine->return_used_this_function = 1;
13127 offsets = arm_get_frame_offsets ();
13128 live_regs_mask = offsets->saved_regs_mask;
13130 if (live_regs_mask)
13132 const char * return_reg;
13134 /* If we do not have any special requirements for function exit
13135 (e.g. interworking) then we can load the return address
13136 directly into the PC. Otherwise we must load it into LR. */
13138 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13139 return_reg = reg_names[PC_REGNUM];
13141 return_reg = reg_names[LR_REGNUM];
13143 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13145 /* There are three possible reasons for the IP register
13146 being saved. 1) a stack frame was created, in which case
13147 IP contains the old stack pointer, or 2) an ISR routine
13148 corrupted it, or 3) it was saved to align the stack on
13149 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
13151 if (frame_pointer_needed)
13153 live_regs_mask &= ~ (1 << IP_REGNUM);
13154 live_regs_mask |= (1 << SP_REGNUM);
13157 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13160 /* On some ARM architectures it is faster to use LDR rather than
13161 LDM to load a single register. On other architectures, the
13162 cost is the same. In 26 bit mode, or for exception handlers,
13163 we have to use LDM to load the PC so that the CPSR is also restored. */
13165 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13166 if (live_regs_mask == (1U << reg))
13169 if (reg <= LAST_ARM_REGNUM
13170 && (reg != LR_REGNUM
13172 || ! IS_INTERRUPT (func_type)))
13174 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13175 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13182 /* Generate the load multiple instruction to restore the
13183 registers. Note we can get here, even if
13184 frame_pointer_needed is true, but only if sp already
13185 points to the base of the saved core registers. */
13186 if (live_regs_mask & (1 << SP_REGNUM))
13188 unsigned HOST_WIDE_INT stack_adjust;
13190 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13191 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13193 if (stack_adjust && arm_arch5 && TARGET_ARM)
13194 if (TARGET_UNIFIED_ASM)
13195 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13197 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13200 /* If we can't use ldmib (SA110 bug),
13201 then try to pop r3 instead. */
13203 live_regs_mask |= 1 << 3;
13205 if (TARGET_UNIFIED_ASM)
13206 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13208 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13212 if (TARGET_UNIFIED_ASM)
13213 sprintf (instr, "pop%s\t{", conditional);
13215 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13217 p = instr + strlen (instr);
13219 for (reg = 0; reg <= SP_REGNUM; reg++)
13220 if (live_regs_mask & (1 << reg))
13222 int l = strlen (reg_names[reg]);
13228 memcpy (p, ", ", 2);
13232 memcpy (p, "%|", 2);
13233 memcpy (p + 2, reg_names[reg], l);
13237 if (live_regs_mask & (1 << LR_REGNUM))
13239 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13240 /* If returning from an interrupt, restore the CPSR. */
13241 if (IS_INTERRUPT (func_type))
13248 output_asm_insn (instr, & operand);
13250 /* See if we need to generate an extra instruction to
13251 perform the actual function return. */
13253 && func_type != ARM_FT_INTERWORKED
13254 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13256 /* The return has already been handled
13257 by loading the LR into the PC. */
13264 switch ((int) ARM_FUNC_TYPE (func_type))
13268 /* ??? This is wrong for unified assembly syntax. */
13269 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13272 case ARM_FT_INTERWORKED:
13273 sprintf (instr, "bx%s\t%%|lr", conditional);
13276 case ARM_FT_EXCEPTION:
13277 /* ??? This is wrong for unified assembly syntax. */
13278 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13282 /* Use bx if it's available. */
13283 if (arm_arch5 || arm_arch4t)
13284 sprintf (instr, "bx%s\t%%|lr", conditional);
13286 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13290 output_asm_insn (instr, & operand);
13296 /* Write the function name into the code section, directly preceding
13297 the function prologue.
13299 Code will be output similar to this:
13301 .ascii "arm_poke_function_name", 0
13304 .word 0xff000000 + (t1 - t0)
13305 arm_poke_function_name
13307 stmfd sp!, {fp, ip, lr, pc}
13310 When performing a stack backtrace, code can inspect the value
13311 of 'pc' stored at 'fp' + 0. If the trace function then looks
13312 at location pc - 12 and the top 8 bits are set, then we know
13313 that there is a function name embedded immediately preceding this
13314 location, and that its length is (pc[-3] & 0x00ffffff).
13316 We assume that pc is declared as a pointer to an unsigned long.
13318 It is of no benefit to output the function name if we are assembling
13319 a leaf function. These function types will not contain a stack
13320 backtrace structure, therefore it is not possible to determine the function name. */
13323 arm_poke_function_name (FILE *stream, const char *name)
13325 unsigned long alignlength;
13326 unsigned long length;
13329 length = strlen (name) + 1;
13330 alignlength = ROUND_UP_WORD (length);
13332 ASM_OUTPUT_ASCII (stream, name, length);
13333 ASM_OUTPUT_ALIGN (stream, 2);
13334 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13335 assemble_aligned_integer (UNITS_PER_WORD, x);
13338 /* Place some comments into the assembler stream
13339 describing the current function. */
13341 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13343 unsigned long func_type;
13347 thumb1_output_function_prologue (f, frame_size);
13351 /* Sanity check. */
13352 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13354 func_type = arm_current_func_type ();
13356 switch ((int) ARM_FUNC_TYPE (func_type))
13359 case ARM_FT_NORMAL:
13361 case ARM_FT_INTERWORKED:
13362 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13365 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13368 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13370 case ARM_FT_EXCEPTION:
13371 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13375 if (IS_NAKED (func_type))
13376 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13378 if (IS_VOLATILE (func_type))
13379 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13381 if (IS_NESTED (func_type))
13382 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13383 if (IS_STACKALIGN (func_type))
13384 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13386 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13388 crtl->args.pretend_args_size, frame_size);
13390 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13391 frame_pointer_needed,
13392 cfun->machine->uses_anonymous_args);
13394 if (cfun->machine->lr_save_eliminated)
13395 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13397 if (crtl->calls_eh_return)
13398 asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
13403 arm_output_epilogue (rtx sibling)
13406 unsigned long saved_regs_mask;
13407 unsigned long func_type;
13408 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13409 frame that is $fp + 4 for a non-variadic function. */
13410 int floats_offset = 0;
13412 FILE * f = asm_out_file;
13413 unsigned int lrm_count = 0;
13414 int really_return = (sibling == NULL);
13416 arm_stack_offsets *offsets;
13418 /* If we have already generated the return instruction
13419 then it is futile to generate anything else. */
13420 if (use_return_insn (FALSE, sibling) &&
13421 (cfun->machine->return_used_this_function != 0))
13424 func_type = arm_current_func_type ();
13426 if (IS_NAKED (func_type))
13427 /* Naked functions don't have epilogues. */
13430 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13434 /* A volatile function should never return. Call abort. */
13435 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13436 assemble_external_libcall (op);
13437 output_asm_insn ("bl\t%a0", &op);
13442 /* If we are throwing an exception, then we really must be doing a
13443 return, so we can't tail-call. */
13444 gcc_assert (!crtl->calls_eh_return || really_return);
13446 offsets = arm_get_frame_offsets ();
13447 saved_regs_mask = offsets->saved_regs_mask;
13450 lrm_count = bit_count (saved_regs_mask);
13452 floats_offset = offsets->saved_args;
13453 /* Compute how far away the floats will be. */
13454 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13455 if (saved_regs_mask & (1 << reg))
13456 floats_offset += 4;
13458 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13460 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13461 int vfp_offset = offsets->frame;
13463 if (TARGET_FPA_EMU2)
13465 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13466 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13468 floats_offset += 12;
13469 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13470 reg, FP_REGNUM, floats_offset - vfp_offset);
13475 start_reg = LAST_FPA_REGNUM;
13477 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13479 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13481 floats_offset += 12;
13483 /* We can't unstack more than four registers at once. */
13484 if (start_reg - reg == 3)
13486 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13487 reg, FP_REGNUM, floats_offset - vfp_offset);
13488 start_reg = reg - 1;
13493 if (reg != start_reg)
13494 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13495 reg + 1, start_reg - reg,
13496 FP_REGNUM, floats_offset - vfp_offset);
13497 start_reg = reg - 1;
13501 /* Just in case the last register checked also needs unstacking. */
13502 if (reg != start_reg)
13503 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13504 reg + 1, start_reg - reg,
13505 FP_REGNUM, floats_offset - vfp_offset);
13508 if (TARGET_HARD_FLOAT && TARGET_VFP)
13512 /* The fldmd insns do not have base+offset addressing
13513 modes, so we use IP to hold the address. */
13514 saved_size = arm_get_vfp_saved_size ();
13516 if (saved_size > 0)
13518 floats_offset += saved_size;
13519 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13520 FP_REGNUM, floats_offset - vfp_offset);
13522 start_reg = FIRST_VFP_REGNUM;
13523 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13525 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13526 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13528 if (start_reg != reg)
13529 vfp_output_fldmd (f, IP_REGNUM,
13530 (start_reg - FIRST_VFP_REGNUM) / 2,
13531 (reg - start_reg) / 2);
13532 start_reg = reg + 2;
13535 if (start_reg != reg)
13536 vfp_output_fldmd (f, IP_REGNUM,
13537 (start_reg - FIRST_VFP_REGNUM) / 2,
13538 (reg - start_reg) / 2);
13543 /* The frame pointer is guaranteed to be non-double-word aligned.
13544 This is because it is set to (old_stack_pointer - 4) and the
13545 old_stack_pointer was double word aligned. Thus the offset to
13546 the iWMMXt registers to be loaded must also be non-double-word
13547 sized, so that the resultant address *is* double-word aligned.
13548 We can ignore floats_offset since that was already included in
13549 the live_regs_mask. */
13550 lrm_count += (lrm_count % 2 ? 2 : 1);
13552 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13553 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13555 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13556 reg, FP_REGNUM, lrm_count * 4);
13561 /* saved_regs_mask should contain the IP, which at the time of stack
13562 frame generation actually contains the old stack pointer. So a
13563 quick way to unwind the stack is just pop the IP register directly
13564 into the stack pointer. */
13565 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13566 saved_regs_mask &= ~ (1 << IP_REGNUM);
13567 saved_regs_mask |= (1 << SP_REGNUM);
13569 /* There are two registers left in saved_regs_mask - LR and PC. We
13570 only need to restore the LR register (the return address), but to
13571 save time we can load it directly into the PC, unless we need a
13572 special function exit sequence, or we are not really returning. */
13574 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13575 && !crtl->calls_eh_return)
13576 /* Delete the LR from the register mask, so that the LR on
13577 the stack is loaded into the PC in the register mask. */
13578 saved_regs_mask &= ~ (1 << LR_REGNUM);
13580 saved_regs_mask &= ~ (1 << PC_REGNUM);
13582 /* We must use SP as the base register, because SP is one of the
13583 registers being restored. If an interrupt or page fault
13584 happens in the ldm instruction, the SP might or might not
13585 have been restored. That would be bad, as then SP will no
13586 longer indicate the safe area of stack, and we can get stack
13587 corruption. Using SP as the base register means that it will
13588 be reset correctly to the original value, should an interrupt
13589 occur. If the stack pointer already points at the right
13590 place, then omit the subtraction. */
13591 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13592 || cfun->calls_alloca)
13593 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13594 4 * bit_count (saved_regs_mask));
13595 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13597 if (IS_INTERRUPT (func_type))
13598 /* Interrupt handlers will have pushed the
13599 IP onto the stack, so restore it now. */
13600 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13604 /* This branch is executed for ARM mode (non-apcs frames) and
13605 Thumb-2 mode. Frame layout is essentially the same for those
13606 cases, except that in ARM mode the frame pointer points to the
13607 first saved register, while in Thumb-2 mode the frame pointer points
13608 to the last saved register.
13610 It is possible to make the frame pointer point to the last saved
13611 register in both cases, and remove some conditionals below.
13612 That means that fp setup in prologue would be just "mov fp, sp"
13613 and sp restore in epilogue would be just "mov sp, fp", whereas
13614 now we have to use add/sub in those cases. However, the value
13615 of that would be marginal, as both mov and add/sub are 32-bit
13616 in ARM mode, and it would require extra conditionals
13617 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13618 (where frame pointer is required to point at first register)
13619 and the ARM-non-apcs-frame case. Therefore, such a change is postponed
13620 until a real need arises. */
13621 unsigned HOST_WIDE_INT amount;
13623 /* Restore stack pointer if necessary. */
13624 if (TARGET_ARM && frame_pointer_needed)
13626 operands[0] = stack_pointer_rtx;
13627 operands[1] = hard_frame_pointer_rtx;
13629 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13630 output_add_immediate (operands);
13634 if (frame_pointer_needed)
13636 /* For Thumb-2 restore sp from the frame pointer.
13637 Operand restrictions mean we have to increment FP, then copy to SP. */
13639 amount = offsets->locals_base - offsets->saved_regs;
13640 operands[0] = hard_frame_pointer_rtx;
13644 unsigned long count;
13645 operands[0] = stack_pointer_rtx;
13646 amount = offsets->outgoing_args - offsets->saved_regs;
13647 /* Pop call-clobbered registers if it avoids a
13648 separate stack adjustment. */
13649 count = offsets->saved_regs - offsets->saved_args;
13652 && !crtl->calls_eh_return
13653 && bit_count(saved_regs_mask) * 4 == count
13654 && !IS_INTERRUPT (func_type)
13655 && !crtl->tail_call_emit)
13657 unsigned long mask;
13658 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13660 mask &= ~saved_regs_mask;
13662 while (bit_count (mask) * 4 > amount)
13664 while ((mask & (1 << reg)) == 0)
13666 mask &= ~(1 << reg);
13668 if (bit_count (mask) * 4 == amount)
{
13670 saved_regs_mask |= mask;
13677 operands[1] = operands[0];
13678 operands[2] = GEN_INT (amount);
13679 output_add_immediate (operands);
13681 if (frame_pointer_needed)
13682 asm_fprintf (f, "\tmov\t%r, %r\n",
13683 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13686 if (TARGET_FPA_EMU2)
13688 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13689 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13690 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13695 start_reg = FIRST_FPA_REGNUM;
13697 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13699 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13701 if (reg - start_reg == 3)
13703 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13704 start_reg, SP_REGNUM);
13705 start_reg = reg + 1;
13710 if (reg != start_reg)
13711 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13712 start_reg, reg - start_reg,
13715 start_reg = reg + 1;
13719 /* Just in case the last register checked also needs unstacking. */
13720 if (reg != start_reg)
13721 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13722 start_reg, reg - start_reg, SP_REGNUM);
13725 if (TARGET_HARD_FLOAT && TARGET_VFP)
13727 int end_reg = LAST_VFP_REGNUM + 1;
13729 /* Scan the registers in reverse order. We need to match
13730 any groupings made in the prologue and generate matching
13731 pop operations. */
13732 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13734 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13735 && (!df_regs_ever_live_p (reg + 1)
13736 || call_used_regs[reg + 1]))
13738 if (end_reg > reg + 2)
13739 vfp_output_fldmd (f, SP_REGNUM,
13740 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13741 (end_reg - (reg + 2)) / 2);
13745 if (end_reg > reg + 2)
13746 vfp_output_fldmd (f, SP_REGNUM, 0,
13747 (end_reg - (reg + 2)) / 2);
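/* Illustrative sketch (not GCC source) of the grouping produced by the
   reverse scan above, with FIRST_VFP_REGNUM taken as 0 and registers
   tracked per D-register pair for readability.  A dead pair closes the
   current group, exactly as the two vfp_output_fldmd calls do.

     struct sketch_group { int first_d; int count; };

     // live_pair[i] != 0 means S registers 2i and 2i+1 were saved.
     static int
     sketch_vfp_groups (const int *live_pair, int npairs,
                        struct sketch_group *out)
     {
       int n = 0, end = npairs;           // one past the last live pair
       int i;
       for (i = npairs - 1; i >= 0; i--)
         if (!live_pair[i])
           {
             if (end > i + 1)             // close a non-empty group
               {
                 out[n].first_d = i + 1;
                 out[n].count = end - (i + 1);
                 n++;
               }
             end = i;
           }
       if (end > 0)                       // final group reaching D0
         {
           out[n].first_d = 0;
           out[n].count = end;
           n++;
         }
       return n;
     }
 */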
13751 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13752 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13753 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13755 /* If we can, restore the LR into the PC. */
13756 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13757 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13758 && !IS_STACKALIGN (func_type)
13760 && crtl->args.pretend_args_size == 0
13761 && saved_regs_mask & (1 << LR_REGNUM)
13762 && !crtl->calls_eh_return)
13764 saved_regs_mask &= ~ (1 << LR_REGNUM);
13765 saved_regs_mask |= (1 << PC_REGNUM);
13766 rfe = IS_INTERRUPT (func_type);
13771 /* Load the registers off the stack. If we only have one register
13772 to load, use the LDR instruction - it is faster. For Thumb-2,
13773 always use pop and the assembler will pick the best instruction. */
13774 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13775 && !IS_INTERRUPT(func_type))
13777 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13779 else if (saved_regs_mask)
13781 if (saved_regs_mask & (1 << SP_REGNUM))
13782 /* Note - write back to the stack register is not enabled
13783 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13784 in the list of registers and if we add writeback the
13785 instruction becomes UNPREDICTABLE. */
13786 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13788 else if (TARGET_ARM)
13789 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13792 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13795 if (crtl->args.pretend_args_size)
13797 /* Unwind the pre-pushed regs. */
13798 operands[0] = operands[1] = stack_pointer_rtx;
13799 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13800 output_add_immediate (operands);
13804 /* We may have already restored PC directly from the stack. */
13805 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13808 /* Stack adjustment for exception handler. */
13809 if (crtl->calls_eh_return)
13810 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13811 ARM_EH_STACKADJ_REGNUM);
13813 /* Generate the return instruction. */
13814 switch ((int) ARM_FUNC_TYPE (func_type))
13818 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13821 case ARM_FT_EXCEPTION:
13822 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13825 case ARM_FT_INTERWORKED:
13826 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13830 if (IS_STACKALIGN (func_type))
13832 /* See comment in arm_expand_prologue. */
13833 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13835 if (arm_arch5 || arm_arch4t)
13836 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13838 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13846 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13847 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13849 arm_stack_offsets *offsets;
13855 /* Emit any call-via-reg trampolines that are needed for v4t support
13856 of call_reg and call_value_reg type insns. */
13857 for (regno = 0; regno < LR_REGNUM; regno++)
13859 rtx label = cfun->machine->call_via[regno];
13863 switch_to_section (function_section (current_function_decl));
13864 targetm.asm_out.internal_label (asm_out_file, "L",
13865 CODE_LABEL_NUMBER (label));
13866 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13870 /* ??? Probably not safe to set this here, since it assumes that a
13871 function will be emitted as assembly immediately after we generate
13872 RTL for it. This does not happen for inline functions. */
13873 cfun->machine->return_used_this_function = 0;
13875 else /* TARGET_32BIT */
13877 /* We need to take into account any stack-frame rounding. */
13878 offsets = arm_get_frame_offsets ();
13880 gcc_assert (!use_return_insn (FALSE, NULL)
13881 || (cfun->machine->return_used_this_function != 0)
13882 || offsets->saved_regs == offsets->outgoing_args
13883 || frame_pointer_needed);
13885 /* Reset the ARM-specific per-function variables. */
13886 after_arm_reorg = 0;
13890 /* Generate and emit an insn that we will recognize as a push_multi.
13891 Unfortunately, since this insn does not reflect the actual
13892 semantics of the operation very well, we need to annotate it for the
13893 benefit of DWARF2 frame unwind information. */
13895 emit_multi_reg_push (unsigned long mask)
13898 int num_dwarf_regs;
13902 int dwarf_par_index;
13905 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13906 if (mask & (1 << i))
13909 gcc_assert (num_regs && num_regs <= 16);
13911 /* We don't record the PC in the dwarf frame information. */
13912 num_dwarf_regs = num_regs;
13913 if (mask & (1 << PC_REGNUM))
13916 /* For the body of the insn we are going to generate an UNSPEC in
13917 parallel with several USEs. This allows the insn to be recognized
13918 by the push_multi pattern in the arm.md file.
13920 The body of the insn looks something like this:
13923 (set (mem:BLK (pre_modify:SI (reg:SI sp)
13924 (const_int:SI <num>)))
13925 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13931 For the frame note however, we try to be more explicit and actually
13932 show each register being stored into the stack frame, plus a (single)
13933 decrement of the stack pointer. We do it this way in order to be
13934 friendly to the stack unwinding code, which only wants to see a single
13935 stack decrement per instruction. The RTL we generate for the note looks
13936 something like this:
13939 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13940 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13941 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
13942 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
13946 FIXME: In an ideal world the PRE_MODIFY would not exist and
13947 instead we'd have a parallel expression detailing all
13948 the stores to the various memory addresses so that debug
13949 information is more up-to-date. Remember, however, while writing
13950 this to take care of the constraints with the push instruction.
13952 Note also that this has to be taken care of for the VFP registers.
13954 For more see PR43399. */
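/* Worked example (ours, not from the sources): for mask = {r4, r5, lr}
   the note built below describes one SP decrement and three stores,

     (set (reg sp) (plus (reg sp) (const_int -12)))
     (set (mem (reg sp))                      (reg r4))
     (set (mem (plus (reg sp) (const_int 4))) (reg r5))
     (set (mem (plus (reg sp) (const_int 8))) (reg lr))

   i.e. the J-th set register of an N-register push lands 4*J bytes
   above the new SP, and the single decrement is -4*N.  In plain C:

     static int sketch_push_offset (unsigned long mask, int regno)
     {
       int j = 0, i;
       for (i = 0; i < regno; i++)
         if (mask & (1UL << i))
           j++;
       return 4 * j;                  // byte offset from the new SP
     }
 */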
13956 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13957 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13958 dwarf_par_index = 1;
13960 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13962 if (mask & (1 << i))
13964 reg = gen_rtx_REG (SImode, i);
13966 XVECEXP (par, 0, 0)
13967 = gen_rtx_SET (VOIDmode,
13970 gen_rtx_PRE_MODIFY (Pmode,
13973 (stack_pointer_rtx,
13976 gen_rtx_UNSPEC (BLKmode,
13977 gen_rtvec (1, reg),
13978 UNSPEC_PUSH_MULT));
13980 if (i != PC_REGNUM)
13982 tmp = gen_rtx_SET (VOIDmode,
13983 gen_frame_mem (SImode, stack_pointer_rtx),
13985 RTX_FRAME_RELATED_P (tmp) = 1;
13986 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
13994 for (j = 1, i++; j < num_regs; i++)
13996 if (mask & (1 << i))
13998 reg = gen_rtx_REG (SImode, i);
14000 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14002 if (i != PC_REGNUM)
14005 = gen_rtx_SET (VOIDmode,
14008 plus_constant (stack_pointer_rtx,
14011 RTX_FRAME_RELATED_P (tmp) = 1;
14012 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14019 par = emit_insn (par);
14021 tmp = gen_rtx_SET (VOIDmode,
14023 plus_constant (stack_pointer_rtx, -4 * num_regs));
14024 RTX_FRAME_RELATED_P (tmp) = 1;
14025 XVECEXP (dwarf, 0, 0) = tmp;
14027 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14032 /* Calculate the size of the return value that is passed in registers. */
14034 arm_size_return_regs (void)
14036 enum machine_mode mode;
14038 if (crtl->return_rtx != 0)
14039 mode = GET_MODE (crtl->return_rtx);
14041 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14043 return GET_MODE_SIZE (mode);
14047 emit_sfm (int base_reg, int count)
14054 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14055 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14057 reg = gen_rtx_REG (XFmode, base_reg++);
14059 XVECEXP (par, 0, 0)
14060 = gen_rtx_SET (VOIDmode,
14063 gen_rtx_PRE_MODIFY (Pmode,
14066 (stack_pointer_rtx,
14069 gen_rtx_UNSPEC (BLKmode,
14070 gen_rtvec (1, reg),
14071 UNSPEC_PUSH_MULT));
14072 tmp = gen_rtx_SET (VOIDmode,
14073 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14074 RTX_FRAME_RELATED_P (tmp) = 1;
14075 XVECEXP (dwarf, 0, 1) = tmp;
14077 for (i = 1; i < count; i++)
14079 reg = gen_rtx_REG (XFmode, base_reg++);
14080 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14082 tmp = gen_rtx_SET (VOIDmode,
14083 gen_frame_mem (XFmode,
14084 plus_constant (stack_pointer_rtx,
14087 RTX_FRAME_RELATED_P (tmp) = 1;
14088 XVECEXP (dwarf, 0, i + 1) = tmp;
14091 tmp = gen_rtx_SET (VOIDmode,
14093 plus_constant (stack_pointer_rtx, -12 * count));
14095 RTX_FRAME_RELATED_P (tmp) = 1;
14096 XVECEXP (dwarf, 0, 0) = tmp;
14098 par = emit_insn (par);
14099 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14105 /* Return true if the current function needs to save/restore LR. */
14108 thumb_force_lr_save (void)
14110 return !cfun->machine->lr_save_eliminated
14111 && (!leaf_function_p ()
14112 || thumb_far_jump_used_p ()
14113 || df_regs_ever_live_p (LR_REGNUM));
14117 /* Compute the distance from register FROM to register TO.
14118 These can be the arg pointer (26), the soft frame pointer (25),
14119 the stack pointer (13) or the hard frame pointer (11).
14120 In Thumb mode r7 is used as the soft frame pointer, if needed.
14121 Typical stack layout looks like this:
14123       old stack pointer -> |    |
14124                            ----
14125                           |    | \
14126                           |    |   saved arguments for
14127                           |    |   vararg functions
14128                           |    | /
14129                             --
14130   hard FP & arg pointer -> |    | \
14131                           |    |   stack
14132                           |    |   frame
14133                           |    | /
14134                             --
14135                           |    | \
14136                           |    |   call saved
14137                           |    |   registers
14138      soft frame pointer -> |    | /
14139                             --
14140                           |    | \
14141                           |    |   local
14142                           |    |   variables
14143     locals base pointer -> |    | /
14144                             --
14145                           |    | \
14146                           |    |   outgoing
14147                           |    |   arguments
14148   current stack pointer -> |    | /
14149                             --
14151 For a given function some or all of these stack components
14152 may not be needed, giving rise to the possibility of
14153 eliminating some of the registers.
14155 The values returned by this function must reflect the behavior
14156 of arm_expand_prologue() and arm_compute_save_reg_mask().
14158 The sign of the number returned reflects the direction of stack
14159 growth, so the values are positive for all eliminations except
14160 from the soft frame pointer to the hard frame pointer.
14162 SFP may point just inside the local variables block to ensure correct
14163 alignment. */
14166 /* Calculate stack offsets. These are used to calculate register elimination
14167 offsets and in prologue/epilogue code. Also calculates which registers
14168 should be saved. */
14170 static arm_stack_offsets *
14171 arm_get_frame_offsets (void)
14173 struct arm_stack_offsets *offsets;
14174 unsigned long func_type;
14178 HOST_WIDE_INT frame_size;
14181 offsets = &cfun->machine->stack_offsets;
14183 /* We need to know if we are a leaf function. Unfortunately, it
14184 is possible to be called after start_sequence has been called,
14185 which causes get_insns to return the insns for the sequence,
14186 not the function, which will cause leaf_function_p to return
14187 the incorrect result. We only need
14189 to know about leaf functions once reload has completed, and the
14190 frame size cannot be changed after that time, so we can safely
14191 use the cached value. */
14193 if (reload_completed)
14196 /* Initially this is the size of the local variables. It will be translated
14197 into an offset once we have determined the size of preceding data. */
14198 frame_size = ROUND_UP_WORD (get_frame_size ());
14200 leaf = leaf_function_p ();
14202 /* Space for variadic functions. */
14203 offsets->saved_args = crtl->args.pretend_args_size;
14205 /* In Thumb mode this is incorrect, but never used. */
14206 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14207 arm_compute_static_chain_stack_bytes();
14211 unsigned int regno;
14213 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14214 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14215 saved = core_saved;
14217 /* We know that SP will be doubleword aligned on entry, and we must
14218 preserve that condition at any subroutine call. We also require the
14219 soft frame pointer to be doubleword aligned. */
14221 if (TARGET_REALLY_IWMMXT)
14223 /* Check for the call-saved iWMMXt registers. */
14224 for (regno = FIRST_IWMMXT_REGNUM;
14225 regno <= LAST_IWMMXT_REGNUM;
14227 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14231 func_type = arm_current_func_type ();
14232 if (! IS_VOLATILE (func_type))
14234 /* Space for saved FPA registers. */
14235 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14236 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14239 /* Space for saved VFP registers. */
14240 if (TARGET_HARD_FLOAT && TARGET_VFP)
14241 saved += arm_get_vfp_saved_size ();
14244 else /* TARGET_THUMB1 */
14246 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14247 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14248 saved = core_saved;
14249 if (TARGET_BACKTRACE)
14253 /* Saved registers include the stack frame. */
14254 offsets->saved_regs = offsets->saved_args + saved +
14255 arm_compute_static_chain_stack_bytes();
14256 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14257 /* A leaf function does not need any stack alignment if it has nothing
14258 on the stack. */
14259 if (leaf && frame_size == 0)
14261 offsets->outgoing_args = offsets->soft_frame;
14262 offsets->locals_base = offsets->soft_frame;
14266 /* Ensure SFP has the correct alignment. */
14267 if (ARM_DOUBLEWORD_ALIGN
14268 && (offsets->soft_frame & 7))
14270 offsets->soft_frame += 4;
14271 /* Try to align stack by pushing an extra reg. Don't bother doing this
14272 when there is a stack frame as the alignment will be rolled into
14273 the normal stack adjustment. */
14274 if (frame_size + crtl->outgoing_args_size == 0)
14278 /* If it is safe to use r3, then do so. This sometimes
14279 generates better code on Thumb-2 by avoiding the need to
14280 use 32-bit push/pop instructions. */
14281 if (!crtl->tail_call_emit
14282 && arm_size_return_regs () <= 12)
14287 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14289 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14298 offsets->saved_regs += 4;
14299 offsets->saved_regs_mask |= (1 << reg);
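/* Illustrative sketch (ours) of the register choice above: prefer r3
   when the return value leaves it free, otherwise take the first high
   register that is not already being saved.  Pushing exactly one extra
   4-byte register is always enough to restore 8-byte alignment here.

     static int
     sketch_pick_pad_reg (unsigned long saved_mask, int r3_free,
                          int last_candidate)
     {
       int i;
       if (r3_free)
         return 3;
       for (i = 4; i <= last_candidate; i++)
         if ((saved_mask & (1UL << i)) == 0)
           return i;
       return -1;      // no spare register; adjust SP instead
     }
 */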
14304 offsets->locals_base = offsets->soft_frame + frame_size;
14305 offsets->outgoing_args = (offsets->locals_base
14306 + crtl->outgoing_args_size);
14308 if (ARM_DOUBLEWORD_ALIGN)
14310 /* Ensure SP remains doubleword aligned. */
14311 if (offsets->outgoing_args & 7)
14312 offsets->outgoing_args += 4;
14313 gcc_assert (!(offsets->outgoing_args & 7));
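/* Sketch (ours) of the two doubleword-alignment fixups above: all
   offsets grow in 4-byte steps, so alignment never needs more than one
   extra word.  Assuming OFF is already word-aligned:

     static int sketch_align8 (int off)
     {
       if (off & 7)
         off += 4;          // one word; (off & 7) is now 0
       return off;
     }

   e.g. sketch_align8 (20) == 24 and sketch_align8 (24) == 24.  */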
14320 /* Calculate the relative offsets for the different stack pointers. Positive
14321 offsets are in the direction of stack growth. */
14324 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14326 arm_stack_offsets *offsets;
14328 offsets = arm_get_frame_offsets ();
14330 /* OK, now we have enough information to compute the distances.
14331 There must be an entry in these switch tables for each pair
14332 of registers in ELIMINABLE_REGS, even if some of the entries
14333 seem to be redundant or useless. */
14336 case ARG_POINTER_REGNUM:
14339 case THUMB_HARD_FRAME_POINTER_REGNUM:
14342 case FRAME_POINTER_REGNUM:
14343 /* This is the reverse of the soft frame pointer
14344 to hard frame pointer elimination below. */
14345 return offsets->soft_frame - offsets->saved_args;
14347 case ARM_HARD_FRAME_POINTER_REGNUM:
14348 /* This is only non-zero in the case where the static chain register
14349 is stored above the frame. */
14350 return offsets->frame - offsets->saved_args - 4;
14352 case STACK_POINTER_REGNUM:
14353 /* If nothing has been pushed on the stack at all
14354 then this will return -4. This *is* correct! */
14355 return offsets->outgoing_args - (offsets->saved_args + 4);
14358 gcc_unreachable ();
14360 gcc_unreachable ();
14362 case FRAME_POINTER_REGNUM:
14365 case THUMB_HARD_FRAME_POINTER_REGNUM:
14368 case ARM_HARD_FRAME_POINTER_REGNUM:
14369 /* The hard frame pointer points to the top entry in the
14370 stack frame. The soft frame pointer to the bottom entry
14371 in the stack frame. If there is no stack frame at all,
14372 then they are identical. */
14374 return offsets->frame - offsets->soft_frame;
14376 case STACK_POINTER_REGNUM:
14377 return offsets->outgoing_args - offsets->soft_frame;
14380 gcc_unreachable ();
14382 gcc_unreachable ();
14385 /* You cannot eliminate from the stack pointer.
14386 In theory you could eliminate from the hard frame
14387 pointer to the stack pointer, but this will never
14388 happen, since if a stack frame is not needed the
14389 hard frame pointer will never be used. */
14390 gcc_unreachable ();
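/* Worked example (ours): with saved_args == 0, frame == 4,
   saved_regs == 24, soft_frame == 24, locals_base == 32 and
   outgoing_args == 40, the switch above yields

     ARG_POINTER   -> FRAME_POINTER : 24 - 0       = 24
     ARG_POINTER   -> STACK_POINTER : 40 - (0 + 4) = 36
     FRAME_POINTER -> STACK_POINTER : 40 - 24      = 16

   and for a completely empty frame, where every offset is equal and
   nothing has been pushed, ARG_POINTER -> STACK_POINTER gives the
   documented -4.  */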
14394 /* Given FROM and TO register numbers, say whether this elimination is
14395 allowed. Frame pointer elimination is automatically handled.
14397 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14398 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14399 pointer, we must eliminate FRAME_POINTER_REGNUM into
14400 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14401 ARG_POINTER_REGNUM. */
14404 arm_can_eliminate (const int from, const int to)
14406 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14407 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14408 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14409 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14413 /* Emit RTL to save coprocessor registers on function entry. Returns the
14414 number of bytes pushed. */
14417 arm_save_coproc_regs(void)
14419 int saved_size = 0;
14421 unsigned start_reg;
14424 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14425 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14427 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14428 insn = gen_rtx_MEM (V2SImode, insn);
14429 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14430 RTX_FRAME_RELATED_P (insn) = 1;
14434 /* Save any floating point call-saved registers used by this
14435 function. */
14436 if (TARGET_FPA_EMU2)
14438 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14439 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14441 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14442 insn = gen_rtx_MEM (XFmode, insn);
14443 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14444 RTX_FRAME_RELATED_P (insn) = 1;
14450 start_reg = LAST_FPA_REGNUM;
14452 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14454 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14456 if (start_reg - reg == 3)
14458 insn = emit_sfm (reg, 4);
14459 RTX_FRAME_RELATED_P (insn) = 1;
14461 start_reg = reg - 1;
14466 if (start_reg != reg)
14468 insn = emit_sfm (reg + 1, start_reg - reg);
14469 RTX_FRAME_RELATED_P (insn) = 1;
14470 saved_size += (start_reg - reg) * 12;
14472 start_reg = reg - 1;
14476 if (start_reg != reg)
14478 insn = emit_sfm (reg + 1, start_reg - reg);
14479 saved_size += (start_reg - reg) * 12;
14480 RTX_FRAME_RELATED_P (insn) = 1;
14483 if (TARGET_HARD_FLOAT && TARGET_VFP)
14485 start_reg = FIRST_VFP_REGNUM;
14487 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14489 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14490 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14492 if (start_reg != reg)
14493 saved_size += vfp_emit_fstmd (start_reg,
14494 (reg - start_reg) / 2);
14495 start_reg = reg + 2;
14498 if (start_reg != reg)
14499 saved_size += vfp_emit_fstmd (start_reg,
14500 (reg - start_reg) / 2);
14506 /* Set the Thumb frame pointer from the stack pointer. */
14509 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14511 HOST_WIDE_INT amount;
14514 amount = offsets->outgoing_args - offsets->locals_base;
14516 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14517 stack_pointer_rtx, GEN_INT (amount)));
14520 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14521 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14522 expects the first two operands to be the same. */
14525 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14527 hard_frame_pointer_rtx));
14531 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14532 hard_frame_pointer_rtx,
14533 stack_pointer_rtx));
14535 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14536 plus_constant (stack_pointer_rtx, amount));
14537 RTX_FRAME_RELATED_P (dwarf) = 1;
14538 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14541 RTX_FRAME_RELATED_P (insn) = 1;
14544 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14545 function. */
14547 arm_expand_prologue (void)
14552 unsigned long live_regs_mask;
14553 unsigned long func_type;
14555 int saved_pretend_args = 0;
14556 int saved_regs = 0;
14557 unsigned HOST_WIDE_INT args_to_push;
14558 arm_stack_offsets *offsets;
14560 func_type = arm_current_func_type ();
14562 /* Naked functions don't have prologues. */
14563 if (IS_NAKED (func_type))
14566 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14567 args_to_push = crtl->args.pretend_args_size;
14569 /* Compute which registers we will have to save onto the stack. */
14570 offsets = arm_get_frame_offsets ();
14571 live_regs_mask = offsets->saved_regs_mask;
14573 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14575 if (IS_STACKALIGN (func_type))
14580 /* Handle a word-aligned stack pointer. We generate the following:
14582 mov r0, sp
14583 bic r1, r0, #7
14584 mov sp, r1
14585 <save and restore r0 in normal prologue/epilogue>
14586 mov sp, r0
14587 bx lr
14589 The unwinder doesn't need to know about the stack realignment.
14590 Just tell it we saved SP in r0. */
14591 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14593 r0 = gen_rtx_REG (SImode, 0);
14594 r1 = gen_rtx_REG (SImode, 1);
14595 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14596 compiler won't choke. */
14597 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14598 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14599 insn = gen_movsi (r0, stack_pointer_rtx);
14600 RTX_FRAME_RELATED_P (insn) = 1;
14601 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14603 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14604 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14607 /* For APCS frames, if IP register is clobbered
14608 when creating the frame, save that register in a special
14609 way. */
14610 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14612 if (IS_INTERRUPT (func_type))
14614 /* Interrupt functions must not corrupt any registers.
14615 Creating a frame pointer, however, corrupts the IP
14616 register, so we must push it first. */
14617 insn = emit_multi_reg_push (1 << IP_REGNUM);
14619 /* Do not set RTX_FRAME_RELATED_P on this insn.
14620 The dwarf stack unwinding code only wants to see one
14621 stack decrement per function, and this is not it. If
14622 this instruction is labeled as being part of the frame
14623 creation sequence then dwarf2out_frame_debug_expr will
14624 die when it encounters the assignment of IP to FP
14625 later on, since the use of SP here establishes SP as
14626 the CFA register and not IP.
14628 Anyway this instruction is not really part of the stack
14629 frame creation although it is part of the prologue. */
14631 else if (IS_NESTED (func_type))
14633 /* The static chain register is the same as the IP register
14634 used as a scratch register during stack frame creation.
14635 To get around this, we need to find somewhere to store IP
14636 whilst the frame is being created. We try the following
14637 places in order:
14639 1. The last argument register.
14640 2. A slot on the stack above the frame. (This only
14641 works if the function is not a varargs function).
14642 3. Register r3, after pushing the argument registers
14643 onto the stack.
14645 Note - we only need to tell the dwarf2 backend about the SP
14646 adjustment in the second variant; the static chain register
14647 doesn't need to be unwound, as it doesn't contain a value
14648 inherited from the caller. */
14650 if (df_regs_ever_live_p (3) == false)
14651 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14652 else if (args_to_push == 0)
14656 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14659 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14660 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14663 /* Just tell the dwarf backend that we adjusted SP. */
14664 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14665 plus_constant (stack_pointer_rtx,
14667 RTX_FRAME_RELATED_P (insn) = 1;
14668 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14672 /* Store the args on the stack. */
14673 if (cfun->machine->uses_anonymous_args)
14674 insn = emit_multi_reg_push
14675 ((0xf0 >> (args_to_push / 4)) & 0xf);
14678 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14679 GEN_INT (- args_to_push)));
14681 RTX_FRAME_RELATED_P (insn) = 1;
14683 saved_pretend_args = 1;
14684 fp_offset = args_to_push;
14687 /* Now reuse r3 to preserve IP. */
14688 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14692 insn = emit_set_insn (ip_rtx,
14693 plus_constant (stack_pointer_rtx, fp_offset));
14694 RTX_FRAME_RELATED_P (insn) = 1;
14699 /* Push the argument registers, or reserve space for them. */
14700 if (cfun->machine->uses_anonymous_args)
14701 insn = emit_multi_reg_push
14702 ((0xf0 >> (args_to_push / 4)) & 0xf);
14705 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14706 GEN_INT (- args_to_push)));
14707 RTX_FRAME_RELATED_P (insn) = 1;
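/* Sketch (ours) of the mask expression used for anonymous args above.
   ARGS_TO_PUSH is a byte count, so ARGS_TO_PUSH / 4 is how many of
   r0-r3 need a stack slot, counted from the top of the quad:

     static unsigned sketch_pretend_mask (unsigned args_to_push)
     {
       return (0xf0u >> (args_to_push / 4)) & 0xfu;
     }

   e.g. 4 bytes -> 0x8 (push r3 only), 8 bytes -> 0xc (r2 and r3),
   16 bytes -> 0xf (r0-r3).  */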
14710 /* If this is an interrupt service routine, and the link register
14711 is going to be pushed, and we're not generating an extra
14712 push of IP (needed when a frame is needed and the frame layout is APCS),
14713 subtracting four from LR now will mean that the function return
14714 can be done with a single instruction. */
14715 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14716 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14717 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14720 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14722 emit_set_insn (lr, plus_constant (lr, -4));
14725 if (live_regs_mask)
14727 saved_regs += bit_count (live_regs_mask) * 4;
14728 if (optimize_size && !frame_pointer_needed
14729 && saved_regs == offsets->saved_regs - offsets->saved_args)
14731 /* If no coprocessor registers are being pushed and we don't have
14732 to worry about a frame pointer then push extra registers to
14733 create the stack frame. This is done in a way that does not
14734 alter the frame layout, so it is independent of the epilogue. */
14738 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14740 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14741 if (frame && n * 4 >= frame)
14744 live_regs_mask |= (1 << n) - 1;
14745 saved_regs += frame;
14748 insn = emit_multi_reg_push (live_regs_mask);
14749 RTX_FRAME_RELATED_P (insn) = 1;
14752 if (! IS_VOLATILE (func_type))
14753 saved_regs += arm_save_coproc_regs ();
14755 if (frame_pointer_needed && TARGET_ARM)
14757 /* Create the new frame pointer. */
14758 if (TARGET_APCS_FRAME)
14760 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14761 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14762 RTX_FRAME_RELATED_P (insn) = 1;
14764 if (IS_NESTED (func_type))
14766 /* Recover the static chain register. */
14767 if (!df_regs_ever_live_p (3)
14768 || saved_pretend_args)
14769 insn = gen_rtx_REG (SImode, 3);
14770 else /* if (crtl->args.pretend_args_size == 0) */
14772 insn = plus_constant (hard_frame_pointer_rtx, 4);
14773 insn = gen_frame_mem (SImode, insn);
14775 emit_set_insn (ip_rtx, insn);
14776 /* Add a USE to stop propagate_one_insn() from barfing. */
14777 emit_insn (gen_prologue_use (ip_rtx));
14782 insn = GEN_INT (saved_regs - 4);
14783 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14784 stack_pointer_rtx, insn));
14785 RTX_FRAME_RELATED_P (insn) = 1;
14789 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14791 /* This add can produce multiple insns for a large constant, so we
14792 need to get tricky. */
14793 rtx last = get_last_insn ();
14795 amount = GEN_INT (offsets->saved_args + saved_regs
14796 - offsets->outgoing_args);
14798 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14802 last = last ? NEXT_INSN (last) : get_insns ();
14803 RTX_FRAME_RELATED_P (last) = 1;
14805 while (last != insn);
14807 /* If the frame pointer is needed, emit a special barrier that
14808 will prevent the scheduler from moving stores to the frame
14809 before the stack adjustment. */
14810 if (frame_pointer_needed)
14811 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14812 hard_frame_pointer_rtx));
14816 if (frame_pointer_needed && TARGET_THUMB2)
14817 thumb_set_frame_pointer (offsets);
14819 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14821 unsigned long mask;
14823 mask = live_regs_mask;
14824 mask &= THUMB2_WORK_REGS;
14825 if (!IS_NESTED (func_type))
14826 mask |= (1 << IP_REGNUM);
14827 arm_load_pic_register (mask);
14830 /* If we are profiling, make sure no instructions are scheduled before
14831 the call to mcount. Similarly if the user has requested no
14832 scheduling in the prologue. Similarly if we want non-call exceptions
14833 using the EABI unwinder, to prevent faulting instructions from being
14834 swapped with a stack adjustment. */
14835 if (crtl->profile || !TARGET_SCHED_PROLOG
14836 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14837 emit_insn (gen_blockage ());
14839 /* If the link register is being kept alive, with the return address in it,
14840 then make sure that it does not get reused by the ce2 pass. */
14841 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14842 cfun->machine->lr_save_eliminated = 1;
14845 /* Print condition code to STREAM. Helper function for arm_print_operand. */
14847 arm_print_condition (FILE *stream)
14849 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14851 /* Branch conversion is not implemented for Thumb-2. */
14854 output_operand_lossage ("predicated Thumb instruction");
14857 if (current_insn_predicate != NULL)
14859 output_operand_lossage
14860 ("predicated instruction in conditional sequence");
14864 fputs (arm_condition_codes[arm_current_cc], stream);
14866 else if (current_insn_predicate)
14868 enum arm_cond_code code;
14872 output_operand_lossage ("predicated Thumb instruction");
14876 code = get_arm_condition_code (current_insn_predicate);
14877 fputs (arm_condition_codes[code], stream);
14882 /* If CODE is 'd', then X is a condition operand and the instruction
14883 should only be executed if the condition is true.
14884 If CODE is 'D', then X is a condition operand and the instruction
14885 should only be executed if the condition is false: however, if the mode
14886 of the comparison is CCFPEmode, then always execute the instruction -- we
14887 do this because in these circumstances !GE does not necessarily imply LT;
14888 in these cases the instruction pattern will take care to make sure that
14889 an instruction containing %d will follow, thereby undoing the effects of
14890 doing this instruction unconditionally.
14891 If CODE is 'N' then X is a floating point operand that must be negated
14892 before output.
14893 If CODE is 'B' then output a bitwise inverted value of X (a const int).
14894 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
14896 arm_print_operand (FILE *stream, rtx x, int code)
14901 fputs (ASM_COMMENT_START, stream);
14905 fputs (user_label_prefix, stream);
14909 fputs (REGISTER_PREFIX, stream);
14913 arm_print_condition (stream);
14917 /* Nothing in unified syntax, otherwise the current condition code. */
14918 if (!TARGET_UNIFIED_ASM)
14919 arm_print_condition (stream);
14923 /* The current condition code in unified syntax, otherwise nothing. */
14924 if (TARGET_UNIFIED_ASM)
14925 arm_print_condition (stream);
14929 /* The current condition code for a condition code setting instruction.
14930 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14931 if (TARGET_UNIFIED_ASM)
14933 fputc('s', stream);
14934 arm_print_condition (stream);
14938 arm_print_condition (stream);
14939 fputc('s', stream);
14944 /* If the instruction is conditionally executed then print
14945 the current condition code, otherwise print 's'. */
14946 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14947 if (current_insn_predicate)
14948 arm_print_condition (stream);
14950 fputc('s', stream);
14953 /* %# is a "break" sequence. It doesn't output anything, but is used to
14954 separate e.g. operand numbers from following text, if that text consists
14955 of further digits which we don't want to be part of the operand
14956 number. */
14963 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14964 r = REAL_VALUE_NEGATE (r);
14965 fprintf (stream, "%s", fp_const_from_val (&r));
14969 /* An integer or symbol address without a preceding # sign. */
14971 switch (GET_CODE (x))
14974 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14978 output_addr_const (stream, x);
14982 gcc_unreachable ();
14987 if (GET_CODE (x) == CONST_INT)
14990 val = ARM_SIGN_EXTEND (~INTVAL (x));
14991 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14995 putc ('~', stream);
14996 output_addr_const (stream, x);
15001 /* The low 16 bits of an immediate constant. */
15002 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15006 fprintf (stream, "%s", arithmetic_instr (x, 1));
15009 /* Truncate Cirrus shift counts. */
15011 if (GET_CODE (x) == CONST_INT)
15013 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15016 arm_print_operand (stream, x, 0);
15020 fprintf (stream, "%s", arithmetic_instr (x, 0));
15028 if (!shift_operator (x, SImode))
15030 output_operand_lossage ("invalid shift operand");
15034 shift = shift_op (x, &val);
15038 fprintf (stream, ", %s ", shift);
15040 arm_print_operand (stream, XEXP (x, 1), 0);
15042 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15047 /* An explanation of the 'Q', 'R' and 'H' register operands:
15049 In a pair of registers containing a DI or DF value the 'Q'
15050 operand returns the register number of the register containing
15051 the least significant part of the value. The 'R' operand returns
15052 the register number of the register containing the most
15053 significant part of the value.
15055 The 'H' operand returns the higher of the two register numbers.
15056 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15057 same as the 'Q' operand, since the most significant part of the
15058 value is held in the lower number register. The reverse is true
15059 on systems where WORDS_BIG_ENDIAN is false.
15061 The purpose of these operands is to distinguish between cases
15062 where the endian-ness of the values is important (for example
15063 when they are added together), and cases where the endian-ness
15064 is irrelevant, but the order of register operations is important.
15065 For example when loading a value from memory into a register
15066 pair, the endian-ness does not matter. Provided that the value
15067 from the lower memory address is put into the lower numbered
15068 register, and the value from the higher address is put into the
15069 higher numbered register, the load will work regardless of whether
15070 the value being loaded is big-wordian or little-wordian. The
15071 order of the two register loads can matter however, if the address
15072 of the memory location is actually held in one of the registers
15073 being overwritten by the load. */
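/* Condensed into a sketch (ours), the register each code selects for a
   value held in the pair {R, R+1}:

     static int
     sketch_di_part (int regno, int words_big_endian, int code)
     {
       switch (code)
         {
         case 'Q': return regno + (words_big_endian ? 1 : 0); // least significant
         case 'R': return regno + (words_big_endian ? 0 : 1); // most significant
         case 'H': return regno + 1;                          // higher number
         default:  return -1;
         }
     }
 */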
15075 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15077 output_operand_lossage ("invalid operand for code '%c'", code);
15081 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15085 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15087 output_operand_lossage ("invalid operand for code '%c'", code);
15091 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15095 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15097 output_operand_lossage ("invalid operand for code '%c'", code);
15101 asm_fprintf (stream, "%r", REGNO (x) + 1);
15105 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15107 output_operand_lossage ("invalid operand for code '%c'", code);
15111 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15115 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15117 output_operand_lossage ("invalid operand for code '%c'", code);
15121 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15125 asm_fprintf (stream, "%r",
15126 GET_CODE (XEXP (x, 0)) == REG
15127 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15131 asm_fprintf (stream, "{%r-%r}",
15133 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15136 /* Like 'M', but writing doubleword vector registers, for use by Neon
15137 insns. */
15140 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15141 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15143 asm_fprintf (stream, "{d%d}", regno);
15145 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15150 /* CONST_TRUE_RTX means always -- that's the default. */
15151 if (x == const_true_rtx)
15154 if (!COMPARISON_P (x))
15156 output_operand_lossage ("invalid operand for code '%c'", code);
15160 fputs (arm_condition_codes[get_arm_condition_code (x)],
15165 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15166 want to do that. */
15167 if (x == const_true_rtx)
15169 output_operand_lossage ("instruction never executed");
15172 if (!COMPARISON_P (x))
15174 output_operand_lossage ("invalid operand for code '%c'", code);
15178 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15179 (get_arm_condition_code (x))],
15183 /* Cirrus registers can be accessed in a variety of ways:
15184 single floating point (f)
15185 double floating point (d)
15186 32bit integer (fx)
15187 64bit integer (dx). */
15188 case 'W': /* Cirrus register in F mode. */
15189 case 'X': /* Cirrus register in D mode. */
15190 case 'Y': /* Cirrus register in FX mode. */
15191 case 'Z': /* Cirrus register in DX mode. */
15192 gcc_assert (GET_CODE (x) == REG
15193 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15195 fprintf (stream, "mv%s%s",
15197 : code == 'X' ? "d"
15198 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15202 /* Print cirrus register in the mode specified by the register's mode. */
15205 int mode = GET_MODE (x);
15207 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15209 output_operand_lossage ("invalid operand for code '%c'", code);
15213 fprintf (stream, "mv%s%s",
15214 mode == DFmode ? "d"
15215 : mode == SImode ? "fx"
15216 : mode == DImode ? "dx"
15217 : "f", reg_names[REGNO (x)] + 2);
15223 if (GET_CODE (x) != REG
15224 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15225 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15226 /* Bad value for wCG register number. */
15228 output_operand_lossage ("invalid operand for code '%c'", code);
15233 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15236 /* Print an iWMMXt control register name. */
15238 if (GET_CODE (x) != CONST_INT
15240 || INTVAL (x) >= 16)
15241 /* Bad value for wC register number. */
15243 output_operand_lossage ("invalid operand for code '%c'", code);
15249 static const char * wc_reg_names [16] =
15251 "wCID", "wCon", "wCSSF", "wCASF",
15252 "wC4", "wC5", "wC6", "wC7",
15253 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15254 "wC12", "wC13", "wC14", "wC15"
15257 fprintf (stream, wc_reg_names [INTVAL (x)]);
15261 /* Print the high single-precision register of a VFP double-precision
15262 register. */
15265 int mode = GET_MODE (x);
15268 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15270 output_operand_lossage ("invalid operand for code '%c'", code);
15275 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15277 output_operand_lossage ("invalid operand for code '%c'", code);
15281 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15285 /* Print a VFP/Neon double precision or quad precision register name. */
15289 int mode = GET_MODE (x);
15290 int is_quad = (code == 'q');
15293 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15295 output_operand_lossage ("invalid operand for code '%c'", code);
15299 if (GET_CODE (x) != REG
15300 || !IS_VFP_REGNUM (REGNO (x)))
15302 output_operand_lossage ("invalid operand for code '%c'", code);
15307 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15308 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15310 output_operand_lossage ("invalid operand for code '%c'", code);
15314 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15315 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15319 /* These two codes print the low/high doubleword register of a Neon quad
15320 register, respectively. For pair-structure types, can also print
15321 low/high quadword registers. */
15325 int mode = GET_MODE (x);
15328 if ((GET_MODE_SIZE (mode) != 16
15329 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15331 output_operand_lossage ("invalid operand for code '%c'", code);
15336 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15338 output_operand_lossage ("invalid operand for code '%c'", code);
15342 if (GET_MODE_SIZE (mode) == 16)
15343 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15344 + (code == 'f' ? 1 : 0));
15346 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15347 + (code == 'f' ? 1 : 0));
15351 /* Print a VFPv3 floating-point constant, represented as an integer
15352 index. */
15355 int index = vfp3_const_double_index (x);
15356 gcc_assert (index != -1);
15357 fprintf (stream, "%d", index);
15361 /* Print bits representing opcode features for Neon.
15363 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15364 and polynomials as unsigned.
15366 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15368 Bit 2 is 1 for rounding functions, 0 otherwise. */
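/* Decoded as a sketch (ours), matching the table lookups in the cases
   below:

     static char sketch_neon_type (long bits)  { return "uspf"[bits & 3]; }
     static int  sketch_neon_round (long bits) { return (bits & 4) != 0; }

   e.g. bits == 3 -> 'f', no rounding; bits == 6 -> 'p' with rounding.  */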
15370 /* Identify the type as 's', 'u', 'p' or 'f'. */
15373 HOST_WIDE_INT bits = INTVAL (x);
15374 fputc ("uspf"[bits & 3], stream);
15378 /* Likewise, but signed and unsigned integers are both 'i'. */
15381 HOST_WIDE_INT bits = INTVAL (x);
15382 fputc ("iipf"[bits & 3], stream);
15386 /* As for 'T', but emit 'u' instead of 'p'. */
15389 HOST_WIDE_INT bits = INTVAL (x);
15390 fputc ("usuf"[bits & 3], stream);
15394 /* Bit 2: rounding (vs none). */
15397 HOST_WIDE_INT bits = INTVAL (x);
15398 fputs ((bits & 4) != 0 ? "r" : "", stream);
15402 /* Memory operand for vld1/vst1 instruction. */
15406 bool postinc = FALSE;
15407 gcc_assert (GET_CODE (x) == MEM);
15408 addr = XEXP (x, 0);
15409 if (GET_CODE (addr) == POST_INC)
15412 addr = XEXP (addr, 0);
15414 asm_fprintf (stream, "[%r]", REGNO (addr));
15416 fputs("!", stream);
15420 /* Translate an S register number into a D register number and element index. */
15423 int mode = GET_MODE (x);
15426 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15428 output_operand_lossage ("invalid operand for code '%c'", code);
15433 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15435 output_operand_lossage ("invalid operand for code '%c'", code);
15439 regno = regno - FIRST_VFP_REGNUM;
15440 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15444 /* Register specifier for vld1.16/vst1.16. Translate the S register
15445 number into a D register number and element index. */
15448 int mode = GET_MODE (x);
15451 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15453 output_operand_lossage ("invalid operand for code '%c'", code);
15458 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15460 output_operand_lossage ("invalid operand for code '%c'", code);
15464 regno = regno - FIRST_VFP_REGNUM;
15465 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15472 output_operand_lossage ("missing operand");
15476 switch (GET_CODE (x))
15479 asm_fprintf (stream, "%r", REGNO (x));
15483 output_memory_reference_mode = GET_MODE (x);
15484 output_address (XEXP (x, 0));
15491 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15492 sizeof (fpstr), 0, 1);
15493 fprintf (stream, "#%s", fpstr);
15496 fprintf (stream, "#%s", fp_immediate_constant (x));
15500 gcc_assert (GET_CODE (x) != NEG);
15501 fputc ('#', stream);
15502 if (GET_CODE (x) == HIGH)
15504 fputs (":lower16:", stream);
15508 output_addr_const (stream, x);
15514 /* Target hook for assembling integer objects. The ARM version needs to
15515 handle word-sized values specially. */
15517 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15519 enum machine_mode mode;
15521 if (size == UNITS_PER_WORD && aligned_p)
15523 fputs ("\t.word\t", asm_out_file);
15524 output_addr_const (asm_out_file, x);
15526 /* Mark symbols as position independent. We only do this in the
15527 .text segment, not in the .data segment. */
15528 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15529 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15531 /* See legitimize_pic_address for an explanation of the
15532 TARGET_VXWORKS_RTP check. */
15533 if (TARGET_VXWORKS_RTP
15534 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15535 fputs ("(GOT)", asm_out_file);
15537 fputs ("(GOTOFF)", asm_out_file);
15539 fputc ('\n', asm_out_file);
15543 mode = GET_MODE (x);
15545 if (arm_vector_mode_supported_p (mode))
15549 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15551 units = CONST_VECTOR_NUNITS (x);
15552 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15554 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15555 for (i = 0; i < units; i++)
15557 rtx elt = CONST_VECTOR_ELT (x, i);
15559 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15562 for (i = 0; i < units; i++)
15564 rtx elt = CONST_VECTOR_ELT (x, i);
15565 REAL_VALUE_TYPE rval;
15567 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15570 (rval, GET_MODE_INNER (mode),
15571 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15577 return default_assemble_integer (x, size, aligned_p);
15581 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15585 if (!TARGET_AAPCS_BASED)
15588 default_named_section_asm_out_constructor
15589 : default_named_section_asm_out_destructor) (symbol, priority);
15593 /* Put these in the .init_array section, using a special relocation. */
15594 if (priority != DEFAULT_INIT_PRIORITY)
15597 sprintf (buf, "%s.%.5u",
15598 is_ctor ? ".init_array" : ".fini_array",
15600 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15607 switch_to_section (s);
15608 assemble_align (POINTER_SIZE);
15609 fputs ("\t.word\t", asm_out_file);
15610 output_addr_const (asm_out_file, symbol);
15611 fputs ("(target1)\n", asm_out_file);
15614 /* Add a function to the list of static constructors. */
15617 arm_elf_asm_constructor (rtx symbol, int priority)
15619 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15622 /* Add a function to the list of static destructors. */
15625 arm_elf_asm_destructor (rtx symbol, int priority)
15627 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15630 /* A finite state machine takes care of noticing whether or not instructions
15631 can be conditionally executed, and thus decrease execution time and code
15632 size by deleting branch instructions. The fsm is controlled by
15633 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15635 /* The states of the fsm controlling condition codes are:
15636 0: normal, do nothing special
15637 1: make ASM_OUTPUT_OPCODE not output this instruction
15638 2: make ASM_OUTPUT_OPCODE not output this instruction
15639 3: make instructions conditional
15640 4: make instructions conditional
15642 State transitions (state->state by whom under condition):
15643 0 -> 1 final_prescan_insn if the `target' is a label
15644 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15645 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15646 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15647 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15648 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15649 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15650 (the target insn is arm_target_insn).
15652 If the jump clobbers the conditions then we use states 2 and 4.
15654 A similar thing can be done with conditional return insns.
15656 XXX In case the `target' is an unconditional branch, this conditionalising
15657 of the instructions always reduces code size, but not always execution
15658 time. But then, I want to reduce the code size to somewhere near what
15659 /bin/cc produces. */
15661 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15662 instructions. When a COND_EXEC instruction is seen the subsequent
15663 instructions are scanned so that multiple conditional instructions can be
15664 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15665 specify the length and true/false mask for the IT block. These will be
15666 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
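/* Illustrative sketch (ours) of how such a length/mask pair could map
   onto an IT pattern: the first instruction always takes the block's
   condition ("t"); each later instruction takes "t" when its mask bit
   matches the first one and "e" otherwise.

     static void sketch_it_pattern (unsigned mask, int len, char *buf)
     {
       int i;
       buf[0] = 'i';
       buf[1] = 't';
       for (i = 1; i < len; i++)
         buf[1 + i] = (((mask >> i) & 1) == (mask & 1)) ? 't' : 'e';
       buf[1 + len] = '\0';
     }

   e.g. mask 0x5 with len 4 produces "itete".  */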
15668 /* Returns the index of the ARM condition code string in
15669 `arm_condition_codes'. COMPARISON should be an rtx like
15670 `(eq (...) (...))'. */
15671 static enum arm_cond_code
15672 get_arm_condition_code (rtx comparison)
15674 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15675 enum arm_cond_code code;
15676 enum rtx_code comp_code = GET_CODE (comparison);
15678 if (GET_MODE_CLASS (mode) != MODE_CC)
15679 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15680 XEXP (comparison, 1));
15684 case CC_DNEmode: code = ARM_NE; goto dominance;
15685 case CC_DEQmode: code = ARM_EQ; goto dominance;
15686 case CC_DGEmode: code = ARM_GE; goto dominance;
15687 case CC_DGTmode: code = ARM_GT; goto dominance;
15688 case CC_DLEmode: code = ARM_LE; goto dominance;
15689 case CC_DLTmode: code = ARM_LT; goto dominance;
15690 case CC_DGEUmode: code = ARM_CS; goto dominance;
15691 case CC_DGTUmode: code = ARM_HI; goto dominance;
15692 case CC_DLEUmode: code = ARM_LS; goto dominance;
15693 case CC_DLTUmode: code = ARM_CC;
15696 gcc_assert (comp_code == EQ || comp_code == NE);
15698 if (comp_code == EQ)
15699 return ARM_INVERSE_CONDITION_CODE (code);
15705 case NE: return ARM_NE;
15706 case EQ: return ARM_EQ;
15707 case GE: return ARM_PL;
15708 case LT: return ARM_MI;
15709 default: gcc_unreachable ();
15715 case NE: return ARM_NE;
15716 case EQ: return ARM_EQ;
15717 default: gcc_unreachable ();
15723 case NE: return ARM_MI;
15724 case EQ: return ARM_PL;
15725 default: gcc_unreachable ();
15730 /* These encodings assume that AC=1 in the FPA system control
15731 byte. This allows us to handle all cases except UNEQ and
15732 LTGT. */
15735 case GE: return ARM_GE;
15736 case GT: return ARM_GT;
15737 case LE: return ARM_LS;
15738 case LT: return ARM_MI;
15739 case NE: return ARM_NE;
15740 case EQ: return ARM_EQ;
15741 case ORDERED: return ARM_VC;
15742 case UNORDERED: return ARM_VS;
15743 case UNLT: return ARM_LT;
15744 case UNLE: return ARM_LE;
15745 case UNGT: return ARM_HI;
15746 case UNGE: return ARM_PL;
15747 /* UNEQ and LTGT do not have a representation. */
15748 case UNEQ: /* Fall through. */
15749 case LTGT: /* Fall through. */
15750 default: gcc_unreachable ();
15756 case NE: return ARM_NE;
15757 case EQ: return ARM_EQ;
15758 case GE: return ARM_LE;
15759 case GT: return ARM_LT;
15760 case LE: return ARM_GE;
15761 case LT: return ARM_GT;
15762 case GEU: return ARM_LS;
15763 case GTU: return ARM_CC;
15764 case LEU: return ARM_CS;
15765 case LTU: return ARM_HI;
15766 default: gcc_unreachable ();
15772 case LTU: return ARM_CS;
15773 case GEU: return ARM_CC;
15774 default: gcc_unreachable ();
15780 case NE: return ARM_NE;
15781 case EQ: return ARM_EQ;
15782 case GE: return ARM_GE;
15783 case GT: return ARM_GT;
15784 case LE: return ARM_LE;
15785 case LT: return ARM_LT;
15786 case GEU: return ARM_CS;
15787 case GTU: return ARM_HI;
15788 case LEU: return ARM_LS;
15789 case LTU: return ARM_CC;
15790 default: gcc_unreachable ();
15793 default: gcc_unreachable ();
15797 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15798 instructions. */
15800 thumb2_final_prescan_insn (rtx insn)
15802 rtx first_insn = insn;
15803 rtx body = PATTERN (insn);
15805 enum arm_cond_code code;
15809 /* Remove the previous insn from the count of insns to be output. */
15810 if (arm_condexec_count)
15811 arm_condexec_count--;
15813 /* Nothing to do if we are already inside a conditional block. */
15814 if (arm_condexec_count)
15817 if (GET_CODE (body) != COND_EXEC)
15820 /* Conditional jumps are implemented directly. */
15821 if (GET_CODE (insn) == JUMP_INSN)
15824 predicate = COND_EXEC_TEST (body);
15825 arm_current_cc = get_arm_condition_code (predicate);
15827 n = get_attr_ce_count (insn);
15828 arm_condexec_count = 1;
15829 arm_condexec_mask = (1 << n) - 1;
15830 arm_condexec_masklen = n;
15831 /* See if subsequent instructions can be combined into the same block. */
15834 insn = next_nonnote_insn (insn);
15836 /* Jumping into the middle of an IT block is illegal, so a label or
15837 barrier terminates the block. */
15838 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15841 body = PATTERN (insn);
15842 /* USE and CLOBBER aren't really insns, so just skip them. */
15843 if (GET_CODE (body) == USE
15844 || GET_CODE (body) == CLOBBER)
15847 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
15848 if (GET_CODE (body) != COND_EXEC)
15850 /* Allow up to 4 conditionally executed instructions in a block. */
15851 n = get_attr_ce_count (insn);
15852 if (arm_condexec_masklen + n > 4)
15855 predicate = COND_EXEC_TEST (body);
15856 code = get_arm_condition_code (predicate);
15857 mask = (1 << n) - 1;
15858 if (arm_current_cc == code)
15859 arm_condexec_mask |= (mask << arm_condexec_masklen);
15860 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15863 arm_condexec_count++;
15864 arm_condexec_masklen += n;
15866 /* A jump must be the last instruction in a conditional block. */
15867 if (GET_CODE(insn) == JUMP_INSN)
15870 /* Restore recog_data (getting the attributes of other insns can
15871 destroy this array, but final.c assumes that it remains intact
15872 across this call). */
15873 extract_constrain_insn_cached (first_insn);
15877 arm_final_prescan_insn (rtx insn)
15879 /* BODY will hold the body of INSN. */
15880 rtx body = PATTERN (insn);
15882 /* This will be 1 if trying to repeat the trick, and things need to be
15883 reversed if it appears to fail. */
15886 /* If we start with a return insn, we only succeed if we find another one. */
15887 int seeking_return = 0;
15889 /* START_INSN will hold the insn from where we start looking. This is the
15890 first insn after the following code_label if REVERSE is true. */
15891 rtx start_insn = insn;
15893 /* If in state 4, check if the target branch is reached, in order to
15894 change back to state 0. */
15895 if (arm_ccfsm_state == 4)
15897 if (insn == arm_target_insn)
15899 arm_target_insn = NULL;
15900 arm_ccfsm_state = 0;
15905 /* If in state 3, it is possible to repeat the trick, if this insn is an
15906 unconditional branch to a label, and immediately following this branch
15907 is the previous target label which is only used once, and the label this
15908 branch jumps to is not too far off. */
15909 if (arm_ccfsm_state == 3)
15911 if (simplejump_p (insn))
15913 start_insn = next_nonnote_insn (start_insn);
15914 if (GET_CODE (start_insn) == BARRIER)
15916 /* XXX Isn't this always a barrier? */
15917 start_insn = next_nonnote_insn (start_insn);
15919 if (GET_CODE (start_insn) == CODE_LABEL
15920 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15921 && LABEL_NUSES (start_insn) == 1)
15926 else if (GET_CODE (body) == RETURN)
15928 start_insn = next_nonnote_insn (start_insn);
15929 if (GET_CODE (start_insn) == BARRIER)
15930 start_insn = next_nonnote_insn (start_insn);
15931 if (GET_CODE (start_insn) == CODE_LABEL
15932 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15933 && LABEL_NUSES (start_insn) == 1)
15936 seeking_return = 1;
15945 gcc_assert (!arm_ccfsm_state || reverse);
15946 if (GET_CODE (insn) != JUMP_INSN)
15949 /* This jump might be paralleled with a clobber of the condition codes;
15950 the jump should always come first.  */
15951 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15952 body = XVECEXP (body, 0, 0);
15955 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15956 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15959 int fail = FALSE, succeed = FALSE;
15960 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15961 int then_not_else = TRUE;
15962 rtx this_insn = start_insn, label = 0;
15964 /* Register the insn jumped to. */
15967 if (!seeking_return)
15968 label = XEXP (SET_SRC (body), 0);
15970 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15971 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15972 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15974 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15975 then_not_else = FALSE;
15977 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15978 seeking_return = 1;
15979 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15981 seeking_return = 1;
15982 then_not_else = FALSE;
15985 gcc_unreachable ();
15987 /* See how many insns this branch skips, and what kind of insns. If all
15988 insns are okay, and the label or unconditional branch to the same
15989 label is not too far away, succeed. */
15990 for (insns_skipped = 0;
15991 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
15995 this_insn = next_nonnote_insn (this_insn);
15999 switch (GET_CODE (this_insn))
16002 /* Succeed if it is the target label, otherwise fail since
16003 control falls in from somewhere else. */
16004 if (this_insn == label)
16006 arm_ccfsm_state = 1;
16014 /* Succeed if the following insn is the target label.
16016 If return insns are used then the last insn in a function
16017 will be a barrier. */
16018 this_insn = next_nonnote_insn (this_insn);
16019 if (this_insn && this_insn == label)
16021 arm_ccfsm_state = 1;
16029 /* The AAPCS says that conditional calls should not be
16030 used since they make interworking inefficient (the
16031 linker can't transform BL<cond> into BLX). That's
16032 only a problem if the machine has BLX. */
16039 /* Succeed if the following insn is the target label, or
16040 if the following two insns are a barrier and the
16042 this_insn = next_nonnote_insn (this_insn);
16043 if (this_insn && GET_CODE (this_insn) == BARRIER)
16044 this_insn = next_nonnote_insn (this_insn);
16046 if (this_insn && this_insn == label
16047 && insns_skipped < max_insns_skipped)
16049 arm_ccfsm_state = 1;
16057 /* If this is an unconditional branch to the same label, succeed.
16058 If it is to another label, do nothing. If it is conditional,
16060 /* XXX Probably, the tests for SET and the PC are
16063 scanbody = PATTERN (this_insn);
16064 if (GET_CODE (scanbody) == SET
16065 && GET_CODE (SET_DEST (scanbody)) == PC)
16067 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16068 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16070 arm_ccfsm_state = 2;
16073 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16076 /* Fail if a conditional return is undesirable (e.g. on a
16077 StrongARM), but still allow this if optimizing for size. */
16078 else if (GET_CODE (scanbody) == RETURN
16079 && !use_return_insn (TRUE, NULL)
16082 else if (GET_CODE (scanbody) == RETURN
16085 arm_ccfsm_state = 2;
16088 else if (GET_CODE (scanbody) == PARALLEL)
16090 switch (get_attr_conds (this_insn))
16100 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16105 /* Instructions using or affecting the condition codes make it
16107 scanbody = PATTERN (this_insn);
16108 if (!(GET_CODE (scanbody) == SET
16109 || GET_CODE (scanbody) == PARALLEL)
16110 || get_attr_conds (this_insn) != CONDS_NOCOND)
16113 /* A conditional Cirrus instruction must be followed by
16114 a non-Cirrus instruction.  However, since we
16115 conditionalize instructions in this function, and since
16116 by the time we get here we can't add instructions
16117 (nops), because shorten_branches () has already been
16118 called, we disable conditionalizing Cirrus
16119 instructions altogether, to be safe. */
16120 if (GET_CODE (scanbody) != USE
16121 && GET_CODE (scanbody) != CLOBBER
16122 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16132 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16133 arm_target_label = CODE_LABEL_NUMBER (label);
16136 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16138 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16140 this_insn = next_nonnote_insn (this_insn);
16141 gcc_assert (!this_insn
16142 || (GET_CODE (this_insn) != BARRIER
16143 && GET_CODE (this_insn) != CODE_LABEL));
16147 /* Oh dear!  We ran off the end; give up. */
16148 extract_constrain_insn_cached (insn);
16149 arm_ccfsm_state = 0;
16150 arm_target_insn = NULL;
16153 arm_target_insn = this_insn;
16156 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16159 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16161 if (reverse || then_not_else)
16162 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16165 /* Restore recog_data (getting the attributes of other insns can
16166 destroy this array, but final.c assumes that it remains intact
16167 across this call). */
16168 extract_constrain_insn_cached (insn);
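/* Illustrative effect of the state machine above (a sketch, ARM mode,
   one skipped insn): a conditional branch around a single instruction is
   replaced by conditional execution of that instruction.

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		=>	movne	r1, #1
	mov	r1, #1
     .L1:			     .L1:			*/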
16172 /* Output IT instructions. */
16174 thumb2_asm_output_opcode (FILE * stream)
16179 if (arm_condexec_mask)
16181 for (n = 0; n < arm_condexec_masklen; n++)
16182 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16184 asm_fprintf (stream, "i%s\t%s\n\t", buff,
16185 arm_condition_codes[arm_current_cc]);
16186 arm_condexec_mask = 0;
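/* A minimal sketch (not part of the build) of how the accumulated
   then/else mask is rendered as an IT opcode.  The function and the
   scaffolding here are hypothetical; only the bit convention (bit N set
   means "t", clear means "e") mirrors the code above.  */
#if 0
#include <stdio.h>

static void
sketch_print_it (FILE *stream, unsigned int mask, int masklen,
		 const char *cond)
{
  char buff[5];
  int n;

  /* Bit 0 describes the first insn after the IT; it always matches the
     block's condition, so buff[0] is 't' in practice.  */
  for (n = 0; n < masklen; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = '\0';
  fprintf (stream, "i%s\t%s\n", buff, cond);
}

int
main (void)
{
  /* Mask 0x5 over three insns (then, else, then) prints "itet	eq".  */
  sketch_print_it (stdout, 0x5, 3, "eq");
  return 0;
}
#endif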
16190 /* Returns true if REGNO is a valid register
16191 for holding a quantity of type MODE. */
16193 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16195 if (GET_MODE_CLASS (mode) == MODE_CC)
16196 return (regno == CC_REGNUM
16197 || (TARGET_HARD_FLOAT && TARGET_VFP
16198 && regno == VFPCC_REGNUM));
16201 /* For the Thumb we only allow values bigger than SImode in
16202 registers 0 - 6, so that there is always a second low
16203 register available to hold the upper part of the value.
16204 We probably ought to ensure that the register is the
16205 start of an even numbered register pair. */
16206 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16208 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16209 && IS_CIRRUS_REGNUM (regno))
16210 /* We have outlawed SI values in Cirrus registers because they
16211 reside in the lower 32 bits, but SF values reside in the
16212 upper 32 bits. This causes gcc all sorts of grief. We can't
16213 even split the registers into pairs because Cirrus SI values
16214 get sign-extended to 64 bits. -- aldyh. */
16215 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16217 if (TARGET_HARD_FLOAT && TARGET_VFP
16218 && IS_VFP_REGNUM (regno))
16220 if (mode == SFmode || mode == SImode)
16221 return VFP_REGNO_OK_FOR_SINGLE (regno);
16223 if (mode == DFmode)
16224 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16226 /* VFP registers can hold HFmode values, but there is no point in
16227 putting them there unless we have hardware conversion insns. */
16228 if (mode == HFmode)
16229 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16232 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16233 || (VALID_NEON_QREG_MODE (mode)
16234 && NEON_REGNO_OK_FOR_QUAD (regno))
16235 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16236 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16237 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16238 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16239 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16244 if (TARGET_REALLY_IWMMXT)
16246 if (IS_IWMMXT_GR_REGNUM (regno))
16247 return mode == SImode;
16249 if (IS_IWMMXT_REGNUM (regno))
16250 return VALID_IWMMXT_REG_MODE (mode);
16253 /* We allow almost any value to be stored in the general registers.
16254 Restrict doubleword quantities to even register pairs so that we can
16255 use ldrd. Do not allow very large Neon structure opaque modes in
16256 general registers; they would use too many. */
16257 if (regno <= LAST_ARM_REGNUM)
16258 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16259 && ARM_NUM_REGS (mode) <= 4;
16261 if (regno == FRAME_POINTER_REGNUM
16262 || regno == ARG_POINTER_REGNUM)
16263 /* We only allow integers in the fake hard registers. */
16264 return GET_MODE_CLASS (mode) == MODE_INT;
16266 /* The only registers left are the FPA registers
16267 which we only allow to hold FP values. */
16268 return (TARGET_HARD_FLOAT && TARGET_FPA
16269 && GET_MODE_CLASS (mode) == MODE_FLOAT
16270 && regno >= FIRST_FPA_REGNUM
16271 && regno <= LAST_FPA_REGNUM);
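/* A self-contained sketch (not compiled) of the general-register rule
   above, assuming TARGET_LDRD and writing ARM_NUM_REGS out as an explicit
   word count; the function name is hypothetical.  */
#if 0
static int
sketch_gp_regno_ok (unsigned int regno, unsigned int mode_size)
{
  unsigned int nregs = (mode_size + 3) / 4;	/* 32-bit words needed.  */

  /* Doubleword and larger values must start on an even-numbered register
     so that ldrd/strd can address them, and may span at most four
     registers (ruling out the large opaque Neon struct modes).  */
  if (mode_size > 4 && (regno & 1) != 0)
    return 0;
  return nregs <= 4;
}
#endif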
16274 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16275 not used in arm mode. */
16278 arm_regno_class (int regno)
16282 if (regno == STACK_POINTER_REGNUM)
16284 if (regno == CC_REGNUM)
16291 if (TARGET_THUMB2 && regno < 8)
16294 if ( regno <= LAST_ARM_REGNUM
16295 || regno == FRAME_POINTER_REGNUM
16296 || regno == ARG_POINTER_REGNUM)
16297 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16299 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16300 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16302 if (IS_CIRRUS_REGNUM (regno))
16303 return CIRRUS_REGS;
16305 if (IS_VFP_REGNUM (regno))
16307 if (regno <= D7_VFP_REGNUM)
16308 return VFP_D0_D7_REGS;
16309 else if (regno <= LAST_LO_VFP_REGNUM)
16310 return VFP_LO_REGS;
16312 return VFP_HI_REGS;
16315 if (IS_IWMMXT_REGNUM (regno))
16316 return IWMMXT_REGS;
16318 if (IS_IWMMXT_GR_REGNUM (regno))
16319 return IWMMXT_GR_REGS;
16324 /* Handle a special case when computing the offset
16325 of an argument from the frame pointer. */
16327 arm_debugger_arg_offset (int value, rtx addr)
16331 /* We are only interested in the case where dbxout_parms () failed to compute the offset. */
16335 /* We can only cope with the case where the address is held in a register. */
16336 if (GET_CODE (addr) != REG)
16339 /* If we are using the frame pointer to point at the argument, then
16340 an offset of 0 is correct. */
16341 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16344 /* If we are using the stack pointer to point at the
16345 argument, then an offset of 0 is correct. */
16346 /* ??? Check this is consistent with thumb2 frame layout. */
16347 if ((TARGET_THUMB || !frame_pointer_needed)
16348 && REGNO (addr) == SP_REGNUM)
16351 /* Oh dear. The argument is pointed to by a register rather
16352 than being held in a register, or being stored at a known
16353 offset from the frame pointer. Since GDB only understands
16354 those two kinds of argument we must translate the address
16355 held in the register into an offset from the frame pointer.
16356 We do this by searching through the insns for the function
16357 looking to see where this register gets its value. If the
16358 register is initialized from the frame pointer plus an offset
16359 then we are in luck and we can continue, otherwise we give up.
16361 This code is exercised by producing debugging information
16362 for a function with arguments like this:
16364 double func (double a, double b, int c, double d) {return d;}
16366 Without this code the stab for parameter 'd' will be set to
16367 an offset of 0 from the frame pointer, rather than 8. */
16369 /* The if () statement below says:
16371 If the insn is a normal instruction
16372 and if the insn is setting the value in a register
16373 and if the register being set is the register holding the address of the argument
16374 and if the address is computed by an addition
16375 that involves adding to a register
16376 which is the frame pointer
16381 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16383 if ( GET_CODE (insn) == INSN
16384 && GET_CODE (PATTERN (insn)) == SET
16385 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16386 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16387 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16388 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16389 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16392 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16401 warning (0, "unable to compute real location of stacked parameter");
16402 value = 8; /* XXX magic hack */
16408 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16411 if ((MASK) & insn_flags) \
16412 add_builtin_function ((NAME), (TYPE), (CODE), \
16413 BUILT_IN_MD, NULL, NULL_TREE); \
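/* For illustration, a call such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   registers the builtin only when the selected CPU's insn_flags include
   FL_IWMMXT, expanding to

     add_builtin_function ("__builtin_arm_wzero", di_ftype_void,
			   ARM_BUILTIN_WZERO, BUILT_IN_MD, NULL,
			   NULL_TREE);  */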
16417 struct builtin_description
16419 const unsigned int mask;
16420 const enum insn_code icode;
16421 const char * const name;
16422 const enum arm_builtins code;
16423 const enum rtx_code comparison;
16424 const unsigned int flag;
16427 static const struct builtin_description bdesc_2arg[] =
16429 #define IWMMXT_BUILTIN(code, string, builtin) \
16430 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16431 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
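/* So the first entry below, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB),
   expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   i.e. a table row tying the builtin name and enum value to the insn
   pattern that implements it.  */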
16433 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16434 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16435 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16436 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16437 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16438 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16439 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16440 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16441 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16442 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16443 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16444 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16445 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16446 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16447 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16448 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16449 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16450 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16451 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16452 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16453 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16454 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16455 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16456 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16457 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16458 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16459 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16460 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16461 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16462 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16463 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16464 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16465 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16466 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16467 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16468 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16469 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16470 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16471 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16472 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16473 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16474 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16475 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16476 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16477 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16478 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16479 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16480 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16481 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16482 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16483 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16484 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16485 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16486 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16487 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16488 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16489 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16490 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16492 #define IWMMXT_BUILTIN2(code, builtin) \
16493 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
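/* IWMMXT_BUILTIN2 rows carry no user-visible name (the NAME field is
   NULL); e.g. IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) expands to

     { FL_IWMMXT, CODE_FOR_iwmmxt_wmacuz, NULL, ARM_BUILTIN_WMACUZ,
       UNKNOWN, 0 },

   leaving the named variants of these operations to be registered
   explicitly further down.  */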
16495 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16496 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16497 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16498 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16499 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16500 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16501 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16502 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16503 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16504 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16505 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16506 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16507 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16508 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16509 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16510 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16511 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16512 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16513 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16514 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16515 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16516 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16517 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16518 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16519 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16520 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16521 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16522 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16523 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16524 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16525 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16526 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16529 static const struct builtin_description bdesc_1arg[] =
16531 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16532 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16533 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16534 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16535 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16536 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16537 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16538 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16539 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16540 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16541 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16542 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16543 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16544 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16545 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16546 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16547 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16548 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16551 /* Set up all the iWMMXt builtins. This is
16552 not called if TARGET_IWMMXT is zero. */
16555 arm_init_iwmmxt_builtins (void)
16557 const struct builtin_description * d;
16559 tree endlink = void_list_node;
16561 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16562 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16563 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16566 = build_function_type (integer_type_node,
16567 tree_cons (NULL_TREE, integer_type_node, endlink));
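/* Each of these trees is simply a C prototype spelled out via tree_cons
   chains; int_ftype_int above corresponds to "int f (int)", and
   v8qi_ftype_v8qi_v8qi_int below to "v8qi f (v8qi, v8qi, int)".  */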
16568 tree v8qi_ftype_v8qi_v8qi_int
16569 = build_function_type (V8QI_type_node,
16570 tree_cons (NULL_TREE, V8QI_type_node,
16571 tree_cons (NULL_TREE, V8QI_type_node,
16572 tree_cons (NULL_TREE,
16575 tree v4hi_ftype_v4hi_int
16576 = build_function_type (V4HI_type_node,
16577 tree_cons (NULL_TREE, V4HI_type_node,
16578 tree_cons (NULL_TREE, integer_type_node,
16580 tree v2si_ftype_v2si_int
16581 = build_function_type (V2SI_type_node,
16582 tree_cons (NULL_TREE, V2SI_type_node,
16583 tree_cons (NULL_TREE, integer_type_node,
16585 tree v2si_ftype_di_di
16586 = build_function_type (V2SI_type_node,
16587 tree_cons (NULL_TREE, long_long_integer_type_node,
16588 tree_cons (NULL_TREE, long_long_integer_type_node,
16590 tree di_ftype_di_int
16591 = build_function_type (long_long_integer_type_node,
16592 tree_cons (NULL_TREE, long_long_integer_type_node,
16593 tree_cons (NULL_TREE, integer_type_node,
16595 tree di_ftype_di_int_int
16596 = build_function_type (long_long_integer_type_node,
16597 tree_cons (NULL_TREE, long_long_integer_type_node,
16598 tree_cons (NULL_TREE, integer_type_node,
16599 tree_cons (NULL_TREE,
16602 tree int_ftype_v8qi
16603 = build_function_type (integer_type_node,
16604 tree_cons (NULL_TREE, V8QI_type_node,
16606 tree int_ftype_v4hi
16607 = build_function_type (integer_type_node,
16608 tree_cons (NULL_TREE, V4HI_type_node,
16610 tree int_ftype_v2si
16611 = build_function_type (integer_type_node,
16612 tree_cons (NULL_TREE, V2SI_type_node,
16614 tree int_ftype_v8qi_int
16615 = build_function_type (integer_type_node,
16616 tree_cons (NULL_TREE, V8QI_type_node,
16617 tree_cons (NULL_TREE, integer_type_node,
16619 tree int_ftype_v4hi_int
16620 = build_function_type (integer_type_node,
16621 tree_cons (NULL_TREE, V4HI_type_node,
16622 tree_cons (NULL_TREE, integer_type_node,
16624 tree int_ftype_v2si_int
16625 = build_function_type (integer_type_node,
16626 tree_cons (NULL_TREE, V2SI_type_node,
16627 tree_cons (NULL_TREE, integer_type_node,
16629 tree v8qi_ftype_v8qi_int_int
16630 = build_function_type (V8QI_type_node,
16631 tree_cons (NULL_TREE, V8QI_type_node,
16632 tree_cons (NULL_TREE, integer_type_node,
16633 tree_cons (NULL_TREE,
16636 tree v4hi_ftype_v4hi_int_int
16637 = build_function_type (V4HI_type_node,
16638 tree_cons (NULL_TREE, V4HI_type_node,
16639 tree_cons (NULL_TREE, integer_type_node,
16640 tree_cons (NULL_TREE,
16643 tree v2si_ftype_v2si_int_int
16644 = build_function_type (V2SI_type_node,
16645 tree_cons (NULL_TREE, V2SI_type_node,
16646 tree_cons (NULL_TREE, integer_type_node,
16647 tree_cons (NULL_TREE,
16650 /* Miscellaneous. */
16651 tree v8qi_ftype_v4hi_v4hi
16652 = build_function_type (V8QI_type_node,
16653 tree_cons (NULL_TREE, V4HI_type_node,
16654 tree_cons (NULL_TREE, V4HI_type_node,
16656 tree v4hi_ftype_v2si_v2si
16657 = build_function_type (V4HI_type_node,
16658 tree_cons (NULL_TREE, V2SI_type_node,
16659 tree_cons (NULL_TREE, V2SI_type_node,
16661 tree v2si_ftype_v4hi_v4hi
16662 = build_function_type (V2SI_type_node,
16663 tree_cons (NULL_TREE, V4HI_type_node,
16664 tree_cons (NULL_TREE, V4HI_type_node,
16666 tree v2si_ftype_v8qi_v8qi
16667 = build_function_type (V2SI_type_node,
16668 tree_cons (NULL_TREE, V8QI_type_node,
16669 tree_cons (NULL_TREE, V8QI_type_node,
16671 tree v4hi_ftype_v4hi_di
16672 = build_function_type (V4HI_type_node,
16673 tree_cons (NULL_TREE, V4HI_type_node,
16674 tree_cons (NULL_TREE,
16675 long_long_integer_type_node,
16677 tree v2si_ftype_v2si_di
16678 = build_function_type (V2SI_type_node,
16679 tree_cons (NULL_TREE, V2SI_type_node,
16680 tree_cons (NULL_TREE,
16681 long_long_integer_type_node,
16683 tree void_ftype_int_int
16684 = build_function_type (void_type_node,
16685 tree_cons (NULL_TREE, integer_type_node,
16686 tree_cons (NULL_TREE, integer_type_node,
16689 = build_function_type (long_long_unsigned_type_node, endlink);
16691 = build_function_type (long_long_integer_type_node,
16692 tree_cons (NULL_TREE, V8QI_type_node,
16695 = build_function_type (long_long_integer_type_node,
16696 tree_cons (NULL_TREE, V4HI_type_node,
16699 = build_function_type (long_long_integer_type_node,
16700 tree_cons (NULL_TREE, V2SI_type_node,
16702 tree v2si_ftype_v4hi
16703 = build_function_type (V2SI_type_node,
16704 tree_cons (NULL_TREE, V4HI_type_node,
16706 tree v4hi_ftype_v8qi
16707 = build_function_type (V4HI_type_node,
16708 tree_cons (NULL_TREE, V8QI_type_node,
16711 tree di_ftype_di_v4hi_v4hi
16712 = build_function_type (long_long_unsigned_type_node,
16713 tree_cons (NULL_TREE,
16714 long_long_unsigned_type_node,
16715 tree_cons (NULL_TREE, V4HI_type_node,
16716 tree_cons (NULL_TREE,
16720 tree di_ftype_v4hi_v4hi
16721 = build_function_type (long_long_unsigned_type_node,
16722 tree_cons (NULL_TREE, V4HI_type_node,
16723 tree_cons (NULL_TREE, V4HI_type_node,
16726 /* Normal vector binops. */
16727 tree v8qi_ftype_v8qi_v8qi
16728 = build_function_type (V8QI_type_node,
16729 tree_cons (NULL_TREE, V8QI_type_node,
16730 tree_cons (NULL_TREE, V8QI_type_node,
16732 tree v4hi_ftype_v4hi_v4hi
16733 = build_function_type (V4HI_type_node,
16734 tree_cons (NULL_TREE, V4HI_type_node,
16735 tree_cons (NULL_TREE, V4HI_type_node,
16737 tree v2si_ftype_v2si_v2si
16738 = build_function_type (V2SI_type_node,
16739 tree_cons (NULL_TREE, V2SI_type_node,
16740 tree_cons (NULL_TREE, V2SI_type_node,
16742 tree di_ftype_di_di
16743 = build_function_type (long_long_unsigned_type_node,
16744 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16745 tree_cons (NULL_TREE,
16746 long_long_unsigned_type_node,
16749 /* Add all builtins that are more or less simple operations on two
16751 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16753 /* Use one of the operands; the target can have a different mode for
16754 mask-generating compares. */
16755 enum machine_mode mode;
16761 mode = insn_data[d->icode].operand[1].mode;
16766 type = v8qi_ftype_v8qi_v8qi;
16769 type = v4hi_ftype_v4hi_v4hi;
16772 type = v2si_ftype_v2si_v2si;
16775 type = di_ftype_di_di;
16779 gcc_unreachable ();
16782 def_mbuiltin (d->mask, d->name, type, d->code);
16785 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
16786 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16787 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16788 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16790 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16791 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16792 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16793 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16794 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16795 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16797 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16798 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16799 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16800 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16801 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16802 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16805 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16808 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16814 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16818 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16820 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16821 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16825 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16826 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16828 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16832 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16835 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16836 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16837 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16839 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16840 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16843 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16844 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16846 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16848 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16851 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16852 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16853 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16854 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16855 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16856 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16857 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16858 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16859 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16860 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16861 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16863 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16864 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16865 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16866 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16868 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16869 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16870 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16871 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16872 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16873 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16874 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
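/* A sketch (not compiled here) of using one of the two-operand builtins
   registered above from C when iWMMXt is enabled.  The vector typedef is
   an assumption, standing in for the usual mmintrin-style types.  */
#if 0
typedef int v2si __attribute__ ((vector_size (8)));

v2si
add_words (v2si a, v2si b)
{
  /* Matches v2si_ftype_v2si_v2si and maps to the "waddw" pattern.  */
  return __builtin_arm_waddw (a, b);
}
#endif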
16878 arm_init_tls_builtins (void)
16882 ftype = build_function_type (ptr_type_node, void_list_node);
16883 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16884 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16886 TREE_NOTHROW (decl) = 1;
16887 TREE_READONLY (decl) = 1;
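/* Example use (a sketch): the builtin simply yields the thread pointer.
   Because the decl is marked nothrow and readonly above, repeated calls
   can be merged by the optimizers.  */
#if 0
void *
current_thread_pointer (void)
{
  return __builtin_thread_pointer ();
}
#endif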
16890 enum neon_builtin_type_bits {
16906 #define v8qi_UP T_V8QI
16907 #define v4hi_UP T_V4HI
16908 #define v2si_UP T_V2SI
16909 #define v2sf_UP T_V2SF
16911 #define v16qi_UP T_V16QI
16912 #define v8hi_UP T_V8HI
16913 #define v4si_UP T_V4SI
16914 #define v4sf_UP T_V4SF
16915 #define v2di_UP T_V2DI
16920 #define UP(X) X##_UP
16955 NEON_LOADSTRUCTLANE,
16957 NEON_STORESTRUCTLANE,
16966 const neon_itype itype;
16968 const enum insn_code codes[T_MAX];
16969 const unsigned int num_vars;
16970 unsigned int base_fcode;
16971 } neon_builtin_datum;
16973 #define CF(N,X) CODE_FOR_neon_##N##X
16975 #define VAR1(T, N, A) \
16976 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16977 #define VAR2(T, N, A, B) \
16978 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16979 #define VAR3(T, N, A, B, C) \
16980 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16981 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16982 #define VAR4(T, N, A, B, C, D) \
16983 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16984 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16985 #define VAR5(T, N, A, B, C, D, E) \
16986 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16987 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16988 #define VAR6(T, N, A, B, C, D, E, F) \
16989 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16990 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16991 #define VAR7(T, N, A, B, C, D, E, F, G) \
16992 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
16993 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16995 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
16996 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16998 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16999 CF (N, G), CF (N, H) }, 8, 0
17000 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17001 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17002 | UP (H) | UP (I), \
17003 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17004 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17005 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17006 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17007 | UP (H) | UP (I) | UP (J), \
17008 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17009 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
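/* For illustration, the table entry { VAR2 (BINOP, vqdmull, v4hi, v2si) }
   below expands to

     { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 },

   giving one row with a bitmask of supported "key" modes and the matching
   insn codes in the same order.  */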
17011 /* The mode entries in the following table correspond to the "key" type of the
17012 instruction variant, i.e. equivalent to that which would be specified after
17013 the assembler mnemonic, which usually refers to the last vector operand.
17014 (Signed/unsigned/polynomial types are not differentiated, though; they
17015 are all mapped onto the same mode for a given element size.) The modes
17016 listed per instruction should be the same as those defined for that
17017 instruction's pattern in neon.md.
17018 WARNING: Variants should be listed in the same increasing order as
17019 neon_builtin_type_bits. */
17021 static neon_builtin_datum neon_builtin_data[] =
17023 { VAR10 (BINOP, vadd,
17024 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17025 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17026 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17027 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17028 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17029 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17030 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17031 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17032 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17033 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17034 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17035 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17036 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17037 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17038 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17039 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17040 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17041 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17042 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17043 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17044 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17045 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17046 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17047 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17048 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17049 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17050 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17051 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17052 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17053 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17054 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17055 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17056 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17057 { VAR10 (BINOP, vsub,
17058 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17059 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17060 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17061 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17062 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17063 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17064 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17065 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17066 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17067 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17068 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17069 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17070 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17071 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17072 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17073 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17074 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17075 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17076 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17077 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17078 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17079 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17080 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17081 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17082 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17083 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17084 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17085 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17086 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17087 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17088 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17089 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17090 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17091 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17092 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17093 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17094 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17095 /* FIXME: vget_lane supports more variants than this! */
17096 { VAR10 (GETLANE, vget_lane,
17097 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17098 { VAR10 (SETLANE, vset_lane,
17099 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17100 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17101 { VAR10 (DUP, vdup_n,
17102 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17103 { VAR10 (DUPLANE, vdup_lane,
17104 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17105 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17106 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17107 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17108 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17109 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17110 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17111 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17112 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17113 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17114 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17115 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17116 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17117 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17118 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17119 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17120 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17121 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17122 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17123 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17124 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17125 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17126 { VAR10 (BINOP, vext,
17127 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17128 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17129 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17130 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17131 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17132 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17133 { VAR10 (SELECT, vbsl,
17134 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17135 { VAR1 (VTBL, vtbl1, v8qi) },
17136 { VAR1 (VTBL, vtbl2, v8qi) },
17137 { VAR1 (VTBL, vtbl3, v8qi) },
17138 { VAR1 (VTBL, vtbl4, v8qi) },
17139 { VAR1 (VTBX, vtbx1, v8qi) },
17140 { VAR1 (VTBX, vtbx2, v8qi) },
17141 { VAR1 (VTBX, vtbx3, v8qi) },
17142 { VAR1 (VTBX, vtbx4, v8qi) },
17143 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17144 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17145 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17146 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17147 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17148 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17149 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17150 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17151 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17152 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17153 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17154 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17155 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17156 { VAR10 (LOAD1, vld1,
17157 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17158 { VAR10 (LOAD1LANE, vld1_lane,
17159 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17160 { VAR10 (LOAD1, vld1_dup,
17161 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17162 { VAR10 (STORE1, vst1,
17163 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17164 { VAR10 (STORE1LANE, vst1_lane,
17165 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17166 { VAR9 (LOADSTRUCT,
17167 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17168 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17169 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17170 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17171 { VAR9 (STORESTRUCT, vst2,
17172 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17173 { VAR7 (STORESTRUCTLANE, vst2_lane,
17174 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17175 { VAR9 (LOADSTRUCT,
17176 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17177 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17178 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17179 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17180 { VAR9 (STORESTRUCT, vst3,
17181 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17182 { VAR7 (STORESTRUCTLANE, vst3_lane,
17183 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17184 { VAR9 (LOADSTRUCT, vld4,
17185 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17186 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17187 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17188 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17189 { VAR9 (STORESTRUCT, vst4,
17190 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17191 { VAR7 (STORESTRUCTLANE, vst4_lane,
17192 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17193 { VAR10 (LOGICBINOP, vand,
17194 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17195 { VAR10 (LOGICBINOP, vorr,
17196 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17197 { VAR10 (BINOP, veor,
17198 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17199 { VAR10 (LOGICBINOP, vbic,
17200 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17201 { VAR10 (LOGICBINOP, vorn,
17202 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17218 arm_init_neon_builtins (void)
17220 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17222 tree neon_intQI_type_node;
17223 tree neon_intHI_type_node;
17224 tree neon_polyQI_type_node;
17225 tree neon_polyHI_type_node;
17226 tree neon_intSI_type_node;
17227 tree neon_intDI_type_node;
17228 tree neon_float_type_node;
17230 tree intQI_pointer_node;
17231 tree intHI_pointer_node;
17232 tree intSI_pointer_node;
17233 tree intDI_pointer_node;
17234 tree float_pointer_node;
17236 tree const_intQI_node;
17237 tree const_intHI_node;
17238 tree const_intSI_node;
17239 tree const_intDI_node;
17240 tree const_float_node;
17242 tree const_intQI_pointer_node;
17243 tree const_intHI_pointer_node;
17244 tree const_intSI_pointer_node;
17245 tree const_intDI_pointer_node;
17246 tree const_float_pointer_node;
17248 tree V8QI_type_node;
17249 tree V4HI_type_node;
17250 tree V2SI_type_node;
17251 tree V2SF_type_node;
17252 tree V16QI_type_node;
17253 tree V8HI_type_node;
17254 tree V4SI_type_node;
17255 tree V4SF_type_node;
17256 tree V2DI_type_node;
17258 tree intUQI_type_node;
17259 tree intUHI_type_node;
17260 tree intUSI_type_node;
17261 tree intUDI_type_node;
17263 tree intEI_type_node;
17264 tree intOI_type_node;
17265 tree intCI_type_node;
17266 tree intXI_type_node;
17268 tree V8QI_pointer_node;
17269 tree V4HI_pointer_node;
17270 tree V2SI_pointer_node;
17271 tree V2SF_pointer_node;
17272 tree V16QI_pointer_node;
17273 tree V8HI_pointer_node;
17274 tree V4SI_pointer_node;
17275 tree V4SF_pointer_node;
17276 tree V2DI_pointer_node;
17278 tree void_ftype_pv8qi_v8qi_v8qi;
17279 tree void_ftype_pv4hi_v4hi_v4hi;
17280 tree void_ftype_pv2si_v2si_v2si;
17281 tree void_ftype_pv2sf_v2sf_v2sf;
17282 tree void_ftype_pdi_di_di;
17283 tree void_ftype_pv16qi_v16qi_v16qi;
17284 tree void_ftype_pv8hi_v8hi_v8hi;
17285 tree void_ftype_pv4si_v4si_v4si;
17286 tree void_ftype_pv4sf_v4sf_v4sf;
17287 tree void_ftype_pv2di_v2di_v2di;
17289 tree reinterp_ftype_dreg[5][5];
17290 tree reinterp_ftype_qreg[5][5];
17291 tree dreg_types[5], qreg_types[5];
17293 /* Create distinguished type nodes for NEON vector element types,
17294 and pointers to values of such types, so we can detect them later. */
17295 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17296 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17297 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17298 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17299 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17300 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17301 neon_float_type_node = make_node (REAL_TYPE);
17302 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17303 layout_type (neon_float_type_node);
17305 /* Define typedefs which exactly correspond to the modes we are basing vector
17306 types on. If you change these names you'll need to change
17307 the table used by arm_mangle_type too. */
17308 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17309 "__builtin_neon_qi");
17310 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17311 "__builtin_neon_hi");
17312 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17313 "__builtin_neon_si");
17314 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17315 "__builtin_neon_sf");
17316 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17317 "__builtin_neon_di");
17318 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17319 "__builtin_neon_poly8");
17320 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17321 "__builtin_neon_poly16");
17323 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17324 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17325 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17326 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17327 float_pointer_node = build_pointer_type (neon_float_type_node);
17329 /* Next create constant-qualified versions of the above types. */
17330 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17332 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17334 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17336 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17338 const_float_node = build_qualified_type (neon_float_type_node,
17341 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17342 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17343 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17344 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17345 const_float_pointer_node = build_pointer_type (const_float_node);
17347 /* Now create vector types based on our NEON element types. */
17348 /* 64-bit vectors. */
17350 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17352 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17354 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17356 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17357 /* 128-bit vectors. */
17359 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17361 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17363 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17365 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17367 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17369 /* Unsigned integer types for various mode sizes. */
17370 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17371 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17372 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17373 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17375 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17376 "__builtin_neon_uqi");
17377 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17378 "__builtin_neon_uhi");
17379 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17380 "__builtin_neon_usi");
17381 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17382 "__builtin_neon_udi");
17384 /* Opaque integer types for structures of vectors. */
17385 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17386 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17387 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17388 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17390 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17391 "__builtin_neon_ti");
17392 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17393 "__builtin_neon_ei");
17394 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17395 "__builtin_neon_oi");
17396 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17397 "__builtin_neon_ci");
17398 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17399 "__builtin_neon_xi");
17401 /* Pointers to vector types. */
17402 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17403 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17404 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17405 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17406 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17407 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17408 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17409 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17410 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17412 /* Operations which return results as pairs. */
17413 void_ftype_pv8qi_v8qi_v8qi =
17414 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17415 V8QI_type_node, NULL);
17416 void_ftype_pv4hi_v4hi_v4hi =
17417 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17418 V4HI_type_node, NULL);
17419 void_ftype_pv2si_v2si_v2si =
17420 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17421 V2SI_type_node, NULL);
17422 void_ftype_pv2sf_v2sf_v2sf =
17423 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17424 V2SF_type_node, NULL);
17425 void_ftype_pdi_di_di =
17426 build_function_type_list (void_type_node, intDI_pointer_node,
17427 neon_intDI_type_node, neon_intDI_type_node, NULL);
17428 void_ftype_pv16qi_v16qi_v16qi =
17429 build_function_type_list (void_type_node, V16QI_pointer_node,
17430 V16QI_type_node, V16QI_type_node, NULL);
17431 void_ftype_pv8hi_v8hi_v8hi =
17432 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17433 V8HI_type_node, NULL);
17434 void_ftype_pv4si_v4si_v4si =
17435 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17436 V4SI_type_node, NULL);
17437 void_ftype_pv4sf_v4sf_v4sf =
17438 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17439 V4SF_type_node, NULL);
17440 void_ftype_pv2di_v2di_v2di =
17441 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17442 V2DI_type_node, NULL);
17444 dreg_types[0] = V8QI_type_node;
17445 dreg_types[1] = V4HI_type_node;
17446 dreg_types[2] = V2SI_type_node;
17447 dreg_types[3] = V2SF_type_node;
17448 dreg_types[4] = neon_intDI_type_node;
17450 qreg_types[0] = V16QI_type_node;
17451 qreg_types[1] = V8HI_type_node;
17452 qreg_types[2] = V4SI_type_node;
17453 qreg_types[3] = V4SF_type_node;
17454 qreg_types[4] = V2DI_type_node;
17456 for (i = 0; i < 5; i++)
17459 for (j = 0; j < 5; j++)
17461 reinterp_ftype_dreg[i][j]
17462 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17463 reinterp_ftype_qreg[i][j]
17464 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17468 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17470 neon_builtin_datum *d = &neon_builtin_data[i];
17471 unsigned int j, codeidx = 0;
17473 d->base_fcode = fcode;
17475 for (j = 0; j < T_MAX; j++)
17477 const char* const modenames[] = {
17478 "v8qi", "v4hi", "v2si", "v2sf", "di",
17479 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17483 enum insn_code icode;
17484 int is_load = 0, is_store = 0;
17486 if ((d->bits & (1 << j)) == 0)
17489 icode = d->codes[codeidx++];
17494 case NEON_LOAD1LANE:
17495 case NEON_LOADSTRUCT:
17496 case NEON_LOADSTRUCTLANE:
17498 /* Fall through. */
17500 case NEON_STORE1LANE:
17501 case NEON_STORESTRUCT:
17502 case NEON_STORESTRUCTLANE:
17505 /* Fall through. */
17508 case NEON_LOGICBINOP:
17509 case NEON_SHIFTINSERT:
17516 case NEON_SHIFTIMM:
17517 case NEON_SHIFTACC:
17523 case NEON_LANEMULL:
17524 case NEON_LANEMULH:
17526 case NEON_SCALARMUL:
17527 case NEON_SCALARMULL:
17528 case NEON_SCALARMULH:
17529 case NEON_SCALARMAC:
17535 tree return_type = void_type_node, args = void_list_node;
17537 /* Build a function type directly from the insn_data for this
17538 builtin. The build_function_type() function takes care of
17539 removing duplicates for us. */
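/* Illustrative sketch (not from the original source): for a binary
   builtin whose insn operands are all V8QImode, the loop below ends
   up building the equivalent of

       V8QI __builtin_neon_xxxv8qi (V8QI, V8QI);

   where the name is hypothetical; operand 0 supplies the return type
   and operands 1..n the argument list, and walking backwards keeps
   the arguments in source order.  */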
17540 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17544 if (is_load && k == 1)
17546 /* Neon load patterns always have the memory operand
17547 (a SImode pointer) in the operand 1 position. We
17548 want a const pointer to the element type in that position.  */
17550 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17556 eltype = const_intQI_pointer_node;
17561 eltype = const_intHI_pointer_node;
17566 eltype = const_intSI_pointer_node;
17571 eltype = const_float_pointer_node;
17576 eltype = const_intDI_pointer_node;
17579 default: gcc_unreachable ();
17582 else if (is_store && k == 0)
17584 /* Similarly, Neon store patterns use operand 0 as
17585 the memory location to store to (a SImode pointer).
17586 Use a pointer to the element type of the store in that position.  */
17588 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17594 eltype = intQI_pointer_node;
17599 eltype = intHI_pointer_node;
17604 eltype = intSI_pointer_node;
17609 eltype = float_pointer_node;
17614 eltype = intDI_pointer_node;
17617 default: gcc_unreachable ();
17622 switch (insn_data[icode].operand[k].mode)
17624 case VOIDmode: eltype = void_type_node; break;
17626 case QImode: eltype = neon_intQI_type_node; break;
17627 case HImode: eltype = neon_intHI_type_node; break;
17628 case SImode: eltype = neon_intSI_type_node; break;
17629 case SFmode: eltype = neon_float_type_node; break;
17630 case DImode: eltype = neon_intDI_type_node; break;
17631 case TImode: eltype = intTI_type_node; break;
17632 case EImode: eltype = intEI_type_node; break;
17633 case OImode: eltype = intOI_type_node; break;
17634 case CImode: eltype = intCI_type_node; break;
17635 case XImode: eltype = intXI_type_node; break;
17636 /* 64-bit vectors. */
17637 case V8QImode: eltype = V8QI_type_node; break;
17638 case V4HImode: eltype = V4HI_type_node; break;
17639 case V2SImode: eltype = V2SI_type_node; break;
17640 case V2SFmode: eltype = V2SF_type_node; break;
17641 /* 128-bit vectors. */
17642 case V16QImode: eltype = V16QI_type_node; break;
17643 case V8HImode: eltype = V8HI_type_node; break;
17644 case V4SImode: eltype = V4SI_type_node; break;
17645 case V4SFmode: eltype = V4SF_type_node; break;
17646 case V2DImode: eltype = V2DI_type_node; break;
17647 default: gcc_unreachable ();
17651 if (k == 0 && !is_store)
17652 return_type = eltype;
17654 args = tree_cons (NULL_TREE, eltype, args);
17657 ftype = build_function_type (return_type, args);
17661 case NEON_RESULTPAIR:
17663 switch (insn_data[icode].operand[1].mode)
17665 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17666 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17667 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17668 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17669 case DImode: ftype = void_ftype_pdi_di_di; break;
17670 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17671 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17672 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17673 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17674 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17675 default: gcc_unreachable ();
17680 case NEON_REINTERP:
17682 /* We iterate over 5 doubleword types, then 5 quadword types.  */
17685 switch (insn_data[icode].operand[0].mode)
17687 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17688 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17689 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17690 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17691 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17692 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17693 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17694 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17695 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17696 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17697 default: gcc_unreachable ();
17703 gcc_unreachable ();
17706 gcc_assert (ftype != NULL);
17708 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17710 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17717 arm_init_fp16_builtins (void)
17719 tree fp16_type = make_node (REAL_TYPE);
17720 TYPE_PRECISION (fp16_type) = 16;
17721 layout_type (fp16_type);
17722 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
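/* Hedged usage sketch (not part of this file): once "__fp16" is
   registered and a format is selected via arm_fp16_format, user code
   such as

       __fp16 h = 1.0f;   // stored as a 16-bit half
       float  f = h;      // widened to SFmode

   becomes valid; arithmetic on __fp16 promotes to float, as
   arm_promoted_type below arranges.  */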
17726 arm_init_builtins (void)
17728 arm_init_tls_builtins ();
17730 if (TARGET_REALLY_IWMMXT)
17731 arm_init_iwmmxt_builtins ();
17734 arm_init_neon_builtins ();
17736 if (arm_fp16_format)
17737 arm_init_fp16_builtins ();
17740 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17742 static const char *
17743 arm_invalid_parameter_type (const_tree t)
17745 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17746 return N_("function parameters cannot have __fp16 type");
17750 /* Implement TARGET_INVALID_RETURN_TYPE. */
17752 static const char *
17753 arm_invalid_return_type (const_tree t)
17755 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17756 return N_("functions cannot return __fp16 type");
17760 /* Implement TARGET_PROMOTED_TYPE. */
17763 arm_promoted_type (const_tree t)
17765 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17766 return float_type_node;
17770 /* Implement TARGET_CONVERT_TO_TYPE.
17771 Specifically, this hook implements the peculiarity of the ARM
17772 half-precision floating-point C semantics that requires conversions between
17773 __fp16 and double to go through an intermediate conversion to float.  */
17776 arm_convert_to_type (tree type, tree expr)
17778 tree fromtype = TREE_TYPE (expr);
17779 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17781 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17782 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17783 return convert (type, convert (float_type_node, expr));
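/* Worked example (illustrative): for "h = d" with __fp16 h and
   double d, the test above rewrites the conversion as
   (__fp16) (float) d, so only HFmode<->SFmode conversion patterns
   are ever required and direct HFmode<->DFmode conversions never
   arise.  */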
17787 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17788 This simply adds HFmode as a supported mode; even though we don't
17789 implement arithmetic on this type directly, it's supported by
17790 optabs conversions, much the way the double-word arithmetic is
17791 special-cased in the default hook. */
17794 arm_scalar_mode_supported_p (enum machine_mode mode)
17796 if (mode == HFmode)
17797 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17799 return default_scalar_mode_supported_p (mode);
17802 /* Errors in the source file can cause expand_expr to return const0_rtx
17803 where we expect a vector. To avoid crashing, use one of the vector
17804 clear instructions. */
17807 safe_vector_operand (rtx x, enum machine_mode mode)
17809 if (x != const0_rtx)
17811 x = gen_reg_rtx (mode);
17813 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17814 : gen_rtx_SUBREG (DImode, x, 0)));
17818 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17821 arm_expand_binop_builtin (enum insn_code icode,
17822 tree exp, rtx target)
17825 tree arg0 = CALL_EXPR_ARG (exp, 0);
17826 tree arg1 = CALL_EXPR_ARG (exp, 1);
17827 rtx op0 = expand_normal (arg0);
17828 rtx op1 = expand_normal (arg1);
17829 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17830 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17831 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17833 if (VECTOR_MODE_P (mode0))
17834 op0 = safe_vector_operand (op0, mode0);
17835 if (VECTOR_MODE_P (mode1))
17836 op1 = safe_vector_operand (op1, mode1);
17839 || GET_MODE (target) != tmode
17840 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17841 target = gen_reg_rtx (tmode);
17843 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17845 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17846 op0 = copy_to_mode_reg (mode0, op0);
17847 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17848 op1 = copy_to_mode_reg (mode1, op1);
17850 pat = GEN_FCN (icode) (target, op0, op1);
17857 /* Subroutine of arm_expand_builtin to take care of unop insns. */
17860 arm_expand_unop_builtin (enum insn_code icode,
17861 tree exp, rtx target, int do_load)
17864 tree arg0 = CALL_EXPR_ARG (exp, 0);
17865 rtx op0 = expand_normal (arg0);
17866 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17867 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17870 || GET_MODE (target) != tmode
17871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17872 target = gen_reg_rtx (tmode);
17874 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17877 if (VECTOR_MODE_P (mode0))
17878 op0 = safe_vector_operand (op0, mode0);
17880 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17881 op0 = copy_to_mode_reg (mode0, op0);
17884 pat = GEN_FCN (icode) (target, op0);
17892 neon_builtin_compare (const void *a, const void *b)
17894 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17895 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17896 unsigned int soughtcode = key->base_fcode;
17898 if (soughtcode >= memb->base_fcode
17899 && soughtcode < memb->base_fcode + memb->num_vars)
17901 else if (soughtcode < memb->base_fcode)
17907 static enum insn_code
17908 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17910 neon_builtin_datum key, *found;
17913 key.base_fcode = fcode;
17914 found = (neon_builtin_datum *)
17915 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17916 sizeof (neon_builtin_data[0]), neon_builtin_compare);
17917 gcc_assert (found);
17918 idx = fcode - (int) found->base_fcode;
17919 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17922 *itype = found->itype;
17924 return found->codes[idx];
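/* Illustrative note: each table entry covers the fcode range
   [base_fcode, base_fcode + num_vars), so IDX selects the same slot
   of codes[] that the initialization loop filled for that mode
   variant.  For example (an assumption for illustration), if an
   entry supports only v8qi and v4hi, then fcode == base_fcode + 1
   resolves to its v4hi icode.  */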
17928 NEON_ARG_COPY_TO_REG,
17933 #define NEON_MAX_BUILTIN_ARGS 5
17935 /* Expand a Neon builtin. */
17937 arm_expand_neon_args (rtx target, int icode, int have_retval,
17942 tree arg[NEON_MAX_BUILTIN_ARGS];
17943 rtx op[NEON_MAX_BUILTIN_ARGS];
17944 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17945 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
17950 || GET_MODE (target) != tmode
17951 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
17952 target = gen_reg_rtx (tmode);
17954 va_start (ap, exp);
17958 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
17960 if (thisarg == NEON_ARG_STOP)
17964 arg[argc] = CALL_EXPR_ARG (exp, argc);
17965 op[argc] = expand_normal (arg[argc]);
17966 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
17970 case NEON_ARG_COPY_TO_REG:
17971 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
17972 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17973 (op[argc], mode[argc]))
17974 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
17977 case NEON_ARG_CONSTANT:
17978 /* FIXME: This error message is somewhat unhelpful. */
17979 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
17980 (op[argc], mode[argc]))
17981 error ("argument must be a constant");
17984 case NEON_ARG_STOP:
17985 gcc_unreachable ();
17998 pat = GEN_FCN (icode) (target, op[0]);
18002 pat = GEN_FCN (icode) (target, op[0], op[1]);
18006 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18010 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18014 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18018 gcc_unreachable ();
18024 pat = GEN_FCN (icode) (op[0]);
18028 pat = GEN_FCN (icode) (op[0], op[1]);
18032 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18036 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18040 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18044 gcc_unreachable ();
18055 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18056 constants defined per-instruction or per instruction-variant. Instead, the
18057 required info is looked up in the table neon_builtin_data. */
18059 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18062 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18069 return arm_expand_neon_args (target, icode, 1, exp,
18070 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18074 case NEON_SCALARMUL:
18075 case NEON_SCALARMULL:
18076 case NEON_SCALARMULH:
18077 case NEON_SHIFTINSERT:
18078 case NEON_LOGICBINOP:
18079 return arm_expand_neon_args (target, icode, 1, exp,
18080 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18084 return arm_expand_neon_args (target, icode, 1, exp,
18085 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18086 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18090 case NEON_SHIFTIMM:
18091 return arm_expand_neon_args (target, icode, 1, exp,
18092 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18096 return arm_expand_neon_args (target, icode, 1, exp,
18097 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18101 case NEON_REINTERP:
18102 return arm_expand_neon_args (target, icode, 1, exp,
18103 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18107 return arm_expand_neon_args (target, icode, 1, exp,
18108 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18110 case NEON_RESULTPAIR:
18111 return arm_expand_neon_args (target, icode, 0, exp,
18112 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18116 case NEON_LANEMULL:
18117 case NEON_LANEMULH:
18118 return arm_expand_neon_args (target, icode, 1, exp,
18119 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18120 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18123 return arm_expand_neon_args (target, icode, 1, exp,
18124 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18125 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18127 case NEON_SHIFTACC:
18128 return arm_expand_neon_args (target, icode, 1, exp,
18129 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18130 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18132 case NEON_SCALARMAC:
18133 return arm_expand_neon_args (target, icode, 1, exp,
18134 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18135 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18139 return arm_expand_neon_args (target, icode, 1, exp,
18140 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18144 case NEON_LOADSTRUCT:
18145 return arm_expand_neon_args (target, icode, 1, exp,
18146 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18148 case NEON_LOAD1LANE:
18149 case NEON_LOADSTRUCTLANE:
18150 return arm_expand_neon_args (target, icode, 1, exp,
18151 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18155 case NEON_STORESTRUCT:
18156 return arm_expand_neon_args (target, icode, 0, exp,
18157 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18159 case NEON_STORE1LANE:
18160 case NEON_STORESTRUCTLANE:
18161 return arm_expand_neon_args (target, icode, 0, exp,
18162 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18166 gcc_unreachable ();
18169 /* Emit code to reinterpret one Neon type as another, without altering bits. */
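/* For example (illustrative): reinterpreting a V2SF value as V8QI
   emits a plain register-to-register move; gen_lowpart merely
   rewraps SRC in the mode of DEST, so no bits change, which is what
   the vreinterpret-style intrinsics require.  */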
18171 neon_reinterpret (rtx dest, rtx src)
18173 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18176 /* Emit code to place a Neon pair result in memory locations (with equal
      registers).  */
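/* Sketch of the expansion (illustrative, for a V4HImode pair):

       intfn  tmp1, op1, tmp2, op2   ; two-destination instruction
       str    tmp1, [destaddr]
       str    tmp2, [destaddr, #8]   ; GET_MODE_SIZE (V4HImode) == 8

   so both halves of the result land in adjacent memory.  */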
18179 neon_emit_pair_result_insn (enum machine_mode mode,
18180 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18183 rtx mem = gen_rtx_MEM (mode, destaddr);
18184 rtx tmp1 = gen_reg_rtx (mode);
18185 rtx tmp2 = gen_reg_rtx (mode);
18187 emit_insn (intfn (tmp1, op1, tmp2, op2));
18189 emit_move_insn (mem, tmp1);
18190 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18191 emit_move_insn (mem, tmp2);
18194 /* Set up operands for a register copy from src to dest, taking care not to
18195 clobber registers in the process.
18196 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18197 be called with a large N, so that should be OK. */
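/* Worked example (illustrative): with count == 2, dest = {d1, d2}
   and src = {d0, d1}, emitting d1 <- d0 first would clobber the
   still-pending source d1, so the scan below schedules d2 <- d1
   first and d1 <- d0 second.  */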
18200 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18202 unsigned int copied = 0, opctr = 0;
18203 unsigned int done = (1 << count) - 1;
18206 while (copied != done)
18208 for (i = 0; i < count; i++)
18212 for (j = 0; good && j < count; j++)
18213 if (i != j && (copied & (1 << j)) == 0
18214 && reg_overlap_mentioned_p (src[j], dest[i]))
18219 operands[opctr++] = dest[i];
18220 operands[opctr++] = src[i];
18226 gcc_assert (opctr == count * 2);
18229 /* Expand an expression EXP that calls a built-in function,
18230 with result going to TARGET if that's convenient
18231 (and in mode MODE if that's convenient).
18232 SUBTARGET may be used as the target for computing one of EXP's operands.
18233 IGNORE is nonzero if the value is to be ignored. */
18236 arm_expand_builtin (tree exp,
18238 rtx subtarget ATTRIBUTE_UNUSED,
18239 enum machine_mode mode ATTRIBUTE_UNUSED,
18240 int ignore ATTRIBUTE_UNUSED)
18242 const struct builtin_description * d;
18243 enum insn_code icode;
18244 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18252 int fcode = DECL_FUNCTION_CODE (fndecl);
18254 enum machine_mode tmode;
18255 enum machine_mode mode0;
18256 enum machine_mode mode1;
18257 enum machine_mode mode2;
18259 if (fcode >= ARM_BUILTIN_NEON_BASE)
18260 return arm_expand_neon_builtin (fcode, exp, target);
18264 case ARM_BUILTIN_TEXTRMSB:
18265 case ARM_BUILTIN_TEXTRMUB:
18266 case ARM_BUILTIN_TEXTRMSH:
18267 case ARM_BUILTIN_TEXTRMUH:
18268 case ARM_BUILTIN_TEXTRMSW:
18269 case ARM_BUILTIN_TEXTRMUW:
18270 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18271 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18272 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18273 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18274 : CODE_FOR_iwmmxt_textrmw);
18276 arg0 = CALL_EXPR_ARG (exp, 0);
18277 arg1 = CALL_EXPR_ARG (exp, 1);
18278 op0 = expand_normal (arg0);
18279 op1 = expand_normal (arg1);
18280 tmode = insn_data[icode].operand[0].mode;
18281 mode0 = insn_data[icode].operand[1].mode;
18282 mode1 = insn_data[icode].operand[2].mode;
18284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18285 op0 = copy_to_mode_reg (mode0, op0);
18286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18288 /* @@@ better error message */
18289 error ("selector must be an immediate");
18290 return gen_reg_rtx (tmode);
18293 || GET_MODE (target) != tmode
18294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18295 target = gen_reg_rtx (tmode);
18296 pat = GEN_FCN (icode) (target, op0, op1);
18302 case ARM_BUILTIN_TINSRB:
18303 case ARM_BUILTIN_TINSRH:
18304 case ARM_BUILTIN_TINSRW:
18305 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18306 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18307 : CODE_FOR_iwmmxt_tinsrw);
18308 arg0 = CALL_EXPR_ARG (exp, 0);
18309 arg1 = CALL_EXPR_ARG (exp, 1);
18310 arg2 = CALL_EXPR_ARG (exp, 2);
18311 op0 = expand_normal (arg0);
18312 op1 = expand_normal (arg1);
18313 op2 = expand_normal (arg2);
18314 tmode = insn_data[icode].operand[0].mode;
18315 mode0 = insn_data[icode].operand[1].mode;
18316 mode1 = insn_data[icode].operand[2].mode;
18317 mode2 = insn_data[icode].operand[3].mode;
18319 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18320 op0 = copy_to_mode_reg (mode0, op0);
18321 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18322 op1 = copy_to_mode_reg (mode1, op1);
18323 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18325 /* @@@ better error message */
18326 error ("selector must be an immediate");
18330 || GET_MODE (target) != tmode
18331 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18332 target = gen_reg_rtx (tmode);
18333 pat = GEN_FCN (icode) (target, op0, op1, op2);
18339 case ARM_BUILTIN_SETWCX:
18340 arg0 = CALL_EXPR_ARG (exp, 0);
18341 arg1 = CALL_EXPR_ARG (exp, 1);
18342 op0 = force_reg (SImode, expand_normal (arg0));
18343 op1 = expand_normal (arg1);
18344 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18347 case ARM_BUILTIN_GETWCX:
18348 arg0 = CALL_EXPR_ARG (exp, 0);
18349 op0 = expand_normal (arg0);
18350 target = gen_reg_rtx (SImode);
18351 emit_insn (gen_iwmmxt_tmrc (target, op0));
18354 case ARM_BUILTIN_WSHUFH:
18355 icode = CODE_FOR_iwmmxt_wshufh;
18356 arg0 = CALL_EXPR_ARG (exp, 0);
18357 arg1 = CALL_EXPR_ARG (exp, 1);
18358 op0 = expand_normal (arg0);
18359 op1 = expand_normal (arg1);
18360 tmode = insn_data[icode].operand[0].mode;
18361 mode1 = insn_data[icode].operand[1].mode;
18362 mode2 = insn_data[icode].operand[2].mode;
18364 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18365 op0 = copy_to_mode_reg (mode1, op0);
18366 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18368 /* @@@ better error message */
18369 error ("mask must be an immediate");
18373 || GET_MODE (target) != tmode
18374 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18375 target = gen_reg_rtx (tmode);
18376 pat = GEN_FCN (icode) (target, op0, op1);
18382 case ARM_BUILTIN_WSADB:
18383 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18384 case ARM_BUILTIN_WSADH:
18385 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18386 case ARM_BUILTIN_WSADBZ:
18387 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18388 case ARM_BUILTIN_WSADHZ:
18389 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18391 /* Several three-argument builtins. */
18392 case ARM_BUILTIN_WMACS:
18393 case ARM_BUILTIN_WMACU:
18394 case ARM_BUILTIN_WALIGN:
18395 case ARM_BUILTIN_TMIA:
18396 case ARM_BUILTIN_TMIAPH:
18397 case ARM_BUILTIN_TMIATT:
18398 case ARM_BUILTIN_TMIATB:
18399 case ARM_BUILTIN_TMIABT:
18400 case ARM_BUILTIN_TMIABB:
18401 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18402 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18403 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18404 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18405 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18406 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18407 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18408 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18409 : CODE_FOR_iwmmxt_walign);
18410 arg0 = CALL_EXPR_ARG (exp, 0);
18411 arg1 = CALL_EXPR_ARG (exp, 1);
18412 arg2 = CALL_EXPR_ARG (exp, 2);
18413 op0 = expand_normal (arg0);
18414 op1 = expand_normal (arg1);
18415 op2 = expand_normal (arg2);
18416 tmode = insn_data[icode].operand[0].mode;
18417 mode0 = insn_data[icode].operand[1].mode;
18418 mode1 = insn_data[icode].operand[2].mode;
18419 mode2 = insn_data[icode].operand[3].mode;
18421 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18422 op0 = copy_to_mode_reg (mode0, op0);
18423 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18424 op1 = copy_to_mode_reg (mode1, op1);
18425 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18426 op2 = copy_to_mode_reg (mode2, op2);
18428 || GET_MODE (target) != tmode
18429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18430 target = gen_reg_rtx (tmode);
18431 pat = GEN_FCN (icode) (target, op0, op1, op2);
18437 case ARM_BUILTIN_WZERO:
18438 target = gen_reg_rtx (DImode);
18439 emit_insn (gen_iwmmxt_clrdi (target));
18442 case ARM_BUILTIN_THREAD_POINTER:
18443 return arm_load_tp (target);
18449 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18450 if (d->code == (const enum arm_builtins) fcode)
18451 return arm_expand_binop_builtin (d->icode, exp, target);
18453 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18454 if (d->code == (const enum arm_builtins) fcode)
18455 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18457 /* @@@ Should really do something sensible here. */
18461 /* Return the number (counting from 0) of
18462 the least significant set bit in MASK. */
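/* E.g. number_of_first_bit_set (0x18) == 3, since bit 3 is the
   lowest bit set in binary 11000.  */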
18465 number_of_first_bit_set (unsigned mask)
18470 (mask & (1 << bit)) == 0;
18477 /* Emit code to push or pop registers to or from the stack. F is the
18478 assembly file. MASK is the registers to push or pop. PUSH is
18479 nonzero if we should push, and zero if we should pop. For debugging
18480 output, if pushing, adjust CFA_OFFSET by the amount of space added
18481 to the stack. REAL_REGS should have the same number of bits set as
18482 MASK, and will be used instead (in the same order) to describe which
18483 registers were saved - this is used to mark the save slots when we
18484 push high registers after moving them to low registers. */
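/* Example output (illustrative): pushing {r4, r5, r6, lr}, i.e.
   MASK == 0x4070, with EABI unwind tables enabled prints

       .save {r4, r5, r6, lr}
       push  {r4, r5, r6, lr}

   and bumps *CFA_OFFSET by 16 for the four words pushed.  */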
18486 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18487 unsigned long real_regs)
18490 int lo_mask = mask & 0xFF;
18491 int pushed_words = 0;
18495 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18497 /* Special case. Do not generate a POP PC statement here, do it in
      thumb_exit instead.  */
18499 thumb_exit (f, -1);
18503 if (ARM_EABI_UNWIND_TABLES && push)
18505 fprintf (f, "\t.save\t{");
18506 for (regno = 0; regno < 15; regno++)
18508 if (real_regs & (1 << regno))
18510 if (real_regs & ((1 << regno) -1))
18512 asm_fprintf (f, "%r", regno);
18515 fprintf (f, "}\n");
18518 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18520 /* Look at the low registers first. */
18521 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18525 asm_fprintf (f, "%r", regno);
18527 if ((lo_mask & ~1) != 0)
18534 if (push && (mask & (1 << LR_REGNUM)))
18536 /* Catch pushing the LR. */
18540 asm_fprintf (f, "%r", LR_REGNUM);
18544 else if (!push && (mask & (1 << PC_REGNUM)))
18546 /* Catch popping the PC. */
18547 if (TARGET_INTERWORK || TARGET_BACKTRACE
18548 || crtl->calls_eh_return)
18550 /* The PC is never popped directly; instead
18551 it is popped into r3 and then BX is used. */
18552 fprintf (f, "}\n");
18554 thumb_exit (f, -1);
18563 asm_fprintf (f, "%r", PC_REGNUM);
18567 fprintf (f, "}\n");
18569 if (push && pushed_words && dwarf2out_do_frame ())
18571 char *l = dwarf2out_cfi_label (false);
18572 int pushed_mask = real_regs;
18574 *cfa_offset += pushed_words * 4;
18575 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18578 pushed_mask = real_regs;
18579 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18581 if (pushed_mask & 1)
18582 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18587 /* Generate code to return from a thumb function.
18588 If 'reg_containing_return_addr' is -1, then the return address is
18589 actually on the stack, at the stack pointer. */
18591 thumb_exit (FILE *f, int reg_containing_return_addr)
18593 unsigned regs_available_for_popping;
18594 unsigned regs_to_pop;
18596 unsigned available;
18600 int restore_a4 = FALSE;
18602 /* Compute the registers we need to pop. */
18606 if (reg_containing_return_addr == -1)
18608 regs_to_pop |= 1 << LR_REGNUM;
18612 if (TARGET_BACKTRACE)
18614 /* Restore the (ARM) frame pointer and stack pointer. */
18615 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18619 /* If there is nothing to pop then just emit the BX instruction and
      return.  */
18621 if (pops_needed == 0)
18623 if (crtl->calls_eh_return)
18624 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18626 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18629 /* Otherwise if we are not supporting interworking and we have not created
18630 a backtrace structure and the function was not entered in ARM mode then
18631 just pop the return address straight into the PC. */
18632 else if (!TARGET_INTERWORK
18633 && !TARGET_BACKTRACE
18634 && !is_called_in_ARM_mode (current_function_decl)
18635 && !crtl->calls_eh_return)
18637 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18641 /* Find out how many of the (return) argument registers we can corrupt. */
18642 regs_available_for_popping = 0;
18644 /* If returning via __builtin_eh_return, the bottom three registers
18645 all contain information needed for the return. */
18646 if (crtl->calls_eh_return)
18650 /* Deduce the registers used from the function's
18651 return value.  This is more reliable than examining
18652 df_regs_ever_live_p () because that will be set if the register is
18653 ever used in the function, not just if the register is used
18654 to hold a return value. */
18656 if (crtl->return_rtx != 0)
18657 mode = GET_MODE (crtl->return_rtx);
18659 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18661 size = GET_MODE_SIZE (mode);
18665 /* In a void function we can use any argument register.
18666 In a function that returns a structure on the stack
18667 we can use the second and third argument registers. */
18668 if (mode == VOIDmode)
18669 regs_available_for_popping =
18670 (1 << ARG_REGISTER (1))
18671 | (1 << ARG_REGISTER (2))
18672 | (1 << ARG_REGISTER (3));
18674 regs_available_for_popping =
18675 (1 << ARG_REGISTER (2))
18676 | (1 << ARG_REGISTER (3));
18678 else if (size <= 4)
18679 regs_available_for_popping =
18680 (1 << ARG_REGISTER (2))
18681 | (1 << ARG_REGISTER (3));
18682 else if (size <= 8)
18683 regs_available_for_popping =
18684 (1 << ARG_REGISTER (3));
18687 /* Match registers to be popped with registers into which we pop them. */
18688 for (available = regs_available_for_popping,
18689 required = regs_to_pop;
18690 required != 0 && available != 0;
18691 available &= ~(available & - available),
18692 required &= ~(required & - required))
18695 /* If we have any popping registers left over, remove them. */
18697 regs_available_for_popping &= ~available;
18699 /* Otherwise if we need another popping register we can use
18700 the fourth argument register. */
18701 else if (pops_needed)
18703 /* If we have not found any free argument registers and
18704 reg a4 contains the return address, we must move it. */
18705 if (regs_available_for_popping == 0
18706 && reg_containing_return_addr == LAST_ARG_REGNUM)
18708 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18709 reg_containing_return_addr = LR_REGNUM;
18711 else if (size > 12)
18713 /* Register a4 is being used to hold part of the return value,
18714 but we have dire need of a free, low register. */
18717 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
18720 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18722 /* The fourth argument register is available. */
18723 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18729 /* Pop as many registers as we can. */
18730 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18731 regs_available_for_popping);
18733 /* Process the registers we popped. */
18734 if (reg_containing_return_addr == -1)
18736 /* The return address was popped into the lowest numbered register. */
18737 regs_to_pop &= ~(1 << LR_REGNUM);
18739 reg_containing_return_addr =
18740 number_of_first_bit_set (regs_available_for_popping);
18742 /* Remove this register from the mask of available registers, so that
18743 the return address will not be corrupted by further pops. */
18744 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18747 /* If we popped other registers then handle them here. */
18748 if (regs_available_for_popping)
18752 /* Work out which register currently contains the frame pointer. */
18753 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18755 /* Move it into the correct place. */
18756 asm_fprintf (f, "\tmov\t%r, %r\n",
18757 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18759 /* (Temporarily) remove it from the mask of popped registers. */
18760 regs_available_for_popping &= ~(1 << frame_pointer);
18761 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18763 if (regs_available_for_popping)
18767 /* We popped the stack pointer as well,
18768 find the register that contains it. */
18769 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18771 /* Move it into the stack register. */
18772 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18774 /* At this point we have popped all necessary registers, so
18775 do not worry about restoring regs_available_for_popping
18776 to its correct value:
18778 assert (pops_needed == 0)
18779 assert (regs_available_for_popping == (1 << frame_pointer))
18780 assert (regs_to_pop == (1 << STACK_POINTER)) */
18784 /* Since we have just moved the popped value into the frame
18785 pointer, the popping register is available for reuse, and
18786 we know that we still have the stack pointer left to pop. */
18787 regs_available_for_popping |= (1 << frame_pointer);
18791 /* If we still have registers left on the stack, but we no longer have
18792 any registers into which we can pop them, then we must move the return
18793 address into the link register and make available the register that
      contained the return address.  */
18795 if (regs_available_for_popping == 0 && pops_needed > 0)
18797 regs_available_for_popping |= 1 << reg_containing_return_addr;
18799 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18800 reg_containing_return_addr);
18802 reg_containing_return_addr = LR_REGNUM;
18805 /* If we have registers left on the stack then pop some more.
18806 We know that at most we will want to pop FP and SP. */
18807 if (pops_needed > 0)
18812 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18813 regs_available_for_popping);
18815 /* We have popped either FP or SP.
18816 Move whichever one it is into the correct register. */
18817 popped_into = number_of_first_bit_set (regs_available_for_popping);
18818 move_to = number_of_first_bit_set (regs_to_pop);
18820 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18822 regs_to_pop &= ~(1 << move_to);
18827 /* If we still have not popped everything then we must have only
18828 had one register available to us and we are now popping the SP. */
18829 if (pops_needed > 0)
18833 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18834 regs_available_for_popping);
18836 popped_into = number_of_first_bit_set (regs_available_for_popping);
18838 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18840 assert (regs_to_pop == (1 << STACK_POINTER))
18841 assert (pops_needed == 1)
18845 /* If necessary restore the a4 register. */
18848 if (reg_containing_return_addr != LR_REGNUM)
18850 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18851 reg_containing_return_addr = LR_REGNUM;
18854 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18857 if (crtl->calls_eh_return)
18858 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18860 /* Return to caller. */
18861 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18866 thumb1_final_prescan_insn (rtx insn)
18868 if (flag_print_asm_name)
18869 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18870 INSN_ADDRESSES (INSN_UID (insn)));
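/* A worked example for thumb_shiftable_const below (illustrative):
   0x00ff0000 succeeds, because it is 0xff << 16 and so can be built
   from a byte move plus a shift; 0x00ff00ff fails, since its set
   bits do not fit in any single shifted 8-bit window.  */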
18874 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18876 unsigned HOST_WIDE_INT mask = 0xff;
18879 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18880 if (val == 0) /* XXX */
18883 for (i = 0; i < 25; i++)
18884 if ((val & (mask << i)) == val)
18890 /* Returns nonzero if the current function contains,
18891 or might contain a far jump. */
18893 thumb_far_jump_used_p (void)
18897 /* This test is only important for leaf functions. */
18898 /* assert (!leaf_function_p ()); */
18900 /* If we have already decided that far jumps may be used,
18901 do not bother checking again, and always return true even if
18902 it turns out that they are not being used. Once we have made
18903 the decision that far jumps are present (and that hence the link
18904 register will be pushed onto the stack) we cannot go back on it. */
18905 if (cfun->machine->far_jump_used)
18908 /* If this function is not being called from the prologue/epilogue
18909 generation code then it must be being called from the
18910 INITIAL_ELIMINATION_OFFSET macro. */
18911 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18913 /* In this case we know that we are being asked about the elimination
18914 of the arg pointer register. If that register is not being used,
18915 then there are no arguments on the stack, and we do not have to
18916 worry that a far jump might force the prologue to push the link
18917 register, changing the stack offsets. In this case we can just
18918 return false, since the presence of far jumps in the function will
18919 not affect stack offsets.
18921 If the arg pointer is live (or if it was live, but has now been
18922 eliminated and so set to dead) then we do have to test to see if
18923 the function might contain a far jump. This test can lead to some
18924 false negatives, since before reload is completed the length of
18925 branch instructions is not known, so gcc defaults to returning their
18926 longest length, which in turn sets the far jump attribute to true.
18928 A false negative will not result in bad code being generated, but it
18929 will result in a needless push and pop of the link register. We
18930 hope that this does not occur too often.
18932 If we need doubleword stack alignment this could affect the other
18933 elimination offsets so we can't risk getting it wrong. */
18934 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
18935 cfun->machine->arg_pointer_live = 1;
18936 else if (!cfun->machine->arg_pointer_live)
18940 /* Check to see if the function contains a branch
18941 insn with the far jump attribute set. */
18942 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18944 if (GET_CODE (insn) == JUMP_INSN
18945 /* Ignore tablejump patterns. */
18946 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18947 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
18948 && get_attr_far_jump (insn) == FAR_JUMP_YES
18951 /* Record the fact that we have decided that
18952 the function does use far jumps. */
18953 cfun->machine->far_jump_used = 1;
18961 /* Return nonzero if FUNC must be entered in ARM mode. */
18963 is_called_in_ARM_mode (tree func)
18965 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
18967 /* Ignore the problem of functions whose address is taken. */
18968 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
18972 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
18978 /* The bits which aren't usefully expanded as rtl. */
18980 thumb_unexpanded_epilogue (void)
18982 arm_stack_offsets *offsets;
18984 unsigned long live_regs_mask = 0;
18985 int high_regs_pushed = 0;
18986 int had_to_push_lr;
18989 if (cfun->machine->return_used_this_function != 0)
18992 if (IS_NAKED (arm_current_func_type ()))
18995 offsets = arm_get_frame_offsets ();
18996 live_regs_mask = offsets->saved_regs_mask;
18997 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18999 /* Deduce the registers used from the function's return value.
19000 This is more reliable than examining df_regs_ever_live_p () because that
19001 will be set if the register is ever used in the function, not just if
19002 the register is used to hold a return value. */
19003 size = arm_size_return_regs ();
19005 /* The prolog may have pushed some high registers to use as
19006 work registers. e.g. the testsuite file:
19007 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19008 compiles to produce:
19009 push {r4, r5, r6, r7, lr}
19013 as part of the prolog. We have to undo that pushing here. */
19015 if (high_regs_pushed)
19017 unsigned long mask = live_regs_mask & 0xff;
19020 /* The available low registers depend on the size of the value we are
      returning.  */
19028 /* Oh dear!  We have no low registers into which we can pop
      high registers!  */
19031 ("no low registers available for popping high registers");
19033 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19034 if (live_regs_mask & (1 << next_hi_reg))
19037 while (high_regs_pushed)
19039 /* Find lo register(s) into which the high register(s) can
      be popped.  */
19041 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19043 if (mask & (1 << regno))
19044 high_regs_pushed--;
19045 if (high_regs_pushed == 0)
19049 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19051 /* Pop the values into the low register(s). */
19052 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19054 /* Move the value(s) into the high registers. */
19055 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19057 if (mask & (1 << regno))
19059 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19062 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19063 if (live_regs_mask & (1 << next_hi_reg))
19068 live_regs_mask &= ~0x0f00;
19071 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19072 live_regs_mask &= 0xff;
19074 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19076 /* Pop the return address into the PC. */
19077 if (had_to_push_lr)
19078 live_regs_mask |= 1 << PC_REGNUM;
19080 /* Either no argument registers were pushed or a backtrace
19081 structure was created which includes an adjusted stack
19082 pointer, so just pop everything. */
19083 if (live_regs_mask)
19084 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19087 /* We have either just popped the return address into the
19088 PC or it was kept in LR for the entire function. */
19089 if (!had_to_push_lr)
19090 thumb_exit (asm_out_file, LR_REGNUM);
19094 /* Pop everything but the return address. */
19095 if (live_regs_mask)
19096 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19099 if (had_to_push_lr)
19103 /* We have no free low regs, so save one. */
19104 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19108 /* Get the return address into a temporary register. */
19109 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19110 1 << LAST_ARG_REGNUM);
19114 /* Move the return address to lr. */
19115 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19117 /* Restore the low register. */
19118 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19123 regno = LAST_ARG_REGNUM;
19128 /* Remove the argument registers that were pushed onto the stack. */
19129 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19130 SP_REGNUM, SP_REGNUM,
19131 crtl->args.pretend_args_size);
19133 thumb_exit (asm_out_file, regno);
19139 /* Functions to save and restore machine-specific function data. */
19140 static struct machine_function *
19141 arm_init_machine_status (void)
19143 struct machine_function *machine;
19144 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19146 #if ARM_FT_UNKNOWN != 0
19147 machine->func_type = ARM_FT_UNKNOWN;
19152 /* Return an RTX indicating where the return address to the
19153 calling function can be found. */
19155 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19160 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19163 /* Do anything needed before RTL is emitted for each function. */
19165 arm_init_expanders (void)
19167 /* Arrange to initialize and mark the machine per-function status. */
19168 init_machine_status = arm_init_machine_status;
19170 /* This is to stop the combine pass optimizing away the alignment
19171 adjustment of va_arg. */
19172 /* ??? It is claimed that this should not be necessary. */
19174 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19178 /* Like arm_compute_initial_elimination_offset.  Simpler because there
19179 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19180 to point at the base of the local variables after static stack
19181 space for a function has been allocated. */
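/* Worked example (illustrative): eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM below yields outgoing_args - saved_args,
   i.e. the distance from the incoming-argument base down to the
   bottom of the outgoing-argument area, spanning the saved
   registers, locals and any padding recorded in OFFSETS.  */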
19184 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19186 arm_stack_offsets *offsets;
19188 offsets = arm_get_frame_offsets ();
19192 case ARG_POINTER_REGNUM:
19195 case STACK_POINTER_REGNUM:
19196 return offsets->outgoing_args - offsets->saved_args;
19198 case FRAME_POINTER_REGNUM:
19199 return offsets->soft_frame - offsets->saved_args;
19201 case ARM_HARD_FRAME_POINTER_REGNUM:
19202 return offsets->saved_regs - offsets->saved_args;
19204 case THUMB_HARD_FRAME_POINTER_REGNUM:
19205 return offsets->locals_base - offsets->saved_args;
19208 gcc_unreachable ();
19212 case FRAME_POINTER_REGNUM:
19215 case STACK_POINTER_REGNUM:
19216 return offsets->outgoing_args - offsets->soft_frame;
19218 case ARM_HARD_FRAME_POINTER_REGNUM:
19219 return offsets->saved_regs - offsets->soft_frame;
19221 case THUMB_HARD_FRAME_POINTER_REGNUM:
19222 return offsets->locals_base - offsets->soft_frame;
19225 gcc_unreachable ();
19230 gcc_unreachable ();
19234 /* Generate the rest of a function's prologue. */
19236 thumb1_expand_prologue (void)
19240 HOST_WIDE_INT amount;
19241 arm_stack_offsets *offsets;
19242 unsigned long func_type;
19244 unsigned long live_regs_mask;
19246 func_type = arm_current_func_type ();
19248 /* Naked functions don't have prologues. */
19249 if (IS_NAKED (func_type))
19252 if (IS_INTERRUPT (func_type))
19254 error ("interrupt service routines cannot be coded in Thumb mode");
19258 offsets = arm_get_frame_offsets ();
19259 live_regs_mask = offsets->saved_regs_mask;
19260 /* Load the pic register before setting the frame pointer,
19261 so we can use r7 as a temporary work register. */
19262 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19263 arm_load_pic_register (live_regs_mask);
19265 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19266 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19267 stack_pointer_rtx);
19269 amount = offsets->outgoing_args - offsets->saved_regs;
19274 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19275 GEN_INT (- amount)));
19276 RTX_FRAME_RELATED_P (insn) = 1;
19282 /* The stack decrement is too big for an immediate value in a single
19283 insn. In theory we could issue multiple subtracts, but after
19284 three of them it becomes more space efficient to place the full
19285 value in the constant pool and load into a register. (Also the
19286 ARM debugger really likes to see only one stack decrement per
19287 function). So instead we look for a scratch register into which
19288 we can load the decrement, and then we subtract this from the
19289 stack pointer. Unfortunately on the thumb the only available
19290 scratch registers are the argument registers, and we cannot use
19291 these as they may hold arguments to the function. Instead we
19292 attempt to locate a call preserved register which is used by this
19293 function. If we can find one, then we know that it will have
19294 been pushed at the start of the prologue and so we can corrupt
      it now.  */
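/* Illustrative expansion (register choice and constant are
   assumptions): for amount == 2048, which exceeds the Thumb-1
   "sub sp, #imm" range of 508, the code below emits

       ldr  r4, =-2048       ; r4: a call-saved register already pushed
       add  sp, sp, r4

   together with a REG_FRAME_RELATED_EXPR note describing
   sp = sp - 2048 so the unwind info stays accurate.  */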
19296 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19297 if (live_regs_mask & (1 << regno))
19300 gcc_assert (regno <= LAST_LO_REGNUM);
19302 reg = gen_rtx_REG (SImode, regno);
19304 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19306 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19307 stack_pointer_rtx, reg));
19308 RTX_FRAME_RELATED_P (insn) = 1;
19309 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19310 plus_constant (stack_pointer_rtx,
19312 RTX_FRAME_RELATED_P (dwarf) = 1;
19313 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19317 if (frame_pointer_needed)
19318 thumb_set_frame_pointer (offsets);
19320 /* If we are profiling, make sure no instructions are scheduled before
19321 the call to mcount. Similarly if the user has requested no
19322 scheduling in the prolog. Similarly if we want non-call exceptions
19323 using the EABI unwinder, to prevent faulting instructions from being
19324 swapped with a stack adjustment. */
19325 if (crtl->profile || !TARGET_SCHED_PROLOG
19326 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19327 emit_insn (gen_blockage ());
19329 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19330 if (live_regs_mask & 0xff)
19331 cfun->machine->lr_save_eliminated = 0;
19336 thumb1_expand_epilogue (void)
19338 HOST_WIDE_INT amount;
19339 arm_stack_offsets *offsets;
19342 /* Naked functions don't have epilogues. */
19343 if (IS_NAKED (arm_current_func_type ()))
19346 offsets = arm_get_frame_offsets ();
19347 amount = offsets->outgoing_args - offsets->saved_regs;
19349 if (frame_pointer_needed)
19351 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19352 amount = offsets->locals_base - offsets->saved_regs;
19355 gcc_assert (amount >= 0);
19359 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19360 GEN_INT (amount)));
19363 /* r3 is always free in the epilogue. */
19364 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19366 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19367 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19371 /* Emit a USE (stack_pointer_rtx), so that
19372 the stack adjustment will not be deleted. */
19373 emit_insn (gen_prologue_use (stack_pointer_rtx));
19375 if (crtl->profile || !TARGET_SCHED_PROLOG)
19376 emit_insn (gen_blockage ());
19378 /* Emit a clobber for each insn that will be restored in the epilogue,
19379 so that flow2 will get register lifetimes correct. */
19380 for (regno = 0; regno < 13; regno++)
19381 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19382 emit_clobber (gen_rtx_REG (SImode, regno));
19384 if (! df_regs_ever_live_p (LR_REGNUM))
19385 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19389 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19391 arm_stack_offsets *offsets;
19392 unsigned long live_regs_mask = 0;
19393 unsigned long l_mask;
19394 unsigned high_regs_pushed = 0;
19395 int cfa_offset = 0;
19398 if (IS_NAKED (arm_current_func_type ()))
19401 if (is_called_in_ARM_mode (current_function_decl))
19405 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19406 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19408 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19410 /* Generate code sequence to switch us into Thumb mode. */
19411 /* The .code 32 directive has already been emitted by
19412 ASM_DECLARE_FUNCTION_NAME. */
19413 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19414 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19416 /* Generate a label, so that the debugger will notice the
19417 change in instruction sets. This label is also used by
19418 the assembler to bypass the ARM code when this function
19419 is called from a Thumb encoded function elsewhere in the
19420 same file. Hence the definition of STUB_NAME here must
19421 agree with the definition in gas/config/tc-arm.c. */
19423 #define STUB_NAME ".real_start_of"
19425 fprintf (f, "\t.code\t16\n");
19427 if (arm_dllexport_name_p (name))
19428 name = arm_strip_name_encoding (name);
19430 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19431 fprintf (f, "\t.thumb_func\n");
19432 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19435 if (crtl->args.pretend_args_size)
19437 /* Output unwind directive for the stack adjustment. */
19438 if (ARM_EABI_UNWIND_TABLES)
19439 fprintf (f, "\t.pad #%d\n",
19440 crtl->args.pretend_args_size);
19442 if (cfun->machine->uses_anonymous_args)
19446 fprintf (f, "\tpush\t{");
19448 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19450 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19451 regno <= LAST_ARG_REGNUM;
19453 asm_fprintf (f, "%r%s", regno,
19454 regno == LAST_ARG_REGNUM ? "" : ", ");
19456 fprintf (f, "}\n");
19459 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19460 SP_REGNUM, SP_REGNUM,
19461 crtl->args.pretend_args_size);
19463 /* We don't need to record the stores for unwinding (would it
19464 help the debugger any if we did?), but record the change in
19465 the stack pointer. */
19466 if (dwarf2out_do_frame ())
19468 char *l = dwarf2out_cfi_label (false);
19470 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19471 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19475 /* Get the registers we are going to push. */
19476 offsets = arm_get_frame_offsets ();
19477 live_regs_mask = offsets->saved_regs_mask;
19478 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19479 l_mask = live_regs_mask & 0x40ff;
19480 /* Then count how many other high registers will need to be pushed. */
19481 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19483 if (TARGET_BACKTRACE)
19486 unsigned work_register;
19488 /* We have been asked to create a stack backtrace structure.
19489 The code looks like this:
19493 0 sub SP, #16 Reserve space for 4 registers.
19494 2 push {R7} Push low registers.
19495 4 add R7, SP, #20 Get the stack pointer before the push.
19496 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19497 8 mov R7, PC Get hold of the start of this code plus 12.
19498 10 str R7, [SP, #16] Store it.
19499 12 mov R7, FP Get hold of the current frame pointer.
19500 14 str R7, [SP, #4] Store it.
19501 16 mov R7, LR Get hold of the current return address.
19502 18 str R7, [SP, #12] Store it.
19503 20 add R7, SP, #16 Point at the start of the backtrace structure.
19504 22 mov FP, R7 Put this value into the frame pointer. */
19506 work_register = thumb_find_work_register (live_regs_mask);
19508 if (ARM_EABI_UNWIND_TABLES)
19509 asm_fprintf (f, "\t.pad #16\n");
19512 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19513 SP_REGNUM, SP_REGNUM);
19515 if (dwarf2out_do_frame ())
19517 char *l = dwarf2out_cfi_label (false);
19519 cfa_offset = cfa_offset + 16;
19520 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19525 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19526 offset = bit_count (l_mask) * UNITS_PER_WORD;
19531 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19532 offset + 16 + crtl->args.pretend_args_size);
19534 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19537 /* Make sure that the instruction fetching the PC is in the right place
19538 to calculate "start of backtrace creation code + 12". */
19541 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19542 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19544 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19545 ARM_HARD_FRAME_POINTER_REGNUM);
19546 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19551 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19552 ARM_HARD_FRAME_POINTER_REGNUM);
19553 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19555 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19556 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19560 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19561 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19563 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19565 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19566 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19568 /* Optimization: If we are not pushing any low registers but we are going
19569 to push some high registers then delay our first push. This will just
19570 be a push of LR and we can combine it with the push of the first high
      register.  */
19572 else if ((l_mask & 0xff) != 0
19573 || (high_regs_pushed == 0 && l_mask))
19574 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19576 if (high_regs_pushed)
19578 unsigned pushable_regs;
19579 unsigned next_hi_reg;
19581 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19582 if (live_regs_mask & (1 << next_hi_reg))
19585 pushable_regs = l_mask & 0xff;
19587 if (pushable_regs == 0)
19588 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19590 while (high_regs_pushed > 0)
19592 unsigned long real_regs_mask = 0;
19594 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19596 if (pushable_regs & (1 << regno))
19598 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19600 high_regs_pushed --;
19601 real_regs_mask |= (1 << next_hi_reg);
19603 if (high_regs_pushed)
19605 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19607 if (live_regs_mask & (1 << next_hi_reg))
19612 pushable_regs &= ~((1 << regno) - 1);
19618 /* If we had to find a work register and we have not yet
19619 saved the LR then add it to the list of regs to push. */
19620 if (l_mask == (1 << LR_REGNUM))
19622 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19624 real_regs_mask | (1 << LR_REGNUM));
19628 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
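/* Illustration (not from the original source): with an empty l_mask and
   r8/r9 live, one pass of the loop above copies each high register into a
   pushable low register and then pushes them in one go, e.g.

       mov  r7, r9
       mov  r6, r8
       push {r6, r7}

   The exact low registers used depend on l_mask and on the work-register
   search, so treat the register numbers as a sketch only.  */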
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
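/* Illustration (assumed operands, not from the source): splitting a DImode
   load from [r2 + r3] into r0/r1 via the PLUS/REG case above gives

       add  r1, r2, r3      @ form the address in the high destination
       ldr  r0, [r1, #0]    @ load the low word
       ldr  r1, [r1, #4]    @ load the high word last

   so the address register is consumed before the final load clobbers it.  */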
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      if (REGNO (operands[5]) > REGNO (operands[6]))
        {
          tmp = operands[5];
          operands[5] = operands[6];
          operands[6] = tmp;
        }
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
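/* For example, with n == 2 the sequence emitted above is simply

       ldmia  r1!, {r4, r5}
       stmia  r0!, {r4, r5}

   after the register-sorting swaps, since ldmia/stmia register lists must
   be in ascending order.  (Register numbers are illustrative.)  */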
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
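/* Sketch of the resulting code (label name hypothetical): a call through,
   say, r3 becomes "bl .LTCV" here, and arm_file_end below emits the shared
   stub

   .LTCV:
       bx   r3

   once for the text section, or once per section when using
   -ffunction-sections.  */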
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
                            reg));
    }
}
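/* Worked example (illustrative): a 23-byte copy expands as one 12-byte
   ldmia/stmia group, one 8-byte group, then a halfword move and a byte
   move: 23 = 12 + 8 + 2 + 1.  */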
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_select[0].string)
        asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
      else if (arm_select[1].string)
        asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
      else
        asm_fprintf (asm_out_file, "\t.cpu %s\n",
                     all_cores[arm_default_cpu].name);

      if (TARGET_SOFT_FLOAT)
        {
          if (TARGET_VFP)
            fpu_name = "softvfp";
          else
            fpu_name = "softfpa";
        }
      else
        {
          fpu_name = arm_fpu_desc->name;
          if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
            {
              if (TARGET_HARD_FLOAT)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
              if (TARGET_HARD_FLOAT_ABI)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
            }
        }
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
         are used.  However, we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      /* Tag_ABI_FP_rounding.  */
      if (flag_rounding_math)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
        {
          /* Tag_ABI_FP_denormal.  */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
          /* Tag_ABI_FP_exceptions.  */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
        }
      /* Tag_ABI_FP_user_exceptions.  */
      if (flag_signaling_nans)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
                   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
      /* Tag_ABI_enum_size.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
                   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      /* Tag_ABI_FP_16bit_format.  */
      if (arm_fp16_format)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
                     (int)arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook();
    }
  default_file_start();
}
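/* For illustration, a plausible preamble for -mcpu=cortex-a8 with default
   flags would be (exact attribute values depend on the options given):

       .cpu cortex-a8
       .fpu neon
       .eabi_attribute 24, 1    @ Tag_ABI_align8_needed
       .eabi_attribute 25, 1    @ Tag_ABI_align8_preserved
       .eabi_attribute 26, 2    @ Tag_ABI_enum_size, -fno-short-enums
*/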
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}

#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                     HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        {
          fputs ("\tldr\tr12, ", file);
        }
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%s\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
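/* Illustrative output for the simple non-Thumb1, non-PIC case with a small
   delta (hypothetical symbol name):

       add  r0, r0, #8
       b    _ZN1D3fooEv(PLT)

   r1 is adjusted instead of r0 when the return value is passed by
   reference (see aggregate_value_p above).  */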
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}

/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
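/* Layout sketch: on a little-endian target the two significant bytes are
   emitted first and followed by two bytes of zero padding, so the ldr that
   loads this pool entry sees the half-float in the low half of the word;
   on a big-endian target the padding comes first.  */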
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
                            enum machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
        nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
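/* Example (AAPCS, four core argument registers): for "int f (int a, ...)"
   one register holds the named argument, so *pretend_size becomes
   3 * UNITS_PER_WORD and the prologue pushes r1-r3 where the anonymous
   arguments can be addressed on the stack.  */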
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
        return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
        return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
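/* Example (illustrative): for

       mul  r4, r1, r2
       mla  r5, r3, r6, r4

   the accumulator r4 is exactly the mul result and neither multiplicand
   overlaps it, so the function returns nonzero; "mla r5, r4, r6, r4"
   would fail because the result also feeds a multiplicand.  */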
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}


void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant(hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          gcc_assert (delta >= 0);
          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (addr, delta);
        }
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}

/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
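/* E.g. an SImode shift by a variable count of 257 may be assumed to behave
   as a shift by 1 (257 & 255), whereas no equivalent assumption is made
   for DImode shift amounts.  */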
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
        {
          fprintf (asm_out_file, "\t.pad #4\n");
          offset -= 4;
        }
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
          || GET_CODE (XEXP (e, 0)) != MEM
          || GET_CODE (XEXP (e, 1)) != REG)
        abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
        abort ();
      lastreg = reg;
      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
        {
          offset += reg_size;
          if (GET_CODE (XEXP (e, 0)) != REG
              || REGNO (XEXP (e, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e, 1)) != CONST_INT
              || offset != INTVAL (XEXP (e, 1)))
            abort ();
        }
      else if (i != 1
               || GET_CODE (e) != REG
               || REGNO (e) != SP_REGNUM)
        abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           INTVAL (XEXP (e1, 1)));
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
        }
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
        {
          /* Stack pointer save before alignment.  */
          reg = REGNO (e0);
          asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                       reg + 0x90, reg);
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}


/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}


/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}


/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift != NULL)
        {
          if (val != -1)
            operands[2] = GEN_INT(val);

          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
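/* Example outputs (sketch): a constant left shift by 2 produces
   "lsl\tr0, r1, #2" through the unified-syntax branch, while the divided
   ARM syntax fallback folds the shift into the move:
   "mov\tr0, r1, lsl #2".  The flag character selected by SET_FLAGS is
   substituted via the %c in the patterns above.  */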
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));
  addr_diff_vec_flags flags;

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  flags = ADDR_DIFF_VEC_FLAGS (diff_vec);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "mov\t%|pc, %4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
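/* A byte-table dispatch, for example, looks like (sketch):

       cmp  r0, #N              @ N = number of cases - 1
       bhi  .Ldefault
       tbb  [pc, r0]            @ branch via the byte offset table

   with the ADDR_DIFF_VEC placed immediately after the tbb.  */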
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}

/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only Thumb-1 can't support conditional execution, so return true if
   the target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"