/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "optabs.h"
#include "toplev.h"
#include "ggc.h"
#include "integrate.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "libfuncs.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
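
/* Worked example of the arithmetic above: an anchor covers offsets
   -4088 through +4095 inclusive, i.e. 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, so consecutive anchors stay mutually 8-byte
   aligned.  */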

#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
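
/* targetm is the one and only instance of the target hooks structure.
   TARGET_INITIALIZER (from target-def.h) expands to an aggregate
   initializer that picks up every TARGET_* macro in scope, which is why
   each hook above is installed with an #undef/#define pair before this
   point.  */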

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)   /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)   /* Extended multiply.  */
#define FL_MODE26     (1 << 2)   /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)   /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)   /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)   /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)   /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)   /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)   /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)   /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)  /* XScale.  */
#define FL_CIRRUS     (1 << 11)  /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)  /* Architecture rel 6.  Adds
                                    media instructions.  */
#define FL_VFPV2      (1 << 13)  /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)  /* Schedule for write buffer ops.
                                    Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)  /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)  /* Thumb-2.  */
#define FL_NOTM       (1 << 17)  /* Instructions not present in the 'M'
                                    profile.  */
#define FL_DIV        (1 << 18)  /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)  /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)  /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)  /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2     FL_NOTM
#define FL_FOR_ARCH3     (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M    (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4     (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T    (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5     (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T    (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E    (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE   (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ  FL_FOR_ARCH5TE
#define FL_FOR_ARCH6     (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J    FL_FOR_ARCH6
#define FL_FOR_ARCH6K    (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z    FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK   FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2   (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M    (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7     (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A    (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R    (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M    (FL_FOR_ARCH7 | FL_DIV)

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
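
/* The table is ordered so that the inverse of the condition at index I
   sits at index (I ^ 1): eq/ne, cs/cc, mi/pl, vs/vc, hi/ls, ge/lt,
   gt/le.  Inverting a condition is therefore just a low-bit flip.  */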

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
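
/* THUMB2_WORK_REGS starts from the low registers r0-r7 (the 0xff mask)
   and knocks out the hard frame pointer, SP, PC and the PIC base,
   leaving the set of registers that Thumb-2 prologue/epilogue code may
   use as scratch.  */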

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
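
/* The rows above come from the "X macro" idiom: arm-cores.def is a list
   of ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) invocations, and the
   #define above turns each one into an initializer row.  An illustrative
   entry such as ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC, slowmul)
   would expand to
     {"arm7tdmi", arm_none, "4T", FL_CO_PROC | FL_FOR_ARCH4T,
      arm_slowmul_rtx_costs},
   so adding a core to the .def file updates this table automatically.  */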

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the tune.  */
  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */
static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,         "-mcpu=",       all_cores  },
  { NULL,         "-march=",      all_architectures },
  { NULL,         "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2
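
/* These indexes must track the row order of arm_select above:
   arm_handle_option stores the -mcpu= argument into arm_select[0],
   -march= into arm_select[1] and -mtune= into arm_select[2].  */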

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */
static const struct fpu_desc all_fpus[] =
{
  {"fpa",       FPUTYPE_FPA},
  {"fpe2",      FPUTYPE_FPA_EMU2},
  {"fpe3",      FPUTYPE_FPA_EMU3},
  {"maverick",  FPUTYPE_MAVERICK},
  {"vfp",       FPUTYPE_VFP},
  {"vfp3",      FPUTYPE_VFP3},
  {"vfpv3",     FPUTYPE_VFP3},
  {"vfpv3-d16", FPUTYPE_VFP3D16},
  {"neon",      FPUTYPE_NEON},
  {"neon-fp16", FPUTYPE_NEON_FP16}
};

/* Floating point models used by the different hardware.
   See fputype in arm.h.  */
static const enum arm_fp_model fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,   /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,  /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP,       /* FPUTYPE_NEON  */
  ARM_FP_MODEL_VFP        /* FPUTYPE_NEON_FP16  */
};
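
/* fp_model_for_fpu is indexed by enum fputype (see arm.h), so its rows
   must stay in the same order as that enum; arm_override_options relies
   on this when it does
       arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
   after looking the FPU up in all_fpus.  */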

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
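
/* bit_count relies on the classic trick that "value &= value - 1" clears
   exactly one (the least-significant) set bit per iteration, so the loop
   runs once per set bit: bit_count (0x29) == 3, for instance.  It is
   used below by arm_override_options to score how many capability bits
   a candidate CPU shares with the required instruction flags.  */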

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved (call-clobbered) registers, there is no need to tell
     the compiler explicitly that those registers are clobbered by
     these routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
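
  /* Example of the trick described above: a plain 64-bit signed division
     expands to a call to __aeabi_ldivmod, which returns the quotient in
     {r0, r1} and the remainder in {r2, r3}; a quotient-only caller
     simply ignores {r2, r3}.  Since r0-r3 are call-clobbered anyway, no
     extra clobber information is needed.  */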

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i == ARM_OPT_SET_CPU)
                  selected_cpu = (enum processor_type) (sel - ptr->processors);

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (selected_cpu == arm_none)
            selected_cpu = arm6;
        }
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
        {
          if (streq (all_fpus[i].name, target_fpu_name))
            {
              arm_fpu_arch = all_fpus[i].fpu;
              arm_fpu_tune = arm_fpu_arch;
              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
              break;
            }
        }
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
         if ((insn_flags & FL_VFP) != 0)
           arm_fpu_arch = FPUTYPE_VFP;
         else  */
      if (arm_arch_cirrus)
        arm_fpu_arch = FPUTYPE_MAVERICK;
      else
        arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
        arm_fpu_tune = FPUTYPE_FPA;
      else
        arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }
1671 if (target_float_abi_name != NULL)
1673 /* The user specified a FP ABI. */
1674 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1676 if (streq (all_float_abis[i].name, target_float_abi_name))
1678 arm_float_abi = all_float_abis[i].abi_type;
1682 if (i == ARRAY_SIZE (all_float_abis))
1683 error ("invalid floating point abi: -mfloat-abi=%s",
1684 target_float_abi_name);
1687 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1689 if (TARGET_AAPCS_BASED
1690 && (arm_fp_model == ARM_FP_MODEL_FPA))
1691 error ("FPA is unsupported in the AAPCS");
1693 if (TARGET_AAPCS_BASED)
1695 if (TARGET_CALLER_INTERWORKING)
1696 error ("AAPCS does not support -mcaller-super-interworking");
1698 if (TARGET_CALLEE_INTERWORKING)
1699 error ("AAPCS does not support -mcallee-super-interworking");
1702 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1703 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1704 will ever exist. GCC makes no attempt to support this combination. */
1705 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1706 sorry ("iWMMXt and hardware floating point");
1708 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1709 if (TARGET_THUMB2 && TARGET_IWMMXT)
1710 sorry ("Thumb-2 iWMMXt");
1712 /* __fp16 support currently assumes the core has ldrh. */
1713 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1714 sorry ("__fp16 and no ldrh");
1716 /* If soft-float is specified then don't use FPU. */
1717 if (TARGET_SOFT_FLOAT)
1718 arm_fpu_arch = FPUTYPE_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
1742 /* For arm2/3 there is no need to do any scheduling if there is only
1743 a floating point emulator, or we are doing software floating-point. */
1744 if ((TARGET_SOFT_FLOAT
1745 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1746 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1747 && (tune_flags & FL_MODE32) == 0)
1748 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
1771 if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("cannot use -mtp=cp15 with 16-bit Thumb");
1774 /* Override the default structure alignment for AAPCS ABI. */
1775 if (TARGET_AAPCS_BASED)
1776 arm_structure_size_boundary = 8;
  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
    }
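  /* Illustrative note (not from the original sources): the boundary is
     in bits, so with a boundary of 32 a type such as "struct { char c; }"
     occupies a full 4-byte word, while the AAPCS boundary of 8 set above
     leaves it at a single byte.  */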
1790 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1792 error ("RTP PIC is incompatible with Thumb");
1796 /* If stack checking is disabled, we can use r10 as the PIC register,
1797 which keeps r9 available. The EABI specifies r9 as the PIC register. */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }
1805 if (flag_pic && TARGET_VXWORKS_RTP)
1806 arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (selected_cpu == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
1836 /* ??? We might want scheduling for thumb2. */
1837 if (TARGET_THUMB && flag_schedule_insns)
1839 /* Don't warn since it's on by default in -O2. */
1840 flag_schedule_insns = 0;
  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
	 2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
      if (arm_ld_sched)
	arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
	 to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
	arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
	 that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
	max_insns_skipped = 3;
    }
1871 /* Hot/Cold partitioning is not currently supported, since we can't
1872 handle literal pool placement in that case. */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
1881 /* Ideally we would want to use CFI directives to generate
1882 debug info. However this also creates the .eh_frame
1883 section, so disable them until GAS can handle
1884 this properly. See PR40521. */
1885 if (TARGET_AAPCS_BASED)
1886 flag_dwarf2_cfi_asm = 0;
1888 /* Register global variables with the garbage collector. */
1889 arm_add_gc_roots ();
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
1911 { "IRQ", ARM_FT_ISR },
1912 { "irq", ARM_FT_ISR },
1913 { "FIQ", ARM_FT_FIQ },
1914 { "fiq", ARM_FT_FIQ },
1915 { "ABORT", ARM_FT_ISR },
1916 { "abort", ARM_FT_ISR },
1919 { "UNDEF", ARM_FT_EXCEPTION },
1920 { "undef", ARM_FT_EXCEPTION },
1921 { "SWI", ARM_FT_EXCEPTION },
1922 { "swi", ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
1926 /* Returns the (interrupt) function type of the current
1927 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1938 /* No argument - default to IRQ. */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;
1942 /* Get the value of the argument. */
1943 if (TREE_VALUE (argument) == NULL_TREE
1944 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1945 return ARM_FT_UNKNOWN;
1947 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1949 /* Check it against the list of known arguments. */
1950 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1951 if (streq (arg, ptr->arg))
1952 return ptr->return_value;
1954 /* An unrecognized interrupt type. */
  return ARM_FT_UNKNOWN;
}
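/* Illustrative usage (a sketch, not part of the original file):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   matches the "IRQ" entry in the table above and yields ARM_FT_ISR,
   while an unknown string would make arm_isr_value return
   ARM_FT_UNKNOWN.  */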
1958 /* Computes the type of the current function. */
static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;
1967 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1969 /* Decide if the current function is volatile. Such functions
1970 never return, and many memory cycles can be saved by not storing
1971 register values that will never be needed again. This optimization
1972 was added to speed up context switching in a kernel application. */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
1975 || !(flag_unwind_tables
1976 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1977 && TREE_THIS_VOLATILE (current_function_decl))
1978 type |= ARM_FT_VOLATILE;
1980 if (cfun->static_chain_decl != NULL)
1981 type |= ARM_FT_NESTED;
1983 attr = DECL_ATTRIBUTES (current_function_decl);
  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the ARM implementation.
2042 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2043 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2044 STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
2050 fprintf (f, "\t.code\t16\n");
2051 fprintf (f, ".Ltrampoline_start:\n");
2052 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2053 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2054 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2055 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2056 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }

  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
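/* Illustrative note (an inference, not original text): in the 32-bit
   cases above the template is two instructions followed by the two zero
   words emitted just before this comment; arm_trampoline_init below
   overwrites those words (at offsets 8 and 12) with the static chain
   value and the target function's address.  */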
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
2070 emit_block_move (m_tramp, assemble_trampoline_template (),
2071 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2073 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2074 emit_move_insn (mem, chain_value);
2076 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2077 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2078 emit_move_insn (mem, fnaddr);
2080 a_tramp = XEXP (m_tramp, 0);
2081 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2082 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}
2086 /* Thumb trampolines should be entered in thumb mode, so set
2087 the bottom bit of the address. */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
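/* Illustrative example (not from the original sources): a trampoline
   placed at 0x20000 on a Thumb target is returned as 0x20001; the set
   bottom bit makes a BX/BLX to the trampoline switch into Thumb state.  */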
2098 /* Return 1 if it is possible to return using a single instruction.
2099 If SIBLING is non-null, this is a test for a return before a sibling
2100 call. SIBLING is the call insn, so we can examine its register usage. */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
2107 unsigned long saved_int_regs;
2108 unsigned HOST_WIDE_INT stack_adjust;
2109 arm_stack_offsets *offsets;
2111 /* Never use a return instruction before reload has run. */
  if (!reload_completed)
    return 0;
2115 func_type = arm_current_func_type ();
  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2127 offsets = arm_get_frame_offsets ();
2128 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2130 /* As do variadic functions. */
2131 if (crtl->args.pretend_args_size
2132 || cfun->machine->uses_anonymous_args
2133 /* Or if the function calls __builtin_eh_return () */
2134 || crtl->calls_eh_return
2135 /* Or if the function calls alloca */
2136 || cfun->calls_alloca
2137 /* Or if there is a stack adjustment. However, if the stack pointer
2138 is saved on the stack, we can use a pre-incrementing stack load. */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;
2143 saved_int_regs = offsets->saved_regs_mask;
2145 /* Unfortunately, the insn
2147 ldmib sp, {..., sp, ...}
2149 triggers a bug on most SA-110 based devices, such that the stack
2150 pointer won't be correctly restored if the instruction takes a
2151 page fault. We work around this problem by popping r3 along with
2152 the other registers, since that is never slower than executing
2153 another instruction.
     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ... */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }
  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
2189 /* On StrongARM, conditional returns are expensive if they aren't
2190 taken and multiple registers have been stacked. */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }
2204 /* If there are saved registers but the LR isn't saved, then we need
2205 two instructions for the return. */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;
2237 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2238 be all zero, or all one. */
2239 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2240 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2241 != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;
2245 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2247 /* Fast return for 0 and small values. We must do this for zero, since
2248 the code below can't handle that one case. */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;
2252 /* Get the number of trailing zeros. */
2253 lowbit = ffs((int) i) - 1;
  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;
  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated pattern.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;
    }

  return FALSE;
}
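/* Some illustrative cases (not from the original sources):
     0x000000ff	valid: 8-bit value, no rotation
     0x0000ff00	valid: 0xff rotated into bits 8-15
     0xff000000	valid: 0xff rotated into the top byte
     0x000001fe	invalid in ARM mode (it would need an odd rotation),
		though accepted for Thumb-2, whose constants are shifted
     0x00ff00ff	invalid in ARM mode, but matches the Thumb-2 replicated
		pattern test (i == v | (v << 8)) above.  */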
2286 /* Return true if I is a valid constant for the operation CODE. */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;
2314 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2316 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2322 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2326 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
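/* Illustrative examples (not from the original sources):
   - PLUS with i == -255: -255 is not encodable, but -i == 255 is, so
     the addition can be emitted as	sub	rd, rn, #255
   - AND with i == 0xffffff00: the mask is not encodable, but ~i == 0xff
     is, so the operation becomes	bic	rd, rn, #255  */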
2333 /* Emit a sequence of insns to handle a large constant.
2334 CODE is the code of the operation required, it can be any of SET, PLUS,
2335 IOR, AND, XOR, MINUS;
2336 MODE is the mode in which the operation is being performed;
2337 VAL is the integer to operate on;
2338 SOURCE is the other operand (a register, or a null-pointer for SET);
2339 SUBTARGETS means it is safe to create scratch registers if that will
2340 either produce a simpler sequence, or we will want to cse the values.
2341 Return value is the number of insns emitted. */
2343 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;
  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
	  && REGNO (target) != REGNO (source)))
    {
2359 /* After arm_reorg has been called, we can't fix up expensive
2360 constants by pushing them into memory so we must synthesize
2361 them in-line, regardless of the cost. This is only likely to
2362 be more costly on chips that have load delay slots and we are
2363 compiling without running the scheduler (so no splitting
2364 occurred before the final instruction emission).
2366 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > arm_constant_limit + (code != SET)))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }
  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return the number of ARM instructions required to synthesize the given
   constant, if we start emitting them from bit-position I.  */

static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
  HOST_WIDE_INT temp1;
  int num_insns = 0;
  do
    {
      int end;

      if (i <= 0)
	i += 32;
      if (remainder & (3 << (i - 2)))
	{
	  end = i - 8;
	  if (end < 0)
	    end += 32;
	  temp1 = remainder & ((0x0ff << end)
			       | ((i < end) ? (0xff >> (32 - end)) : 0));
	  remainder &= ~temp1;
	  num_insns++;
	  i -= 6;
	}
      i -= 2;
    } while (remainder);
  return num_insns;
}
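/* Worked example (illustrative, not original text): for
   remainder == 0x00ff00ff the loop above finds two 8-bit chunks,
   0x00ff0000 and 0x000000ff, and returns 2; for a SET that corresponds to
	mov	r0, #0x00ff0000
	orr	r0, r0, #0x000000ff  */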
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
/* ??? This needs more work for thumb2.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int can_negate_initial = 0;
2465 int num_bits_set = 0;
2466 int set_sign_bit_copies = 0;
2467 int clear_sign_bit_copies = 0;
2468 int clear_zero_bit_copies = 0;
2469 int set_zero_bit_copies = 0;
2471 unsigned HOST_WIDE_INT temp1, temp2;
2472 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2474 /* Find out which operations are safe for a given CODE. Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
2487 can_negate_initial = 1;
2491 if (remainder == 0xffffffff)
2494 emit_constant_insn (cond,
2495 gen_rtx_SET (VOIDmode, target,
2496 GEN_INT (ARM_SIGN_EXTEND (val))));
2502 if (reload_completed && rtx_equal_p (target, source))
2506 emit_constant_insn (cond,
2507 gen_rtx_SET (VOIDmode, target, source));
2519 emit_constant_insn (cond,
2520 gen_rtx_SET (VOIDmode, target, const0_rtx));
2523 if (remainder == 0xffffffff)
2525 if (reload_completed && rtx_equal_p (target, source))
2528 emit_constant_insn (cond,
2529 gen_rtx_SET (VOIDmode, target, source));
2538 if (reload_completed && rtx_equal_p (target, source))
2541 emit_constant_insn (cond,
2542 gen_rtx_SET (VOIDmode, target, source));
2546 /* We don't know how to handle other cases yet. */
2547 gcc_assert (remainder == 0xffffffff);
2550 emit_constant_insn (cond,
2551 gen_rtx_SET (VOIDmode, target,
2552 gen_rtx_NOT (mode, source)));
2556 /* We treat MINUS as (val - source), since (source - val) is always
2557 passed as (source + (-val)). */
2561 emit_constant_insn (cond,
2562 gen_rtx_SET (VOIDmode, target,
2563 gen_rtx_NEG (mode, source)));
2566 if (const_ok_for_arm (val))
2569 emit_constant_insn (cond,
2570 gen_rtx_SET (VOIDmode, target,
2571 gen_rtx_MINUS (mode, GEN_INT (val),
2583 /* If we can do it in one insn get out quickly. */
2584 if (const_ok_for_arm (val)
2585 || (can_negate_initial && const_ok_for_arm (-val))
2586 || (can_invert && const_ok_for_arm (~val)))
2589 emit_constant_insn (cond,
2590 gen_rtx_SET (VOIDmode, target,
2592 ? gen_rtx_fmt_ee (code, mode, source,
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
2600 /* Count number of leading zeros. */
2601 for (i = 31; i >= 0; i--)
2603 if ((remainder & (1 << i)) == 0)
2604 clear_sign_bit_copies++;
2609 /* Count number of leading 1's. */
2610 for (i = 31; i >= 0; i--)
2612 if ((remainder & (1 << i)) != 0)
2613 set_sign_bit_copies++;
  /* Count number of trailing zeros.  */
2619 for (i = 0; i <= 31; i++)
2621 if ((remainder & (1 << i)) == 0)
2622 clear_zero_bit_copies++;
2627 /* Count number of trailing 1's. */
2628 for (i = 0; i <= 31; i++)
2630 if ((remainder & (1 << i)) != 0)
2631 set_zero_bit_copies++;
2639 /* See if we can use movw. */
2640 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2643 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
  /* See if we can do this by sign_extending a constant that is known
     to be negative.  This is a good way of doing it, since the shift
     may well merge into a subsequent insn.  */
2651 if (set_sign_bit_copies > 1)
2653 if (const_ok_for_arm
2654 (temp1 = ARM_SIGN_EXTEND (remainder
2655 << (set_sign_bit_copies - 1))))
2659 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2660 emit_constant_insn (cond,
2661 gen_rtx_SET (VOIDmode, new_src,
2663 emit_constant_insn (cond,
2664 gen_ashrsi3 (target, new_src,
2665 GEN_INT (set_sign_bit_copies - 1)));
2669 /* For an inverted constant, we will need to set the low bits,
2670 these will be shifted out of harm's way. */
2671 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2672 if (const_ok_for_arm (~temp1))
2676 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2677 emit_constant_insn (cond,
2678 gen_rtx_SET (VOIDmode, new_src,
2680 emit_constant_insn (cond,
2681 gen_ashrsi3 (target, new_src,
2682 GEN_INT (set_sign_bit_copies - 1)));
2688 /* See if we can calculate the value as the difference between two
2689 valid immediates. */
2690 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2692 int topshift = clear_sign_bit_copies & ~1;
2694 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2695 & (0xff000000 >> topshift));
2697 /* If temp1 is zero, then that means the 9 most significant
2698 bits of remainder were 1 and we've caused it to overflow.
2699 When topshift is 0 we don't need to do anything since we
2700 can borrow from 'bit 32'. */
2701 if (temp1 == 0 && topshift != 0)
2702 temp1 = 0x80000000 >> (topshift - 1);
2704 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2706 if (const_ok_for_arm (temp2))
2710 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2711 emit_constant_insn (cond,
2712 gen_rtx_SET (VOIDmode, new_src,
2714 emit_constant_insn (cond,
2715 gen_addsi3 (target, new_src,
2723 /* See if we can generate this by setting the bottom (or the top)
2724 16 bits, and then shifting these into the other half of the
2725 word. We only look for the simplest cases, to do more would cost
2726 too much. Be careful, however, not to generate this when the
2727 alternative would take fewer insns. */
2728 if (val & 0xffff0000)
2730 temp1 = remainder & 0xffff0000;
2731 temp2 = remainder & 0x0000ffff;
2733 /* Overlaps outside this range are best done using other methods. */
2734 for (i = 9; i < 24; i++)
2736 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2737 && !const_ok_for_arm (temp2))
2739 rtx new_src = (subtargets
2740 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2742 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2743 source, subtargets, generate);
2751 gen_rtx_ASHIFT (mode, source,
2758 /* Don't duplicate cases already considered. */
2759 for (i = 17; i < 24; i++)
2761 if (((temp1 | (temp1 >> i)) == remainder)
2762 && !const_ok_for_arm (temp1))
2764 rtx new_src = (subtargets
2765 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2767 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2768 source, subtargets, generate);
2773 gen_rtx_SET (VOIDmode, target,
2776 gen_rtx_LSHIFTRT (mode, source,
2787 /* If we have IOR or XOR, and the constant can be loaded in a
2788 single instruction, and we can find a temporary to put it in,
2789 then this can be done in two instructions instead of 3-4. */
2791 /* TARGET can't be NULL if SUBTARGETS is 0 */
2792 || (reload_completed && !reg_mentioned_p (target, source)))
2794 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2798 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2800 emit_constant_insn (cond,
2801 gen_rtx_SET (VOIDmode, sub,
2803 emit_constant_insn (cond,
2804 gen_rtx_SET (VOIDmode, target,
2805 gen_rtx_fmt_ee (code, mode,
	 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
			   and the remainder 0s, e.g. 0xfff00000)
	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	 This can be done in 2 instructions by using shifts with mov or mvn.
	 e.g. for
	 x = x | 0xfff00000;
	 we generate
	 mvn	r0, r0, asl #12
	 mvn	r0, r0, lsr #12  */
2826 if (set_sign_bit_copies > 8
2827 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2831 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2832 rtx shift = GEN_INT (set_sign_bit_copies);
2836 gen_rtx_SET (VOIDmode, sub,
2838 gen_rtx_ASHIFT (mode,
2843 gen_rtx_SET (VOIDmode, target,
2845 gen_rtx_LSHIFTRT (mode, sub,
2852 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2854 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
	E.g. for r0 = r0 | 0xfff:
	     mvn	r0, r0, lsr #12
	     mvn	r0, r0, asl #12
      */
2862 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2866 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2867 rtx shift = GEN_INT (set_zero_bit_copies);
2871 gen_rtx_SET (VOIDmode, sub,
2873 gen_rtx_LSHIFTRT (mode,
2878 gen_rtx_SET (VOIDmode, target,
2880 gen_rtx_ASHIFT (mode, sub,
2886 /* This will never be reached for Thumb2 because orn is a valid
2887 instruction. This is for Thumb1 and the ARM 32 bit cases.
2889 x = y | constant (such that ~constant is a valid constant)
2891 x = ~(~y & ~constant).
2893 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2897 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2898 emit_constant_insn (cond,
2899 gen_rtx_SET (VOIDmode, sub,
2900 gen_rtx_NOT (mode, source)));
2903 sub = gen_reg_rtx (mode);
2904 emit_constant_insn (cond,
2905 gen_rtx_SET (VOIDmode, sub,
2906 gen_rtx_AND (mode, source,
2908 emit_constant_insn (cond,
2909 gen_rtx_SET (VOIDmode, target,
2910 gen_rtx_NOT (mode, sub)));
2917 /* See if two shifts will do 2 or more insn's worth of work. */
2918 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2920 HOST_WIDE_INT shift_mask = ((0xffffffff
2921 << (32 - clear_sign_bit_copies))
2924 if ((remainder | shift_mask) != 0xffffffff)
2928 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2929 insns = arm_gen_constant (AND, mode, cond,
2930 remainder | shift_mask,
2931 new_src, source, subtargets, 1);
2936 rtx targ = subtargets ? NULL_RTX : target;
2937 insns = arm_gen_constant (AND, mode, cond,
2938 remainder | shift_mask,
2939 targ, source, subtargets, 0);
2945 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2946 rtx shift = GEN_INT (clear_sign_bit_copies);
2948 emit_insn (gen_ashlsi3 (new_src, source, shift));
2949 emit_insn (gen_lshrsi3 (target, new_src, shift));
2955 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2957 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2959 if ((remainder | shift_mask) != 0xffffffff)
2963 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2965 insns = arm_gen_constant (AND, mode, cond,
2966 remainder | shift_mask,
2967 new_src, source, subtargets, 1);
2972 rtx targ = subtargets ? NULL_RTX : target;
2974 insns = arm_gen_constant (AND, mode, cond,
2975 remainder | shift_mask,
2976 targ, source, subtargets, 0);
2982 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2983 rtx shift = GEN_INT (clear_zero_bit_copies);
2985 emit_insn (gen_lshrsi3 (new_src, source, shift));
2986 emit_insn (gen_ashlsi3 (target, new_src, shift));
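	  /* Illustrative example (not from the original sources): for
	     x &= 0xfff00000 the mask has clear_zero_bit_copies == 20, so
	     rather than materializing the constant, the pair of shifts
	     just above emits
		mov	r0, r0, lsr #20
		mov	r0, r0, asl #20
	     which clears the low 20 bits in two instructions.  */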
2998 for (i = 0; i < 32; i++)
2999 if (remainder & (1 << i))
3003 || (code != IOR && can_invert && num_bits_set > 16))
3004 remainder = (~remainder) & 0xffffffff;
3005 else if (code == PLUS && num_bits_set > 16)
3006 remainder = (-remainder) & 0xffffffff;
  /* Now try and find a way of doing the job in either two or three
     instructions.
3015 We start by looking for the largest block of zeros that are aligned on
3016 a 2-bit boundary, we then fill up the temps, wrapping around to the
3017 top of the word when we drop off the bottom.
3018 In the worst case this code should produce no more than four insns.
3019 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3020 best place to start. */
  /* ??? Use thumb2 replicated constants when the high and low halfwords are
     the same.  */
3028 int best_consecutive_zeros = 0;
3030 for (i = 0; i < 32; i += 2)
3032 int consecutive_zeros = 0;
3034 if (!(remainder & (3 << i)))
3036 while ((i < 32) && !(remainder & (3 << i)))
3038 consecutive_zeros += 2;
3041 if (consecutive_zeros > best_consecutive_zeros)
3043 best_consecutive_zeros = consecutive_zeros;
3044 best_start = i - consecutive_zeros;
3050 /* So long as it won't require any more insns to do so, it's
3051 desirable to emit a small constant (in bits 0...9) in the last
3052 insn. This way there is more chance that it can be combined with
3053 a later addressing insn to form a pre-indexed load or store
3054 operation. Consider:
3056 *((volatile int *)0xe0000100) = 1;
3057 *((volatile int *)0xe0000110) = 2;
	   We want this to wind up as:

		    mov rA, #0xe0000000
		    mov rB, #1
		    str rB, [rA, #0x100]
		    mov rB, #2
		    str rB, [rA, #0x110]
3067 rather than having to synthesize both large constants from scratch.
3069 Therefore, we calculate how many insns would be required to emit
3070 the constant starting from `best_start', and also starting from
3071 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3072 yield a shorter sequence, we may as well use zero. */
    if (best_start != 0
	&& ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
	&& (count_insns_for_constant (remainder, 0) <=
	    count_insns_for_constant (remainder, best_start)))
      best_start = 0;
3080 /* Now start emitting the insns. */
3088 if (remainder & (3 << (i - 2)))
3093 temp1 = remainder & ((0x0ff << end)
3094 | ((i < end) ? (0xff >> (32 - end)) : 0));
3095 remainder &= ~temp1;
3099 rtx new_src, temp1_rtx;
3101 if (code == SET || code == MINUS)
3103 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3104 if (can_invert && code != MINUS)
3109 if (remainder && subtargets)
3110 new_src = gen_reg_rtx (mode);
3115 else if (can_negate)
3119 temp1 = trunc_int_for_mode (temp1, mode);
3120 temp1_rtx = GEN_INT (temp1);
3124 else if (code == MINUS)
3125 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3127 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3129 emit_constant_insn (cond,
3130 gen_rtx_SET (VOIDmode, new_src,
3140 else if (code == MINUS)
	  /* ARM allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
3162 /* Canonicalize a comparison so that we are more likely to recognize it.
3163 This can be done for a few constant compares, where we can make the
3164 immediate value easier to load. */
enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
			     rtx * op1)
{
3170 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3171 unsigned HOST_WIDE_INT maxval;
3172 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3183 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3185 *op1 = GEN_INT (i + 1);
3186 return code == GT ? GE : LT;
3193 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3195 *op1 = GEN_INT (i - 1);
3196 return code == GE ? GT : LE;
3202 if (i != ~((unsigned HOST_WIDE_INT) 0)
3203 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3205 *op1 = GEN_INT (i + 1);
3206 return code == GTU ? GEU : LTU;
3213 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3215 *op1 = GEN_INT (i - 1);
3216 return code == GEU ? GTU : LEU;
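/* Illustrative example (not from the original sources): 0xffff is not
   an encodable ARM immediate but 0x10000 is, so (GT x 0xffff) is
   rewritten as (GE x 0x10000), avoiding the extra instructions needed
   to materialize 0xffff.  */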
3228 /* Define how to find the value returned by a function. */
rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
3234 enum machine_mode mode;
3235 int unsignedp ATTRIBUTE_UNUSED;
3236 rtx r ATTRIBUTE_UNUSED;
3238 mode = TYPE_MODE (type);
3240 if (TARGET_AAPCS_BASED)
3241 return aapcs_allocate_return_reg (mode, type, func);
3243 /* Promote integer types. */
3244 if (INTEGRAL_TYPE_P (type))
3245 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3247 /* Promotes small structs returned in a register to full-word size
3248 for big-endian AAPCS. */
3249 if (arm_return_in_msb (type))
3251 HOST_WIDE_INT size = int_size_in_bytes (type);
3252 if (size % UNITS_PER_WORD != 0)
3254 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3255 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
  return LIBCALL_VALUE (mode);
}
3263 libcall_eq (const void *p1, const void *p2)
3265 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3269 libcall_hash (const void *p1)
3271 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3275 add_libcall (htab_t htab, rtx libcall)
3277 *htab_find_slot (htab, libcall, INSERT) = libcall;
3281 arm_libcall_uses_aapcs_base (const_rtx libcall)
3283 static bool init_done = false;
3284 static htab_t libcall_htab;
      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
				  NULL);
3292 add_libcall (libcall_htab,
3293 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3294 add_libcall (libcall_htab,
3295 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3296 add_libcall (libcall_htab,
3297 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3298 add_libcall (libcall_htab,
3299 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3301 add_libcall (libcall_htab,
3302 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3303 add_libcall (libcall_htab,
3304 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3305 add_libcall (libcall_htab,
3306 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3307 add_libcall (libcall_htab,
3308 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3310 add_libcall (libcall_htab,
3311 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3312 add_libcall (libcall_htab,
3313 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3314 add_libcall (libcall_htab,
3315 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3316 add_libcall (libcall_htab,
3317 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3318 add_libcall (libcall_htab,
3319 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3320 add_libcall (libcall_htab,
3321 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3324 return libcall && htab_find (libcall_htab, libcall) != NULL;
rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
3330 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3331 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3333 /* The following libcalls return their result in integer registers,
3334 even though they return a floating point value. */
3335 if (arm_libcall_uses_aapcs_base (libcall))
3336 return gen_rtx_REG (mode, ARG_REGISTER(1));
  return LIBCALL_VALUE (mode);
}
3343 /* Determine the amount of memory needed to store the possible return
3344 registers of an untyped call. */
3346 arm_apply_result_size (void)
3352 if (TARGET_HARD_FLOAT_ABI)
3358 if (TARGET_MAVERICK)
3361 if (TARGET_IWMMXT_ABI)
3368 /* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
3378 if (TARGET_AAPCS_BASED)
3380 /* Simple, non-aggregate types (ie not including vectors and
3381 complex) are always returned in a register (or registers).
3382 We don't care about which register here, so we can short-cut
3383 some of the detail. */
3384 if (!AGGREGATE_TYPE_P (type)
3385 && TREE_CODE (type) != VECTOR_TYPE
3386 && TREE_CODE (type) != COMPLEX_TYPE)
      /* Any return value that is no larger than one word can be
	 returned in r0.  */
3391 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3394 /* Check any available co-processors to see if they accept the
3395 type as a register candidate (VFP, for example, can return
3396 some aggregates in consecutive registers). These aren't
3397 available if the call is variadic. */
3398 if (aapcs_select_return_coproc (type, fntype) >= 0)
3401 /* Vector values should be returned using ARM registers, not
3402 memory (unless they're over 16 bytes, which will break since
3403 we only have four call-clobbered registers to play with). */
3404 if (TREE_CODE (type) == VECTOR_TYPE)
3405 return (size < 0 || size > (4 * UNITS_PER_WORD));
      /* The rest go in memory.  */
      return true;
    }
3411 if (TREE_CODE (type) == VECTOR_TYPE)
3412 return (size < 0 || size > (4 * UNITS_PER_WORD));
3414 if (!AGGREGATE_TYPE_P (type) &&
3415 (TREE_CODE (type) != VECTOR_TYPE))
3416 /* All simple types are returned in registers. */
3419 if (arm_abi != ARM_ABI_APCS)
3421 /* ATPCS and later return aggregate types in memory only if they are
3422 larger than a word (or are variable size). */
3423 return (size < 0 || size > UNITS_PER_WORD);
3426 /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
3429 /* All structures/unions bigger than one word are returned in memory.
3430 Also catch the case where int_size_in_bytes returns -1. In this case
3431 the aggregate is either huge or of variable size, and in either case
3432 we will want to return it via memory and not in a register. */
3433 if (size < 0 || size > UNITS_PER_WORD)
3436 if (TREE_CODE (type) == RECORD_TYPE)
3440 /* For a struct the APCS says that we only return in a register
3441 if the type is 'integer like' and every addressable element
3442 has an offset of zero. For practical purposes this means
3443 that the structure can have at most one non bit-field element
3444 and that this element must be the first one in the structure. */
3446 /* Find the first field, ignoring non FIELD_DECL things which will
3447 have been created by C++. */
3448 for (field = TYPE_FIELDS (type);
3449 field && TREE_CODE (field) != FIELD_DECL;
3450 field = TREE_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3456 /* Check that the first field is valid for returning in a register. */
3458 /* ... Floats are not allowed */
3459 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3462 /* ... Aggregates that are not themselves valid for returning in
3463 a register are not allowed. */
3464 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3467 /* Now check the remaining fields, if any. Only bitfields are allowed,
3468 since they are not addressable. */
3469 for (field = TREE_CHAIN (field);
3471 field = TREE_CHAIN (field))
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
3483 if (TREE_CODE (type) == UNION_TYPE)
3487 /* Unions can be returned in registers if every element is
3488 integral, or can be returned in an integer register. */
3489 for (field = TYPE_FIELDS (type);
3491 field = TREE_CHAIN (field))
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
3505 #endif /* not ARM_WINCE */
  /* Return all other types in memory.  */
  return true;
}
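/* Illustrative cases (a sketch of the rules above, not original text):
   - struct { int a; }		returned in r0 everywhere (one word);
   - struct { int a, b; }	memory under ATPCS/AAPCS (bigger than a
				word), and under APCS too, since the
				second member is not a bitfield;
   - struct { float f; }	memory under APCS (float member).  */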
3511 /* Indicate whether or not words of a double are in big-endian order. */
int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;
3519 /* For FPA, float words are always big-endian. For VFP, floats words
3520 follow the memory system mode. */
3528 return (TARGET_BIG_END ? 1 : 0);
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
3539 {"aapcs", ARM_PCS_AAPCS},
3540 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
3548 {NULL, ARM_PCS_UNKNOWN}
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;
3557 /* Get the value of the argument. */
3558 if (TREE_VALUE (attr) == NULL_TREE
3559 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3560 return ARM_PCS_UNKNOWN;
3562 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3564 /* Check it against the list of known arguments. */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS attribute argument.  */
  return ARM_PCS_UNKNOWN;
}
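/* Illustrative usage (not from the original sources): with the table
   above, a declaration such as

     double f (double) __attribute__ ((pcs ("aapcs")));

   forces the base (soft-float) AAPCS calling convention for f even when
   the translation unit defaults to the VFP variant.  */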
3573 /* Get the PCS variant to use for this call. TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
3575 the call could be indirect or if this is a library call. */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
3579 bool user_convention = false;
3580 enum arm_pcs user_pcs = arm_pcs_default;
      attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
	  user_convention = true;
	}
3592 if (TARGET_AAPCS_BASED)
3594 /* Detect varargs functions. These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = (TYPE_ARG_TYPES (type) != 0
3598 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3599 != void_type_node));
3601 if (user_convention)
3603 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
3605 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
3615 /* Local functions never leak outside this compilation unit,
3616 so we are free to use whatever conventions are
3618 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
3624 else if (user_convention && user_pcs != arm_pcs_default)
3625 sorry ("PCS variant");
3627 /* For everything else we use the target's default. */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
3634 const_tree fntype ATTRIBUTE_UNUSED,
3635 rtx libcall ATTRIBUTE_UNUSED,
3636 const_tree fndecl ATTRIBUTE_UNUSED)
3638 /* Record the unallocated VFP registers. */
3639 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3640 pcum->aapcs_vfp_reg_alloc = 0;
3643 /* Walk down the type tree of TYPE counting consecutive base elements.
3644 If *MODEP is VOIDmode, then set it to the first valid floating point
3645 type. If a non-floating point type is found, or if a floating point
3646 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3647 otherwise return the count in the sub-tree. */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
3651 enum machine_mode mode;
3654 switch (TREE_CODE (type))
3657 mode = TYPE_MODE (type);
3658 if (mode != DFmode && mode != SFmode)
3661 if (*modep == VOIDmode)
3670 mode = TYPE_MODE (TREE_TYPE (type));
3671 if (mode != DFmode && mode != SFmode)
3674 if (*modep == VOIDmode)
3683 /* Use V2SImode and V4SImode as representatives of all 64-bit
3684 and 128-bit vector types, whether or not those modes are
3685 supported with the present options. */
3686 size = int_size_in_bytes (type);
3699 if (*modep == VOIDmode)
3702 /* Vector modes are considered to be opaque: two vectors are
3703 equivalent for the purposes of being homogeneous aggregates
3704 if they are the same size. */
3713 tree index = TYPE_DOMAIN (type);
3715 /* Can't handle incomplete types. */
3716 if (!COMPLETE_TYPE_P(type))
3719 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3722 || !TYPE_MAX_VALUE (index)
3723 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3724 || !TYPE_MIN_VALUE (index)
3725 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3729 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3730 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3732 /* There must be no padding. */
3733 if (!host_integerp (TYPE_SIZE (type), 1)
3734 || (tree_low_cst (TYPE_SIZE (type), 1)
3735 != count * GET_MODE_BITSIZE (*modep)))
3747 /* Can't handle incomplete types. */
3748 if (!COMPLETE_TYPE_P(type))
3751 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3753 if (TREE_CODE (field) != FIELD_DECL)
3756 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3762 /* There must be no padding. */
3763 if (!host_integerp (TYPE_SIZE (type), 1)
3764 || (tree_low_cst (TYPE_SIZE (type), 1)
3765 != count * GET_MODE_BITSIZE (*modep)))
3772 case QUAL_UNION_TYPE:
3774 /* These aren't very interesting except in a degenerate case. */
3779 /* Can't handle incomplete types. */
3780 if (!COMPLETE_TYPE_P(type))
3783 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3785 if (TREE_CODE (field) != FIELD_DECL)
3788 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3791 count = count > sub_count ? count : sub_count;
3794 /* There must be no padding. */
3795 if (!host_integerp (TYPE_SIZE (type), 1)
3796 || (tree_low_cst (TYPE_SIZE (type), 1)
3797 != count * GET_MODE_BITSIZE (*modep)))
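/* Illustrative example (not from the original sources): for

     struct quat { float x, y, z, w; };

   the walk above finds four consecutive SFmode elements and no padding,
   so it returns 4 with *modep == SFmode; such a homogeneous aggregate
   can be passed or returned in s0-s3 under the AAPCS VFP variant.  */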
static bool
aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
3812 enum machine_mode *base_mode,
3815 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3816 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3817 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3823 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3826 *base_mode = (mode == DCmode ? DFmode : SFmode);
3829 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3831 enum machine_mode aggregate_mode = VOIDmode;
3832 int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
3834 if (ag_count > 0 && ag_count <= 4)
3837 *base_mode = aggregate_mode;
3845 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3846 enum machine_mode mode, const_tree type)
3848 int count ATTRIBUTE_UNUSED;
3849 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3851 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3852 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3853 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3855 return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3859 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3862 if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
3863 || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
3864 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3866 return aapcs_vfp_is_call_or_return_candidate (mode, type,
3867 &pcum->aapcs_vfp_rmode,
3868 &pcum->aapcs_vfp_rcount);
3872 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3873 const_tree type ATTRIBUTE_UNUSED)
3875 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3876 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3879 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3880 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3882 pcum->aapcs_vfp_reg_alloc = mask << regno;
3883 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3886 int rcount = pcum->aapcs_vfp_rcount;
3888 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3892 /* Avoid using unsupported vector modes. */
3893 if (rmode == V2SImode)
3895 else if (rmode == V4SImode)
3902 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3903 for (i = 0; i < rcount; i++)
3905 rtx tmp = gen_rtx_REG (rmode,
3906 FIRST_VFP_REGNUM + regno + i * rshift);
3907 tmp = gen_rtx_EXPR_LIST
3909 GEN_INT (i * GET_MODE_SIZE (rmode)));
3910 XVECEXP (par, 0, i) = tmp;
3913 pcum->aapcs_reg = par;
3916 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3923 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3924 enum machine_mode mode,
3925 const_tree type ATTRIBUTE_UNUSED)
3927 if (!(pcs_variant == ARM_PCS_AAPCS_VFP
3928 || (pcs_variant == ARM_PCS_AAPCS_LOCAL
3929 && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
3931 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3934 enum machine_mode ag_mode;
3939 aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
3943 if (ag_mode == V2SImode)
3945 else if (ag_mode == V4SImode)
3951 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3952 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3953 for (i = 0; i < count; i++)
3955 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3956 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3957 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3958 XVECEXP (par, 0, i) = tmp;
3964 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3968 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3969 enum machine_mode mode ATTRIBUTE_UNUSED,
3970 const_tree type ATTRIBUTE_UNUSED)
3972 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3973 pcum->aapcs_vfp_reg_alloc = 0;
3977 #define AAPCS_CP(X) \
3979 aapcs_ ## X ## _cum_init, \
3980 aapcs_ ## X ## _is_call_candidate, \
3981 aapcs_ ## X ## _allocate, \
3982 aapcs_ ## X ## _is_return_candidate, \
3983 aapcs_ ## X ## _allocate_return_reg, \
3984 aapcs_ ## X ## _advance \
3987 /* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
3995 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
3996 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
3998 /* Return true if an argument of mode MODE (or type TYPE if MODE is
3999 BLKmode) is a candidate for this co-processor's registers; this
4000 function should ignore any position-dependent state in
4001 CUMULATIVE_ARGS and only use call-type dependent information. */
4002 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4004 /* Return true if the argument does get a co-processor register; it
4005 should set aapcs_reg to an RTX of the register allocated as is
4006 required for a return from FUNCTION_ARG. */
4007 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4009 /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
4011 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4013 /* Allocate and return an RTX element to hold the return type of a
     call; this routine must not fail and will only be called if
4015 is_return_candidate returned true with the same parameters. */
4016 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
  /* Finish processing this argument and prepare to start processing
     the next one.  */
4020 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP

static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
			  const_tree type)
{
  int i;
4034 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
4044 /* We aren't passed a decl, so we can't check that a call is local.
4045 However, it isn't clear that that would be a win anyway, since it
4046 might limit some tail-calling opportunities. */
4047 enum arm_pcs pcs_variant;
4051 const_tree fndecl = NULL_TREE;
4053 if (TREE_CODE (fntype) == FUNCTION_DECL)
4056 fntype = TREE_TYPE (fntype);
4059 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4062 pcs_variant = arm_pcs_default;
4064 if (pcs_variant != ARM_PCS_AAPCS)
4068 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4069 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
static rtx
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4081 /* We aren't passed a decl, so we can't check that a call is local.
4082 However, it isn't clear that that would be a win anyway, since it
4083 might limit some tail-calling opportunities. */
4084 enum arm_pcs pcs_variant;
4085 int unsignedp ATTRIBUTE_UNUSED;
4089 const_tree fndecl = NULL_TREE;
4091 if (TREE_CODE (fntype) == FUNCTION_DECL)
4094 fntype = TREE_TYPE (fntype);
4097 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4100 pcs_variant = arm_pcs_default;
4102 /* Promote integer types. */
4103 if (type && INTEGRAL_TYPE_P (type))
4104 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4106 if (pcs_variant != ARM_PCS_AAPCS)
4110 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4111 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4113 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4117 /* Promotes small structs returned in a register to full-word size
4118 for big-endian AAPCS. */
4119 if (type && arm_return_in_msb (type))
4121 HOST_WIDE_INT size = int_size_in_bytes (type);
4122 if (size % UNITS_PER_WORD != 0)
4124 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4125 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4129 return gen_rtx_REG (mode, R0_REGNUM);
static rtx
aapcs_libcall_value (enum machine_mode mode)
{
  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
4138 /* Lay out a function argument using the AAPCS rules. The rule
4139 numbers referred to here are those in the AAPCS. */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4142 tree type, int named)
4147 /* We only need to do this once per argument. */
4148 if (pcum->aapcs_arg_processed)
4151 pcum->aapcs_arg_processed = true;
4153 /* Special case: if named is false then we are handling an incoming
4154 anonymous argument which is on the stack. */
4158 /* Is this a potential co-processor register candidate? */
4159 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4161 int slot = aapcs_select_call_coproc (pcum, mode, type);
4162 pcum->aapcs_cprc_slot = slot;
4164 /* We don't have to apply any of the rules from part B of the
	     preparation phase; these are handled elsewhere in the
	     compiler.  */

	  /* A co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
4172 if (!pcum->aapcs_cprc_failed[slot])
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;
4179 /* C2.cp - Put the argument on the stack and note that we
4180 can't assign any more candidates in this slot. We also
4181 need to note that we have allocated stack space, so that
4182 we won't later try to split a non-cprc candidate between
4183 core registers and the stack. */
4184 pcum->aapcs_cprc_failed[slot] = true;
4185 pcum->can_split = false;
      /* We didn't get a register, so this argument goes on the
	 stack.  */
4190 gcc_assert (pcum->can_split == false);
4195 /* C3 - For double-word aligned arguments, round the NCRN up to the
4196 next even number. */
4197 ncrn = pcum->aapcs_ncrn;
4198 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4201 nregs = ARM_NUM_REGS2(mode, type);
4203 /* Sigh, this test should really assert that nregs > 0, but a GCC
4204 extension allows empty structs and then gives them empty size; it
4205 then allows such a structure to be passed by value. For some of
4206 the code below we have to pretend that such an argument has
4207 non-zero size so that we 'locate' it correctly either in
4208 registers or on the stack. */
4209 gcc_assert (nregs >= 0);
4211 nregs2 = nregs ? nregs : 1;
4213 /* C4 - Argument fits entirely in core registers. */
4214 if (ncrn + nregs2 <= NUM_ARG_REGS)
4216 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4217 pcum->aapcs_next_ncrn = ncrn + nregs;
4221 /* C5 - Some core registers left and there are no arguments already
4222 on the stack: split this argument between the remaining core
4223 registers and the stack. */
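/* Illustrative case for rule C5:

     struct s { int x[3]; };
     void f (int a, int b, struct s c);

   A and B take r0 and r1; C needs three words, so it is split with
   its first two words in r2/r3 (aapcs_partial == 8) and its last
   word on the stack.  */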
4224 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4226 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4227 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4228 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4232 /* C6 - NCRN is set to 4. */
4233 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
/* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4239 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4240 for a call to a function whose data type is FNTYPE.
4241 For a library call, FNTYPE is NULL. */
4243 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4245 tree fndecl ATTRIBUTE_UNUSED)
4247 /* Long call handling. */
4249 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4251 pcum->pcs_variant = arm_pcs_default;
4253 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4255 if (arm_libcall_uses_aapcs_base (libname))
4256 pcum->pcs_variant = ARM_PCS_AAPCS;
4258 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4259 pcum->aapcs_reg = NULL_RTX;
4260 pcum->aapcs_partial = 0;
4261 pcum->aapcs_arg_processed = false;
4262 pcum->aapcs_cprc_slot = -1;
4263 pcum->can_split = true;
4265 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4269 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4271 pcum->aapcs_cprc_failed[i] = false;
4272 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4280 /* On the ARM, the offset starts at 0. */
4282 pcum->iwmmxt_nregs = 0;
4283 pcum->can_split = true;
/* Varargs vectors are treated the same as long long.
   named_count avoids having to change the way arm handles 'named'.  */
4287 pcum->named_count = 0;
4290 if (TARGET_REALLY_IWMMXT && fntype)
4294 for (fn_arg = TYPE_ARG_TYPES (fntype);
4296 fn_arg = TREE_CHAIN (fn_arg))
4297 pcum->named_count += 1;
4299 if (! pcum->named_count)
4300 pcum->named_count = INT_MAX;
4305 /* Return true if mode/type need doubleword alignment. */
4307 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4309 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4310 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
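/* For instance, on AAPCS targets PARM_BOUNDARY is 32, so DImode and
   DFmode arguments (64-bit alignment) need doubleword alignment while
   SImode arguments do not.  (Illustrative values; PARM_BOUNDARY comes
   from arm.h.)  */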
4314 /* Determine where to put an argument to a function.
4315 Value is zero to push the argument on the stack,
4316 or a hard register in which to store the argument.
4318 MODE is the argument's machine mode.
4319 TYPE is the data type of the argument (as a tree).
This is null for libcalls where that information may not be
available.
4322 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4323 the preceding args and about the function being called.
4324 NAMED is nonzero if this argument is a named parameter
4325 (otherwise it is an extra parameter matching an ellipsis). */
4328 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4329 tree type, int named)
4333 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4334 a call insn (op3 of a call_value insn). */
4335 if (mode == VOIDmode)
4338 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4340 aapcs_layout_arg (pcum, mode, type, named);
4341 return pcum->aapcs_reg;
4344 /* Varargs vectors are treated the same as long long.
4345 named_count avoids having to change the way arm handles 'named' */
4346 if (TARGET_IWMMXT_ABI
4347 && arm_vector_mode_supported_p (mode)
4348 && pcum->named_count > pcum->nargs + 1)
4350 if (pcum->iwmmxt_nregs <= 9)
4351 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4354 pcum->can_split = false;
4359 /* Put doubleword aligned quantities in even register pairs. */
4361 && ARM_DOUBLEWORD_ALIGN
4362 && arm_needs_doubleword_align (mode, type))
4365 if (mode == VOIDmode)
4366 /* Pick an arbitrary value for operand 2 of the call insn. */
4369 /* Only allow splitting an arg between regs and memory if all preceding
4370 args were allocated to regs. For args passed by reference we only count
4371 the reference pointer. */
4372 if (pcum->can_split)
4375 nregs = ARM_NUM_REGS2 (mode, type);
4377 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4380 return gen_rtx_REG (mode, pcum->nregs);
4384 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4385 tree type, bool named)
4387 int nregs = pcum->nregs;
4389 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4391 aapcs_layout_arg (pcum, mode, type, named);
4392 return pcum->aapcs_partial;
4395 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4398 if (NUM_ARG_REGS > nregs
4399 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4401 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4407 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4408 tree type, bool named)
4410 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4412 aapcs_layout_arg (pcum, mode, type, named);
4414 if (pcum->aapcs_cprc_slot >= 0)
4416 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4418 pcum->aapcs_cprc_slot = -1;
4421 /* Generic stuff. */
4422 pcum->aapcs_arg_processed = false;
4423 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4424 pcum->aapcs_reg = NULL_RTX;
4425 pcum->aapcs_partial = 0;
4430 if (arm_vector_mode_supported_p (mode)
4431 && pcum->named_count > pcum->nargs
4432 && TARGET_IWMMXT_ABI)
4433 pcum->iwmmxt_nregs += 1;
4435 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4439 /* Variable sized types are passed by reference. This is a GCC
4440 extension to the ARM ABI. */
4443 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4444 enum machine_mode mode ATTRIBUTE_UNUSED,
4445 const_tree type, bool named ATTRIBUTE_UNUSED)
4447 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
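/* e.g. a GNU C struct containing a variable-length array
   (illustrative):

     void g (int n)
     {
       struct { int a[n]; } v;
       h (v);   -- V's TYPE_SIZE is not an INTEGER_CST, so V is
                   passed by reference.
     }  */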
4450 /* Encode the current state of the #pragma [no_]long_calls. */
4453 OFF, /* No #pragma [no_]long_calls is in effect. */
4454 LONG, /* #pragma long_calls is in effect. */
4455 SHORT /* #pragma no_long_calls is in effect. */
4458 static arm_pragma_enum arm_pragma_long_calls = OFF;
4461 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4463 arm_pragma_long_calls = LONG;
4467 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4469 arm_pragma_long_calls = SHORT;
4473 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4475 arm_pragma_long_calls = OFF;
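/* Typical usage of these pragmas (illustrative):

     #pragma long_calls
     void far_away (void);     -- gets the long_call attribute
     #pragma no_long_calls
     void near_by (void);      -- gets the short_call attribute
     #pragma long_calls_off
     void whatever (void);     -- back to the command-line default  */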
4478 /* Handle an attribute requiring a FUNCTION_DECL;
4479 arguments as in struct attribute_spec.handler. */
4481 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4482 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4484 if (TREE_CODE (*node) != FUNCTION_DECL)
4486 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4488 *no_add_attrs = true;
4494 /* Handle an "interrupt" or "isr" attribute;
4495 arguments as in struct attribute_spec.handler. */
4497 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4502 if (TREE_CODE (*node) != FUNCTION_DECL)
4504 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4506 *no_add_attrs = true;
/* FIXME: the argument, if any, is checked for type attributes;
   should it be checked for decl ones?  */
4513 if (TREE_CODE (*node) == FUNCTION_TYPE
4514 || TREE_CODE (*node) == METHOD_TYPE)
4516 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4518 warning (OPT_Wattributes, "%qE attribute ignored",
4520 *no_add_attrs = true;
4523 else if (TREE_CODE (*node) == POINTER_TYPE
4524 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4525 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4526 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4528 *node = build_variant_type_copy (*node);
4529 TREE_TYPE (*node) = build_type_attribute_variant
4531 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4532 *no_add_attrs = true;
4536 /* Possibly pass this attribute on from the type to a decl. */
4537 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4538 | (int) ATTR_FLAG_FUNCTION_NEXT
4539 | (int) ATTR_FLAG_ARRAY_NEXT))
4541 *no_add_attrs = true;
4542 return tree_cons (name, args, NULL_TREE);
4546 warning (OPT_Wattributes, "%qE attribute ignored",
4555 /* Handle a "pcs" attribute; arguments as in struct
4556 attribute_spec.handler. */
4558 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4561 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4563 warning (OPT_Wattributes, "%qE attribute ignored", name);
4564 *no_add_attrs = true;
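/* For example (illustrative):

     double f (double) __attribute__ ((pcs ("aapcs")));

   selects the core-register variant of the AAPCS for F even when the
   default is "aapcs-vfp"; an unrecognized string is diagnosed above
   and the attribute dropped.  */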
4569 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4570 /* Handle the "notshared" attribute. This attribute is another way of
4571 requesting hidden visibility. ARM's compiler supports
4572 "__declspec(notshared)"; we support the same thing via an
4576 arm_handle_notshared_attribute (tree *node,
4577 tree name ATTRIBUTE_UNUSED,
4578 tree args ATTRIBUTE_UNUSED,
4579 int flags ATTRIBUTE_UNUSED,
4582 tree decl = TYPE_NAME (*node);
4586 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4587 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4588 *no_add_attrs = false;
4594 /* Return 0 if the attributes for two types are incompatible, 1 if they
4595 are compatible, and 2 if they are nearly compatible (which causes a
4596 warning to be generated). */
4598 arm_comp_type_attributes (const_tree type1, const_tree type2)
4602 /* Check for mismatch of non-default calling convention. */
4603 if (TREE_CODE (type1) != FUNCTION_TYPE)
4606 /* Check for mismatched call attributes. */
4607 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4608 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4609 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4610 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4612 /* Only bother to check if an attribute is defined. */
4613 if (l1 | l2 | s1 | s2)
4615 /* If one type has an attribute, the other must have the same attribute. */
4616 if ((l1 != l2) || (s1 != s2))
4619 /* Disallow mixed attributes. */
4620 if ((l1 & s2) || (l2 & s1))
4624 /* Check for mismatched ISR attribute. */
4625 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4627 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4628 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4630 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4637 /* Assigns default attributes to newly defined type. This is used to
4638 set short_call/long_call attributes for function types of
4639 functions defined inside corresponding #pragma scopes. */
4641 arm_set_default_type_attributes (tree type)
/* Add __attribute__ ((long_call)) to all functions when inside
   #pragma long_calls, or __attribute__ ((short_call)) when inside
   #pragma no_long_calls.  */
4646 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4648 tree type_attr_list, attr_name;
4649 type_attr_list = TYPE_ATTRIBUTES (type);
4651 if (arm_pragma_long_calls == LONG)
4652 attr_name = get_identifier ("long_call");
4653 else if (arm_pragma_long_calls == SHORT)
4654 attr_name = get_identifier ("short_call");
4658 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4659 TYPE_ATTRIBUTES (type) = type_attr_list;
4663 /* Return true if DECL is known to be linked into section SECTION. */
4666 arm_function_in_section_p (tree decl, section *section)
4668 /* We can only be certain about functions defined in the same
4669 compilation unit. */
4670 if (!TREE_STATIC (decl))
4673 /* Make sure that SYMBOL always binds to the definition in this
4674 compilation unit. */
4675 if (!targetm.binds_local_p (decl))
4678 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4679 if (!DECL_SECTION_NAME (decl))
4681 /* Make sure that we will not create a unique section for DECL. */
4682 if (flag_function_sections || DECL_ONE_ONLY (decl))
4686 return function_section (decl) == section;
4689 /* Return nonzero if a 32-bit "long_call" should be generated for
4690 a call from the current function to DECL. We generate a long_call
a. has an __attribute__ ((long_call))
4694 or b. is within the scope of a #pragma long_calls
4695 or c. the -mlong-calls command line switch has been specified
4697 However we do not generate a long call if the function:
4699 d. has an __attribute__ ((short_call))
4700 or e. is inside the scope of a #pragma no_long_calls
4701 or f. is defined in the same section as the current function. */
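/* So, for example (illustrative):

     extern void ext_fn (void) __attribute__ ((long_call));

   forces a 32-bit call sequence for EXT_FN even without -mlong-calls,
   while __attribute__ ((short_call)) overrides -mlong-calls in the
   other direction.  */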
4704 arm_is_long_call_p (tree decl)
4709 return TARGET_LONG_CALLS;
4711 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4712 if (lookup_attribute ("short_call", attrs))
4715 /* For "f", be conservative, and only cater for cases in which the
4716 whole of the current function is placed in the same section. */
4717 if (!flag_reorder_blocks_and_partition
4718 && TREE_CODE (decl) == FUNCTION_DECL
4719 && arm_function_in_section_p (decl, current_function_section ()))
4722 if (lookup_attribute ("long_call", attrs))
4725 return TARGET_LONG_CALLS;
4728 /* Return nonzero if it is ok to make a tail-call to DECL. */
4730 arm_function_ok_for_sibcall (tree decl, tree exp)
4732 unsigned long func_type;
4734 if (cfun->machine->sibcall_blocked)
4737 /* Never tailcall something for which we have no decl, or if we
4738 are in Thumb mode. */
4739 if (decl == NULL || TARGET_THUMB)
4742 /* The PIC register is live on entry to VxWorks PLT entries, so we
4743 must make the call before restoring the PIC register. */
4744 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4747 /* Cannot tail-call to long calls, since these are out of range of
4748 a branch instruction. */
4749 if (arm_is_long_call_p (decl))
4752 /* If we are interworking and the function is not declared static
4753 then we can't tail-call it unless we know that it exists in this
4754 compilation unit (since it might be a Thumb routine). */
4755 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4758 func_type = arm_current_func_type ();
4759 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4760 if (IS_INTERRUPT (func_type))
4763 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4765 /* Check that the return value locations are the same. For
4766 example that we aren't returning a value from the sibling in
a VFP register but then need to transfer it to a core
register.  */
4771 a = arm_function_value (TREE_TYPE (exp), decl, false);
4772 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4774 if (!rtx_equal_p (a, b))
4778 /* Never tailcall if function may be called with a misaligned SP. */
4779 if (IS_STACKALIGN (func_type))
4782 /* Everything else is ok. */
4787 /* Addressing mode support functions. */
4789 /* Return nonzero if X is a legitimate immediate operand when compiling
4790 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4792 legitimate_pic_operand_p (rtx x)
4794 if (GET_CODE (x) == SYMBOL_REF
4795 || (GET_CODE (x) == CONST
4796 && GET_CODE (XEXP (x, 0)) == PLUS
4797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4803 /* Record that the current function needs a PIC register. Initialize
4804 cfun->machine->pic_reg if we have not already done so. */
4807 require_pic_register (void)
4809 /* A lot of the logic here is made obscure by the fact that this
4810 routine gets called as part of the rtx cost estimation process.
4811 We don't want those calls to affect any assumptions about the real
4812 function; and further, we can't call entry_of_function() until we
4813 start the real expansion process. */
4814 if (!crtl->uses_pic_offset_table)
4816 gcc_assert (can_create_pseudo_p ());
4817 if (arm_pic_register != INVALID_REGNUM)
4819 if (!cfun->machine->pic_reg)
4820 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4822 /* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
process.  */
4825 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4826 crtl->uses_pic_offset_table = 1;
4832 if (!cfun->machine->pic_reg)
4833 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4835 /* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
process.  */
4838 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4840 crtl->uses_pic_offset_table = 1;
4843 arm_load_pic_register (0UL);
4847 /* We can be called during expansion of PHI nodes, where
4848 we can't yet emit instructions directly in the final
4849 insn stream. Queue the insns on the entry edge, they will
4850 be committed after everything else is expanded. */
4851 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4858 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4860 if (GET_CODE (orig) == SYMBOL_REF
4861 || GET_CODE (orig) == LABEL_REF)
4863 rtx pic_ref, address;
4867 /* If this function doesn't have a pic register, create one now. */
4868 require_pic_register ();
4872 gcc_assert (can_create_pseudo_p ());
4873 reg = gen_reg_rtx (Pmode);
4879 address = gen_reg_rtx (Pmode);
4884 emit_insn (gen_pic_load_addr_arm (address, orig));
4885 else if (TARGET_THUMB2)
4886 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
4887 else /* TARGET_THUMB1 */
4888 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4890 /* VxWorks does not impose a fixed gap between segments; the run-time
4891 gap can be different from the object-file gap. We therefore can't
4892 use GOTOFF unless we are absolutely sure that the symbol is in the
4893 same segment as the GOT. Unfortunately, the flexibility of linker
4894 scripts means that we can't be sure of that in general, so assume
4895 that GOTOFF is never valid on VxWorks. */
4896 if ((GET_CODE (orig) == LABEL_REF
4897 || (GET_CODE (orig) == SYMBOL_REF &&
4898 SYMBOL_REF_LOCAL_P (orig)))
4900 && !TARGET_VXWORKS_RTP)
4901 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4904 pic_ref = gen_const_mem (Pmode,
4905 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4909 insn = emit_move_insn (reg, pic_ref);
/* Put a REG_EQUAL note on this insn, so that it can be optimized
   by loop.  */
4913 set_unique_reg_note (insn, REG_EQUAL, orig);
4917 else if (GET_CODE (orig) == CONST)
4921 if (GET_CODE (XEXP (orig, 0)) == PLUS
4922 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4925 /* Handle the case where we have: const (UNSPEC_TLS). */
4926 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4927 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4930 /* Handle the case where we have:
const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
CONST_INT.  */
4933 if (GET_CODE (XEXP (orig, 0)) == PLUS
4934 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4935 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4937 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4943 gcc_assert (can_create_pseudo_p ());
4944 reg = gen_reg_rtx (Pmode);
4947 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4949 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4950 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4951 base == reg ? 0 : reg);
4953 if (GET_CODE (offset) == CONST_INT)
4955 /* The base register doesn't really matter, we only want to
4956 test the index for the appropriate mode. */
4957 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4959 gcc_assert (can_create_pseudo_p ());
4960 offset = force_reg (Pmode, offset);
4963 if (GET_CODE (offset) == CONST_INT)
4964 return plus_constant (base, INTVAL (offset));
4967 if (GET_MODE_SIZE (mode) > 4
4968 && (GET_MODE_CLASS (mode) == MODE_INT
4969 || TARGET_SOFT_FLOAT))
4971 emit_insn (gen_addsi3 (reg, base, offset));
4975 return gen_rtx_PLUS (Pmode, base, offset);
4982 /* Find a spare register to use during the prolog of a function. */
4985 thumb_find_work_register (unsigned long pushed_regs_mask)
4989 /* Check the argument registers first as these are call-used. The
4990 register allocation order means that sometimes r3 might be used
4991 but earlier argument registers might not, so check them all. */
4992 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
4993 if (!df_regs_ever_live_p (reg))
/* Before going on to check the call-saved registers we can try a couple
   more ways of deducing that r3 is available.  The first is when we are
   pushing anonymous arguments onto the stack and we have fewer than 4
   registers' worth of fixed arguments(*).  In this case r3 will be part of
   the variable argument list and so we can be sure that it will be
   pushed right at the start of the function.  Hence it will be available
   for the rest of the prologue.
   (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
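/* Illustrative case:

     int f (int a, ...);

   A lives in r0; r1-r3 belong to the anonymous arguments and are
   pushed in the prologue (pretend_args_size == 12), so r3 is free for
   use as a work register afterwards.  */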
5004 if (cfun->machine->uses_anonymous_args
5005 && crtl->args.pretend_args_size > 0)
5006 return LAST_ARG_REGNUM;
/* The other case is when we have fixed arguments but fewer than 4
   registers' worth.  In this case r3 might be used in the body of the
   function, but it is not being used to convey an argument into the
   function.  In theory we could just check crtl->args.size to see how
   many bytes are being passed in argument registers, but it seems to
   be unreliable.  Sometimes it will have the value 0 when in fact
   arguments are being passed.  (See testcase execute/20021111-1.c for
   an example.)  So we also check the args_info.nregs field.  The
   problem with this field is that it makes no allowance for arguments
   that are passed to the function but which are not used.  Hence we
   could miss an opportunity when a function has an unused argument in
   r3.  But it is better to be safe than sorry.  */
5020 if (! cfun->machine->uses_anonymous_args
5021 && crtl->args.size >= 0
5022 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5023 && crtl->args.info.nregs < 4)
5024 return LAST_ARG_REGNUM;
5026 /* Otherwise look for a call-saved register that is going to be pushed. */
5027 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5028 if (pushed_regs_mask & (1 << reg))
5033 /* Thumb-2 can use high regs. */
5034 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5035 if (pushed_regs_mask & (1 << reg))
5038 /* Something went wrong - thumb_compute_save_reg_mask()
5039 should have arranged for a suitable register to be pushed. */
5043 static GTY(()) int pic_labelno;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   scratch register.  */
5049 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5051 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5053 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5056 gcc_assert (flag_pic);
5058 pic_reg = cfun->machine->pic_reg;
5059 if (TARGET_VXWORKS_RTP)
5061 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5062 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5063 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5065 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5067 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5068 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5072 /* We use an UNSPEC rather than a LABEL_REF because this label
5073 never appears in the code stream. */
5075 labelno = GEN_INT (pic_labelno++);
5076 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5077 l1 = gen_rtx_CONST (VOIDmode, l1);
5079 /* On the ARM the PC register contains 'dot + 8' at the time of the
5080 addition, on the Thumb it is 'dot + 4'. */
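/* The generated ARM sequence is therefore of the form (illustrative):

       ldr     rPIC, 1f
     0:add     rPIC, pc, rPIC          @ pc reads as 0b + 8 here
       ...
     1:.word   _GLOBAL_OFFSET_TABLE_ - (0b + 8)  */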
5081 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5082 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5084 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5088 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
5089 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5091 else if (TARGET_THUMB2)
5093 /* Thumb-2 only allows very limited access to the PC. Calculate the
5094 address in a temporary register. */
5095 if (arm_pic_register != INVALID_REGNUM)
5097 pic_tmp = gen_rtx_REG (SImode,
5098 thumb_find_work_register (saved_regs));
5102 gcc_assert (can_create_pseudo_p ());
5103 pic_tmp = gen_reg_rtx (Pmode);
5106 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
5107 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
5108 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
5110 else /* TARGET_THUMB1 */
5112 if (arm_pic_register != INVALID_REGNUM
5113 && REGNO (pic_reg) > LAST_LO_REGNUM)
5115 /* We will have pushed the pic register, so we should always be
5116 able to find a work register. */
5117 pic_tmp = gen_rtx_REG (SImode,
5118 thumb_find_work_register (saved_regs));
5119 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5120 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5123 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5124 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5128 /* Need to emit this whether or not we obey regdecls,
5129 since setjmp/longjmp can cause life info to screw up. */
5134 /* Return nonzero if X is valid as an ARM state addressing register. */
5136 arm_address_register_rtx_p (rtx x, int strict_p)
5140 if (GET_CODE (x) != REG)
5146 return ARM_REGNO_OK_FOR_BASE_P (regno);
5148 return (regno <= LAST_ARM_REGNUM
5149 || regno >= FIRST_PSEUDO_REGISTER
5150 || regno == FRAME_POINTER_REGNUM
5151 || regno == ARG_POINTER_REGNUM);
5154 /* Return TRUE if this rtx is the difference of a symbol and a label,
5155 and will reduce to a PC-relative relocation in the object file.
5156 Expressions like this can be left alone when generating PIC, rather
5157 than forced through the GOT. */
5159 pcrel_constant_p (rtx x)
5161 if (GET_CODE (x) == MINUS)
5162 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
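/* e.g. (minus (symbol_ref "x") (label_ref 23)) is such a difference;
   the assembler can resolve it as data without a GOT entry.  */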
5167 /* Return nonzero if X is a valid ARM state address operand. */
5169 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5173 enum rtx_code code = GET_CODE (x);
5175 if (arm_address_register_rtx_p (x, strict_p))
5178 use_ldrd = (TARGET_LDRD
5180 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5182 if (code == POST_INC || code == PRE_DEC
5183 || ((code == PRE_INC || code == POST_DEC)
5184 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5185 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5187 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5188 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5189 && GET_CODE (XEXP (x, 1)) == PLUS
5190 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5192 rtx addend = XEXP (XEXP (x, 1), 1);
/* Don't allow ldrd post increment by register because it's hard
   to fix up invalid register choices.  */
5197 && GET_CODE (x) == POST_MODIFY
5198 && GET_CODE (addend) == REG)
5201 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5202 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5205 /* After reload constants split into minipools will have addresses
5206 from a LABEL_REF. */
5207 else if (reload_completed
5208 && (code == LABEL_REF
5210 && GET_CODE (XEXP (x, 0)) == PLUS
5211 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5212 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5215 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5218 else if (code == PLUS)
5220 rtx xop0 = XEXP (x, 0);
5221 rtx xop1 = XEXP (x, 1);
5223 return ((arm_address_register_rtx_p (xop0, strict_p)
&& GET_CODE (xop1) == CONST_INT
5225 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5226 || (arm_address_register_rtx_p (xop1, strict_p)
5227 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
/* Reload currently can't handle MINUS, so disable this for now.  */
5232 else if (GET_CODE (x) == MINUS)
5234 rtx xop0 = XEXP (x, 0);
5235 rtx xop1 = XEXP (x, 1);
5237 return (arm_address_register_rtx_p (xop0, strict_p)
5238 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5242 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5243 && code == SYMBOL_REF
5244 && CONSTANT_POOL_ADDRESS_P (x)
5246 && symbol_mentioned_p (get_pool_constant (x))
5247 && ! pcrel_constant_p (get_pool_constant (x))))
5253 /* Return nonzero if X is a valid Thumb-2 address operand. */
5255 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5258 enum rtx_code code = GET_CODE (x);
5260 if (arm_address_register_rtx_p (x, strict_p))
5263 use_ldrd = (TARGET_LDRD
5265 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5267 if (code == POST_INC || code == PRE_DEC
5268 || ((code == PRE_INC || code == POST_DEC)
5269 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5270 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5272 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5273 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5274 && GET_CODE (XEXP (x, 1)) == PLUS
5275 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5277 /* Thumb-2 only has autoincrement by constant. */
5278 rtx addend = XEXP (XEXP (x, 1), 1);
5279 HOST_WIDE_INT offset;
5281 if (GET_CODE (addend) != CONST_INT)
offset = INTVAL (addend);
5285 if (GET_MODE_SIZE (mode) <= 4)
5286 return (offset > -256 && offset < 256);
5288 return (use_ldrd && offset > -1024 && offset < 1024
5289 && (offset & 3) == 0);
5292 /* After reload constants split into minipools will have addresses
5293 from a LABEL_REF. */
5294 else if (reload_completed
5295 && (code == LABEL_REF
5297 && GET_CODE (XEXP (x, 0)) == PLUS
5298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5299 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5302 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5305 else if (code == PLUS)
5307 rtx xop0 = XEXP (x, 0);
5308 rtx xop1 = XEXP (x, 1);
5310 return ((arm_address_register_rtx_p (xop0, strict_p)
5311 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5312 || (arm_address_register_rtx_p (xop1, strict_p)
5313 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5316 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5317 && code == SYMBOL_REF
5318 && CONSTANT_POOL_ADDRESS_P (x)
5320 && symbol_mentioned_p (get_pool_constant (x))
5321 && ! pcrel_constant_p (get_pool_constant (x))))
5327 /* Return nonzero if INDEX is valid for an address index operand in
5330 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5333 HOST_WIDE_INT range;
5334 enum rtx_code code = GET_CODE (index);
5336 /* Standard coprocessor addressing modes. */
5337 if (TARGET_HARD_FLOAT
5338 && (TARGET_FPA || TARGET_MAVERICK)
5339 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5340 || (TARGET_MAVERICK && mode == DImode)))
5341 return (code == CONST_INT && INTVAL (index) < 1024
5342 && INTVAL (index) > -1024
5343 && (INTVAL (index) & 3) == 0);
5346 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5347 return (code == CONST_INT
5348 && INTVAL (index) < 1016
5349 && INTVAL (index) > -1024
5350 && (INTVAL (index) & 3) == 0);
5352 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5353 return (code == CONST_INT
5354 && INTVAL (index) < 1024
5355 && INTVAL (index) > -1024
5356 && (INTVAL (index) & 3) == 0);
5358 if (arm_address_register_rtx_p (index, strict_p)
5359 && (GET_MODE_SIZE (mode) <= 4))
5362 if (mode == DImode || mode == DFmode)
5364 if (code == CONST_INT)
5366 HOST_WIDE_INT val = INTVAL (index);
5369 return val > -256 && val < 256;
5371 return val > -4096 && val < 4092;
5374 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5377 if (GET_MODE_SIZE (mode) <= 4
5381 || (mode == QImode && outer == SIGN_EXTEND))))
5385 rtx xiop0 = XEXP (index, 0);
5386 rtx xiop1 = XEXP (index, 1);
5388 return ((arm_address_register_rtx_p (xiop0, strict_p)
5389 && power_of_two_operand (xiop1, SImode))
5390 || (arm_address_register_rtx_p (xiop1, strict_p)
5391 && power_of_two_operand (xiop0, SImode)));
5393 else if (code == LSHIFTRT || code == ASHIFTRT
5394 || code == ASHIFT || code == ROTATERT)
5396 rtx op = XEXP (index, 1);
5398 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5399 && GET_CODE (op) == CONST_INT
5401 && INTVAL (op) <= 31);
5405 /* For ARM v4 we may be doing a sign-extend operation during the
5411 || (outer == SIGN_EXTEND && mode == QImode))
5417 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5419 return (code == CONST_INT
5420 && INTVAL (index) < range
5421 && INTVAL (index) > -range);
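/* Hence for SImode both the 12-bit immediate form and the scaled
   register form are accepted (illustrative):

     (plus (reg r0) (const_int 4095))              -- ldr r2, [r0, #4095]
     (plus (reg r0) (mult (reg r1) (const_int 4))) -- ldr r2, [r0, r1, lsl #2]  */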
5424 /* Return true if OP is a valid index scaling factor for Thumb-2 address
index operand, i.e. 1, 2, 4 or 8.  */
5427 thumb2_index_mul_operand (rtx op)
if (GET_CODE (op) != CONST_INT
5435 return (val == 1 || val == 2 || val == 4 || val == 8);
5438 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5440 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5442 enum rtx_code code = GET_CODE (index);
5444 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5445 /* Standard coprocessor addressing modes. */
5446 if (TARGET_HARD_FLOAT
5447 && (TARGET_FPA || TARGET_MAVERICK)
5448 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5449 || (TARGET_MAVERICK && mode == DImode)))
5450 return (code == CONST_INT && INTVAL (index) < 1024
5451 && INTVAL (index) > -1024
5452 && (INTVAL (index) & 3) == 0);
5454 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5456 /* For DImode assume values will usually live in core regs
5457 and only allow LDRD addressing modes. */
5458 if (!TARGET_LDRD || mode != DImode)
5459 return (code == CONST_INT
5460 && INTVAL (index) < 1024
5461 && INTVAL (index) > -1024
5462 && (INTVAL (index) & 3) == 0);
5466 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5467 return (code == CONST_INT
5468 && INTVAL (index) < 1016
5469 && INTVAL (index) > -1024
5470 && (INTVAL (index) & 3) == 0);
5472 if (arm_address_register_rtx_p (index, strict_p)
5473 && (GET_MODE_SIZE (mode) <= 4))
5476 if (mode == DImode || mode == DFmode)
5478 if (code == CONST_INT)
5480 HOST_WIDE_INT val = INTVAL (index);
5481 /* ??? Can we assume ldrd for thumb2? */
5482 /* Thumb-2 ldrd only has reg+const addressing modes. */
5483 /* ldrd supports offsets of +-1020.
5484 However the ldr fallback does not. */
5485 return val > -256 && val < 256 && (val & 3) == 0;
5493 rtx xiop0 = XEXP (index, 0);
5494 rtx xiop1 = XEXP (index, 1);
5496 return ((arm_address_register_rtx_p (xiop0, strict_p)
5497 && thumb2_index_mul_operand (xiop1))
5498 || (arm_address_register_rtx_p (xiop1, strict_p)
5499 && thumb2_index_mul_operand (xiop0)));
5501 else if (code == ASHIFT)
5503 rtx op = XEXP (index, 1);
5505 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5506 && GET_CODE (op) == CONST_INT
5508 && INTVAL (op) <= 3);
5511 return (code == CONST_INT
5512 && INTVAL (index) < 4096
5513 && INTVAL (index) > -256);
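/* So in Thumb-2 an SImode load accepts offsets from -255 to +4095
   (illustrative):

     ldr r0, [r1, #4095]    -- valid, 12-bit positive encoding
     ldr r0, [r1, #-255]    -- valid, 8-bit negative encoding
     ldr r0, [r1, #-256]    -- rejected by the test above  */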
5516 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5518 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5522 if (GET_CODE (x) != REG)
5528 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5530 return (regno <= LAST_LO_REGNUM
5531 || regno > LAST_VIRTUAL_REGISTER
5532 || regno == FRAME_POINTER_REGNUM
5533 || (GET_MODE_SIZE (mode) >= 4
5534 && (regno == STACK_POINTER_REGNUM
5535 || regno >= FIRST_PSEUDO_REGISTER
5536 || x == hard_frame_pointer_rtx
5537 || x == arg_pointer_rtx)));
5540 /* Return nonzero if x is a legitimate index register. This is the case
5541 for any base register that can access a QImode object. */
5543 thumb1_index_register_rtx_p (rtx x, int strict_p)
5545 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5548 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5550 The AP may be eliminated to either the SP or the FP, so we use the
5551 least common denominator, e.g. SImode, and offsets from 0 to 64.
5553 ??? Verify whether the above is the right approach.
5555 ??? Also, the FP may be eliminated to the SP, so perhaps that
5556 needs special handling also.
5558 ??? Look at how the mips16 port solves this problem. It probably uses
5559 better ways to solve some of these problems.
5561 Although it is not incorrect, we don't accept QImode and HImode
5562 addresses based on the frame pointer or arg pointer until the
5563 reload pass starts. This is so that eliminating such addresses
5564 into stack based ones won't produce impossible code. */
5566 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5568 /* ??? Not clear if this is right. Experiment. */
5569 if (GET_MODE_SIZE (mode) < 4
5570 && !(reload_in_progress || reload_completed)
5571 && (reg_mentioned_p (frame_pointer_rtx, x)
5572 || reg_mentioned_p (arg_pointer_rtx, x)
5573 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5574 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5575 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5576 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5579 /* Accept any base register. SP only in SImode or larger. */
5580 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5583 /* This is PC relative data before arm_reorg runs. */
5584 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5585 && GET_CODE (x) == SYMBOL_REF
5586 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5589 /* This is PC relative data after arm_reorg runs. */
5590 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5592 && (GET_CODE (x) == LABEL_REF
5593 || (GET_CODE (x) == CONST
5594 && GET_CODE (XEXP (x, 0)) == PLUS
5595 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5596 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5599 /* Post-inc indexing only supported for SImode and larger. */
5600 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5601 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5604 else if (GET_CODE (x) == PLUS)
5606 /* REG+REG address can be any two index registers. */
5607 /* We disallow FRAME+REG addressing since we know that FRAME
5608 will be replaced with STACK, and SP relative addressing only
5609 permits SP+OFFSET. */
5610 if (GET_MODE_SIZE (mode) <= 4
5611 && XEXP (x, 0) != frame_pointer_rtx
5612 && XEXP (x, 1) != frame_pointer_rtx
5613 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5614 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5617 /* REG+const has 5-7 bit offset for non-SP registers. */
5618 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5619 || XEXP (x, 0) == arg_pointer_rtx)
5620 && GET_CODE (XEXP (x, 1)) == CONST_INT
5621 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
/* REG+const has a 10-bit offset for SP, but only SImode and
   larger are supported.  */
5626 /* ??? Should probably check for DI/DFmode overflow here
5627 just like GO_IF_LEGITIMATE_OFFSET does. */
5628 else if (GET_CODE (XEXP (x, 0)) == REG
5629 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5630 && GET_MODE_SIZE (mode) >= 4
5631 && GET_CODE (XEXP (x, 1)) == CONST_INT
5632 && INTVAL (XEXP (x, 1)) >= 0
5633 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5634 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5637 else if (GET_CODE (XEXP (x, 0)) == REG
5638 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5639 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5640 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5641 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5642 && GET_MODE_SIZE (mode) >= 4
5643 && GET_CODE (XEXP (x, 1)) == CONST_INT
5644 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5648 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5649 && GET_MODE_SIZE (mode) == 4
5650 && GET_CODE (x) == SYMBOL_REF
5651 && CONSTANT_POOL_ADDRESS_P (x)
5653 && symbol_mentioned_p (get_pool_constant (x))
5654 && ! pcrel_constant_p (get_pool_constant (x))))
5660 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5661 instruction of mode MODE. */
5663 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5665 switch (GET_MODE_SIZE (mode))
5668 return val >= 0 && val < 32;
5671 return val >= 0 && val < 64 && (val & 1) == 0;
5675 && (val + GET_MODE_SIZE (mode)) <= 128
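/* The largest immediate forms accepted are therefore (illustrative):

     ldrb r0, [r1, #31]     -- 5-bit offset, unscaled
     ldrh r0, [r1, #62]     -- 5-bit offset, scaled by 2
     ldr  r0, [r1, #124]    -- 5-bit offset, scaled by 4  */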
5681 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5684 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5685 else if (TARGET_THUMB2)
5686 return thumb2_legitimate_address_p (mode, x, strict_p);
5687 else /* if (TARGET_THUMB1) */
5688 return thumb1_legitimate_address_p (mode, x, strict_p);
5691 /* Build the SYMBOL_REF for __tls_get_addr. */
5693 static GTY(()) rtx tls_get_addr_libfunc;
5696 get_tls_get_addr (void)
5698 if (!tls_get_addr_libfunc)
5699 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5700 return tls_get_addr_libfunc;
5704 arm_load_tp (rtx target)
5707 target = gen_reg_rtx (SImode);
5711 /* Can return in any reg. */
5712 emit_insn (gen_load_tp_hard (target));
5716 /* Always returned in r0. Immediately copy the result into a pseudo,
5717 otherwise other uses of r0 (e.g. setting up function arguments) may
5718 clobber the value. */
5722 emit_insn (gen_load_tp_soft ());
5724 tmp = gen_rtx_REG (SImode, 0);
5725 emit_move_insn (target, tmp);
5731 load_tls_operand (rtx x, rtx reg)
5735 if (reg == NULL_RTX)
5736 reg = gen_reg_rtx (SImode);
5738 tmp = gen_rtx_CONST (SImode, x);
5740 emit_move_insn (reg, tmp);
5746 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5748 rtx insns, label, labelno, sum;
5752 labelno = GEN_INT (pic_labelno++);
5753 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5754 label = gen_rtx_CONST (VOIDmode, label);
5756 sum = gen_rtx_UNSPEC (Pmode,
5757 gen_rtvec (4, x, GEN_INT (reloc), label,
5758 GEN_INT (TARGET_ARM ? 8 : 4)),
5760 reg = load_tls_operand (sum, reg);
5763 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5764 else if (TARGET_THUMB2)
5767 /* Thumb-2 only allows very limited access to the PC. Calculate
5768 the address in a temporary register. */
5769 tmp = gen_reg_rtx (SImode);
5770 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
emit_insn (gen_addsi3 (reg, reg, tmp));
5773 else /* TARGET_THUMB1 */
5774 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5776 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5777 Pmode, 1, reg, Pmode);
5779 insns = get_insns ();
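/* Sketch of how the TLS models below map to code (illustrative):
   a global-dynamic access to "__thread int x" under -fpic calls
   __tls_get_addr via a TLS_GD32 relocation; initial-exec instead
   loads a GOT slot (TLS_IE32) and adds the thread pointer from
   arm_load_tp; local-exec adds a TLS_LE32 offset to the thread
   pointer directly.  */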
5786 legitimize_tls_address (rtx x, rtx reg)
5788 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5789 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5793 case TLS_MODEL_GLOBAL_DYNAMIC:
5794 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5795 dest = gen_reg_rtx (Pmode);
5796 emit_libcall_block (insns, dest, ret, x);
5799 case TLS_MODEL_LOCAL_DYNAMIC:
5800 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5802 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5803 share the LDM result with other LD model accesses. */
5804 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5806 dest = gen_reg_rtx (Pmode);
5807 emit_libcall_block (insns, dest, ret, eqv);
5809 /* Load the addend. */
5810 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5812 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5813 return gen_rtx_PLUS (Pmode, dest, addend);
5815 case TLS_MODEL_INITIAL_EXEC:
5816 labelno = GEN_INT (pic_labelno++);
5817 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5818 label = gen_rtx_CONST (VOIDmode, label);
5819 sum = gen_rtx_UNSPEC (Pmode,
5820 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5821 GEN_INT (TARGET_ARM ? 8 : 4)),
5823 reg = load_tls_operand (sum, reg);
5826 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5827 else if (TARGET_THUMB2)
5830 /* Thumb-2 only allows very limited access to the PC. Calculate
5831 the address in a temporary register. */
5832 tmp = gen_reg_rtx (SImode);
5833 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
emit_insn (gen_addsi3 (reg, reg, tmp));
5835 emit_move_insn (reg, gen_const_mem (SImode, reg));
5839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5840 emit_move_insn (reg, gen_const_mem (SImode, reg));
5843 tp = arm_load_tp (NULL_RTX);
5845 return gen_rtx_PLUS (Pmode, tp, reg);
5847 case TLS_MODEL_LOCAL_EXEC:
5848 tp = arm_load_tp (NULL_RTX);
5850 reg = gen_rtx_UNSPEC (Pmode,
5851 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5853 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5855 return gen_rtx_PLUS (Pmode, tp, reg);
5862 /* Try machine-dependent ways of modifying an illegitimate address
5863 to be legitimate. If we find one, return the new, valid address. */
5865 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5869 /* TODO: legitimize_address for Thumb2. */
5872 return thumb_legitimize_address (x, orig_x, mode);
5875 if (arm_tls_symbol_p (x))
5876 return legitimize_tls_address (x, NULL_RTX);
5878 if (GET_CODE (x) == PLUS)
5880 rtx xop0 = XEXP (x, 0);
5881 rtx xop1 = XEXP (x, 1);
5883 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5884 xop0 = force_reg (SImode, xop0);
5886 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5887 xop1 = force_reg (SImode, xop1);
5889 if (ARM_BASE_REGISTER_RTX_P (xop0)
5890 && GET_CODE (xop1) == CONST_INT)
5892 HOST_WIDE_INT n, low_n;
5896 /* VFP addressing modes actually allow greater offsets, but for
5897 now we just stick with the lowest common denominator. */
5899 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5911 low_n = ((mode) == TImode ? 0
5912 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5916 base_reg = gen_reg_rtx (SImode);
5917 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5918 emit_move_insn (base_reg, val);
5919 x = plus_constant (base_reg, low_n);
5921 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5922 x = gen_rtx_PLUS (SImode, xop0, xop1);
5925 /* XXX We don't allow MINUS any more -- see comment in
5926 arm_legitimate_address_outer_p (). */
5927 else if (GET_CODE (x) == MINUS)
5929 rtx xop0 = XEXP (x, 0);
5930 rtx xop1 = XEXP (x, 1);
5932 if (CONSTANT_P (xop0))
5933 xop0 = force_reg (SImode, xop0);
5935 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5936 xop1 = force_reg (SImode, xop1);
5938 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5939 x = gen_rtx_MINUS (SImode, xop0, xop1);
/* Make sure to take full advantage of the pre-indexed addressing mode
   with absolute addresses which often allows for the base register to
   be factorized for multiple adjacent memory references, and it might
   even allow for the minipool to be avoided entirely.  */
5946 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5949 HOST_WIDE_INT mask, base, index;
5952 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
use an 8-bit index.  So let's use a 12-bit index for SImode only and
5954 hope that arm_gen_constant will enable ldrb to use more bits. */
5955 bits = (mode == SImode) ? 12 : 8;
5956 mask = (1 << bits) - 1;
5957 base = INTVAL (x) & ~mask;
5958 index = INTVAL (x) & mask;
5959 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5961 /* It'll most probably be more efficient to generate the base
5962 with more bits set and use a negative index instead. */
5966 base_reg = force_reg (SImode, GEN_INT (base));
5967 x = plus_constant (base_reg, index);
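/* For example, an SImode load from absolute address 0x00001234
   becomes (illustrative numbers):

     base_reg = 0x00001000;         -- shareable by neighbouring accesses
     x        = base_reg + 0x234;   -- fits the 12-bit ldr offset  */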
5972 /* We need to find and carefully transform any SYMBOL and LABEL
5973 references; so go back to the original address expression. */
5974 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
5976 if (new_x != orig_x)
5984 /* Try machine-dependent ways of modifying an illegitimate Thumb address
5985 to be legitimate. If we find one, return the new, valid address. */
5987 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5989 if (arm_tls_symbol_p (x))
5990 return legitimize_tls_address (x, NULL_RTX);
5992 if (GET_CODE (x) == PLUS
5993 && GET_CODE (XEXP (x, 1)) == CONST_INT
5994 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
5995 || INTVAL (XEXP (x, 1)) < 0))
5997 rtx xop0 = XEXP (x, 0);
5998 rtx xop1 = XEXP (x, 1);
5999 HOST_WIDE_INT offset = INTVAL (xop1);
/* Try to fold the offset into a biasing of the base register and
   then offsetting that.  Don't do this when optimizing for space
   since it can cause too many CSEs.  */
6004 if (optimize_size && offset >= 0
6005 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6007 HOST_WIDE_INT delta;
6010 delta = offset - (256 - GET_MODE_SIZE (mode));
6011 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6012 delta = 31 * GET_MODE_SIZE (mode);
6014 delta = offset & (~31 * GET_MODE_SIZE (mode));
6016 xop0 = force_operand (plus_constant (xop0, offset - delta),
6018 x = plus_constant (xop0, delta);
6020 else if (offset < 0 && offset > -256)
6021 /* Small negative offsets are best done with a subtract before the
dereference; forcing these into a register normally takes two
instructions.  */
6024 x = force_operand (x, NULL_RTX);
6027 /* For the remaining cases, force the constant into a register. */
6028 xop1 = force_reg (SImode, xop1);
6029 x = gen_rtx_PLUS (SImode, xop0, xop1);
6032 else if (GET_CODE (x) == PLUS
6033 && s_register_operand (XEXP (x, 1), SImode)
6034 && !s_register_operand (XEXP (x, 0), SImode))
6036 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6038 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6043 /* We need to find and carefully transform any SYMBOL and LABEL
6044 references; so go back to the original address expression. */
6045 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6047 if (new_x != orig_x)
6055 thumb_legitimize_reload_address (rtx *x_p,
6056 enum machine_mode mode,
6057 int opnum, int type,
6058 int ind_levels ATTRIBUTE_UNUSED)
6062 if (GET_CODE (x) == PLUS
6063 && GET_MODE_SIZE (mode) < 4
6064 && REG_P (XEXP (x, 0))
6065 && XEXP (x, 0) == stack_pointer_rtx
6066 && GET_CODE (XEXP (x, 1)) == CONST_INT
6067 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6072 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6073 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6077 /* If both registers are hi-regs, then it's better to reload the
6078 entire expression rather than each register individually. That
6079 only requires one reload register rather than two. */
6080 if (GET_CODE (x) == PLUS
6081 && REG_P (XEXP (x, 0))
6082 && REG_P (XEXP (x, 1))
6083 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6084 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6089 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6090 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6097 /* Test for various thread-local symbols. */
6099 /* Return TRUE if X is a thread-local symbol. */
6102 arm_tls_symbol_p (rtx x)
6104 if (! TARGET_HAVE_TLS)
6107 if (GET_CODE (x) != SYMBOL_REF)
6110 return SYMBOL_REF_TLS_MODEL (x) != 0;
6113 /* Helper for arm_tls_referenced_p. */
6116 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6118 if (GET_CODE (*x) == SYMBOL_REF)
6119 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6121 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6122 TLS offsets, not real symbol references. */
6123 if (GET_CODE (*x) == UNSPEC
6124 && XINT (*x, 1) == UNSPEC_TLS)
6130 /* Return TRUE if X contains any TLS symbol references. */
6133 arm_tls_referenced_p (rtx x)
6135 if (! TARGET_HAVE_TLS)
6138 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6141 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6144 arm_cannot_force_const_mem (rtx x)
6148 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6150 split_const (x, &base, &offset);
6151 if (GET_CODE (base) == SYMBOL_REF
6152 && !offset_within_block_p (base, INTVAL (offset)))
6155 return arm_tls_referenced_p (x);
6158 #define REG_OR_SUBREG_REG(X) \
6159 (GET_CODE (X) == REG \
6160 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6162 #define REG_OR_SUBREG_RTX(X) \
6163 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6165 #ifndef COSTS_N_INSNS
6166 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6169 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6171 enum machine_mode mode = GET_MODE (x);
6184 return COSTS_N_INSNS (1);
6187 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6190 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6197 return COSTS_N_INSNS (2) + cycles;
6199 return COSTS_N_INSNS (1) + 16;
6202 return (COSTS_N_INSNS (1)
6203 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ (GET_CODE (SET_DEST (x)) == MEM)));
6209 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6211 if (thumb_shiftable_const (INTVAL (x)))
6212 return COSTS_N_INSNS (2);
6213 return COSTS_N_INSNS (3);
6215 else if ((outer == PLUS || outer == COMPARE)
6216 && INTVAL (x) < 256 && INTVAL (x) > -256)
6218 else if (outer == AND
6219 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6220 return COSTS_N_INSNS (1);
6221 else if (outer == ASHIFT || outer == ASHIFTRT
6222 || outer == LSHIFTRT)
6224 return COSTS_N_INSNS (2);
6230 return COSTS_N_INSNS (3);
6248 /* XXX another guess. */
6249 /* Memory costs quite a lot for the first word, but subsequent words
load at the equivalent of a single insn each.  */
6251 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6252 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6257 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6262 /* XXX still guessing. */
6263 switch (GET_MODE (XEXP (x, 0)))
6266 return (1 + (mode == DImode ? 4 : 0)
6267 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6270 return (4 + (mode == DImode ? 4 : 0)
6271 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6274 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6286 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6288 enum machine_mode mode = GET_MODE (x);
6289 enum rtx_code subcode;
6291 enum rtx_code code = GET_CODE (x);
6298 /* Memory costs quite a lot for the first word, but subsequent words
6299 load at the equivalent of a single insn each. */
6300 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6307 if (TARGET_HARD_FLOAT && mode == SFmode)
6308 *total = COSTS_N_INSNS (2);
6309 else if (TARGET_HARD_FLOAT && mode == DFmode)
6310 *total = COSTS_N_INSNS (4);
6312 *total = COSTS_N_INSNS (20);
6316 if (GET_CODE (XEXP (x, 1)) == REG)
*total = COSTS_N_INSNS (1); /* Need to subtract from 32.  */
6318 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6319 *total = rtx_cost (XEXP (x, 1), code, speed);
6325 *total += COSTS_N_INSNS (4);
6330 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6331 *total += rtx_cost (XEXP (x, 0), code, speed);
6334 *total += COSTS_N_INSNS (3);
6338 *total += COSTS_N_INSNS (1);
/* Increase the cost of complex shifts because they aren't any faster,
   and they reduce dual-issue opportunities.  */
6341 if (arm_tune_cortex_a9
6342 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6350 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6352 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6353 *total = COSTS_N_INSNS (1);
6355 *total = COSTS_N_INSNS (20);
6358 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6359 /* Thumb2 does not have RSB, so all arguments must be
6360 registers (subtracting a constant is canonicalized as
6361 addition of the negated constant). */
6367 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6368 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6369 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6371 *total += rtx_cost (XEXP (x, 1), code, speed);
6375 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6376 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6378 *total += rtx_cost (XEXP (x, 0), code, speed);
6385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6387 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6389 *total = COSTS_N_INSNS (1);
6390 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6391 && arm_const_double_rtx (XEXP (x, 0)))
6393 *total += rtx_cost (XEXP (x, 1), code, speed);
6397 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6398 && arm_const_double_rtx (XEXP (x, 1)))
6400 *total += rtx_cost (XEXP (x, 0), code, speed);
6406 *total = COSTS_N_INSNS (20);
6410 *total = COSTS_N_INSNS (1);
6411 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6412 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6414 *total += rtx_cost (XEXP (x, 1), code, speed);
6418 subcode = GET_CODE (XEXP (x, 1));
6419 if (subcode == ASHIFT || subcode == ASHIFTRT
6420 || subcode == LSHIFTRT
6421 || subcode == ROTATE || subcode == ROTATERT)
6423 *total += rtx_cost (XEXP (x, 0), code, speed);
6424 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6428 /* A shift as a part of RSB costs no more than RSB itself. */
6429 if (GET_CODE (XEXP (x, 0)) == MULT
6430 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6433 *total += rtx_cost (XEXP (x, 1), code, speed);
6438 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6440 *total += rtx_cost (XEXP (x, 0), code, speed);
6441 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6445 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6446 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6448 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6449 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6450 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6451 *total += COSTS_N_INSNS (1);
6459 if (code == PLUS && arm_arch6 && mode == SImode
6460 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6461 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6463 *total = COSTS_N_INSNS (1);
6464 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6466 *total += rtx_cost (XEXP (x, 1), code, speed);
6470 /* MLA: All arguments must be registers. We filter out
6471 multiplication by a power of two, so that we fall down into the code below. */
6473 if (GET_CODE (XEXP (x, 0)) == MULT
6474 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6476 /* The cost comes from the cost of the multiply. */
6480 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6482 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6484 *total = COSTS_N_INSNS (1);
6485 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6486 && arm_const_double_rtx (XEXP (x, 1)))
6488 *total += rtx_cost (XEXP (x, 0), code, speed);
6495 *total = COSTS_N_INSNS (20);
6499 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6500 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6502 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6503 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6504 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6505 *total += COSTS_N_INSNS (1);
6511 case AND: case XOR: case IOR:
6514 /* Normally the frame registers will be split into reg+const during
6515 reload, so it is a bad idea to combine them with other instructions,
6516 since then they might not be moved outside of loops. As a compromise
6517 we allow integration with ops that have a constant as their second operand. */
6519 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6520 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6521 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6522 || (REG_OR_SUBREG_REG (XEXP (x, 1))
6523 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
6528 *total += COSTS_N_INSNS (2);
6529 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6530 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6532 *total += rtx_cost (XEXP (x, 0), code, speed);
6539 *total += COSTS_N_INSNS (1);
6540 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6541 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6543 *total += rtx_cost (XEXP (x, 0), code, speed);
6546 subcode = GET_CODE (XEXP (x, 0));
6547 if (subcode == ASHIFT || subcode == ASHIFTRT
6548 || subcode == LSHIFTRT
6549 || subcode == ROTATE || subcode == ROTATERT)
6551 *total += rtx_cost (XEXP (x, 1), code, speed);
6552 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6557 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6559 *total += rtx_cost (XEXP (x, 1), code, speed);
6560 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6564 if (subcode == UMIN || subcode == UMAX
6565 || subcode == SMIN || subcode == SMAX)
6567 *total = COSTS_N_INSNS (3);
6574 /* This should have been handled by the CPU specific routines. */
6578 if (arm_arch3m && mode == SImode
6579 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6580 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6581 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6582 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6583 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6584 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6586 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6589 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6593 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6595 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6597 *total = COSTS_N_INSNS (1);
6600 *total = COSTS_N_INSNS (2);
6606 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6607 if (mode == SImode && code == NOT)
6609 subcode = GET_CODE (XEXP (x, 0));
6610 if (subcode == ASHIFT || subcode == ASHIFTRT
6611 || subcode == LSHIFTRT
6612 || subcode == ROTATE || subcode == ROTATERT
6614 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6616 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6617 /* Register shifts cost an extra cycle. */
6618 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6619 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6628 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6630 *total = COSTS_N_INSNS (4);
6634 operand = XEXP (x, 0);
6636 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6637 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6638 && GET_CODE (XEXP (operand, 0)) == REG
6639 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6640 *total += COSTS_N_INSNS (1);
6641 *total += (rtx_cost (XEXP (x, 1), code, speed)
6642 + rtx_cost (XEXP (x, 2), code, speed));
6646 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6648 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6654 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6655 && mode == SImode && XEXP (x, 1) == const0_rtx)
6657 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6663 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6664 && mode == SImode && XEXP (x, 1) == const0_rtx)
6666 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6686 /* SCC insns. If the comparison has already been performed, they
6687 cost 2 instructions. Otherwise they need an additional comparison
6688 before them. */
6689 *total = COSTS_N_INSNS (2);
6690 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6697 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6703 *total += COSTS_N_INSNS (1);
6704 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6705 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6707 *total += rtx_cost (XEXP (x, 0), code, speed);
6711 subcode = GET_CODE (XEXP (x, 0));
6712 if (subcode == ASHIFT || subcode == ASHIFTRT
6713 || subcode == LSHIFTRT
6714 || subcode == ROTATE || subcode == ROTATERT)
6716 *total += rtx_cost (XEXP (x, 1), code, speed);
6717 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6722 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6724 *total += rtx_cost (XEXP (x, 1), code, speed);
6725 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6735 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6736 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6737 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6738 *total += rtx_cost (XEXP (x, 1), code, speed);
6742 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6744 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6746 *total = COSTS_N_INSNS (1);
6749 *total = COSTS_N_INSNS (20);
6752 *total = COSTS_N_INSNS (1);
6754 *total += COSTS_N_INSNS (3);
6758 if (GET_MODE_CLASS (mode) == MODE_INT)
6762 *total += COSTS_N_INSNS (1);
6764 if (GET_MODE (XEXP (x, 0)) != SImode)
6768 if (GET_CODE (XEXP (x, 0)) != MEM)
6769 *total += COSTS_N_INSNS (1);
6771 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6772 *total += COSTS_N_INSNS (2);
6781 if (GET_MODE_CLASS (mode) == MODE_INT)
6784 *total += COSTS_N_INSNS (1);
6786 if (GET_MODE (XEXP (x, 0)) != SImode)
6790 if (GET_CODE (XEXP (x, 0)) != MEM)
6791 *total += COSTS_N_INSNS (1);
6793 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6794 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6801 switch (GET_MODE (XEXP (x, 0)))
6808 *total = COSTS_N_INSNS (1);
6818 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6822 if (const_ok_for_arm (INTVAL (x))
6823 || const_ok_for_arm (~INTVAL (x)))
6824 *total = COSTS_N_INSNS (1);
6826 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6827 INTVAL (x), NULL_RTX,
6834 *total = COSTS_N_INSNS (3);
6838 *total = COSTS_N_INSNS (1);
6842 *total = COSTS_N_INSNS (1);
6843 *total += rtx_cost (XEXP (x, 0), code, speed);
6847 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
6848 *total = COSTS_N_INSNS (1);
6850 *total = COSTS_N_INSNS (4);
6854 *total = COSTS_N_INSNS (4);
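/* All of the cases above express their results in units of
   COSTS_N_INSNS, which GCC's rtl.h defines as ((N) * 4), i.e. four
   cost units per instruction, and they recurse into operands with
   rtx_cost.  A minimal standalone sketch of that accumulation scheme
   (all names hypothetical, not GCC interfaces):

     #define COST_UNITS_PER_INSN(n) ((n) * 4)

     struct expr { struct expr *lhs, *rhs; };

     static int expr_cost (const struct expr *e)
     {
       if (e == 0)
         return 0;
       // One instruction for the operation itself, plus whatever its
       // operands cost to materialize.
       return COST_UNITS_PER_INSN (1)
              + expr_cost (e->lhs) + expr_cost (e->rhs);
     }

   Scaling by 4 leaves room to express sub-instruction preferences,
   such as the "+ 1" bump applied to MOVW/MOVT pairs below.  */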
6859 /* RTX costs when optimizing for size. */
6861 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6864 enum machine_mode mode = GET_MODE (x);
6867 /* XXX TBD. For now, use the standard costs. */
6868 *total = thumb1_rtx_costs (x, code, outer_code);
6872 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6876 /* A memory access costs 1 insn if the mode is small or the address is
6877 a single register; otherwise it costs one insn per word. */
6878 if (REG_P (XEXP (x, 0)))
6879 *total = COSTS_N_INSNS (1);
6881 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6888 /* Needs a libcall, so it costs about this. */
6889 *total = COSTS_N_INSNS (2);
6893 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6895 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6903 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6905 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6908 else if (mode == SImode)
6910 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6911 /* Slightly disparage register shifts, but not by much. */
6912 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6913 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6917 /* Needs a libcall. */
6918 *total = COSTS_N_INSNS (2);
6922 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6924 *total = COSTS_N_INSNS (1);
6930 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6931 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6933 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6934 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6935 || subcode1 == ROTATE || subcode1 == ROTATERT
6936 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6937 || subcode1 == ASHIFTRT)
6939 /* It's just the cost of the two operands. */
6944 *total = COSTS_N_INSNS (1);
6948 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6952 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6954 *total = COSTS_N_INSNS (1);
6958 /* A shift as a part of ADD costs nothing. */
6959 if (GET_CODE (XEXP (x, 0)) == MULT
6960 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6962 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
6963 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
6964 *total += rtx_cost (XEXP (x, 1), code, false);
6969 case AND: case XOR: case IOR:
6972 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
6974 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
6975 || subcode == LSHIFTRT || subcode == ASHIFTRT
6976 || (code == AND && subcode == NOT))
6978 /* It's just the cost of the two operands. */
6984 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6988 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6992 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
6994 *total = COSTS_N_INSNS (1);
7000 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7009 if (cc_register (XEXP (x, 0), VOIDmode))
7012 *total = COSTS_N_INSNS (1);
7016 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
7017 *total = COSTS_N_INSNS (1);
7019 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7024 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7026 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7027 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7030 *total += COSTS_N_INSNS (1);
7035 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7037 switch (GET_MODE (XEXP (x, 0)))
7040 *total += COSTS_N_INSNS (1);
7044 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7050 *total += COSTS_N_INSNS (2);
7055 *total += COSTS_N_INSNS (1);
7060 if (const_ok_for_arm (INTVAL (x)))
7061 /* A multiplication by a constant requires another instruction
7062 to load the constant to a register. */
7063 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7065 else if (const_ok_for_arm (~INTVAL (x)))
7066 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7067 else if (const_ok_for_arm (-INTVAL (x)))
7069 if (outer_code == COMPARE || outer_code == PLUS
7070 || outer_code == MINUS)
7073 *total = COSTS_N_INSNS (1);
7076 *total = COSTS_N_INSNS (2);
7082 *total = COSTS_N_INSNS (2);
7086 *total = COSTS_N_INSNS (4);
7091 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7092 cost of these slightly. */
7093 *total = COSTS_N_INSNS (1) + 1;
7097 if (mode != VOIDmode)
7098 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7100 *total = COSTS_N_INSNS (4); /* Who knows? */
7105 /* Top-level RTX cost hook: dispatch to the size costs or to the per-core speed costs. */
7107 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7111 return arm_size_rtx_costs (x, (enum rtx_code) code,
7112 (enum rtx_code) outer_code, total);
7114 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7115 (enum rtx_code) outer_code,
7119 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7120 supported on any "slowmul" cores, so it can be ignored. */
7123 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7124 int *total, bool speed)
7126 enum machine_mode mode = GET_MODE (x);
7130 *total = thumb1_rtx_costs (x, code, outer_code);
7137 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7140 *total = COSTS_N_INSNS (20);
7144 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7146 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7147 & (unsigned HOST_WIDE_INT) 0xffffffff);
7148 int cost, const_ok = const_ok_for_arm (i);
7149 int j, booth_unit_size;
7151 /* Tune as appropriate. */
7152 cost = const_ok ? 4 : 8;
7153 booth_unit_size = 2;
7154 for (j = 0; i && j < 32; j += booth_unit_size)
7156 i >>= booth_unit_size;
7160 *total = COSTS_N_INSNS (cost);
7161 *total += rtx_cost (XEXP (x, 0), code, speed);
7165 *total = COSTS_N_INSNS (20);
7169 return arm_rtx_costs_1 (x, outer_code, total, speed);
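/* A standalone sketch of the windowed estimate used for MULT above:
   the loop charges one extra cost unit for each booth_unit_size-bit
   chunk consumed before the remaining multiplier bits are all zero
   (names hypothetical):

     static int mul_const_cost_estimate (unsigned long i,
                                         int booth_unit_size,
                                         int base_cost)
     {
       int j, cost = base_cost;
       for (j = 0; i && j < 32; j += booth_unit_size)
         {
           i >>= booth_unit_size;
           cost++;
         }
       return cost;
     }

   With the "slowmul" window of 2 bits, a multiplier such as 0x101
   adds five iterations; the "fastmul" routine below uses an 8-bit
   window, so the same constant adds only two.  */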
7174 /* RTX cost for cores with a fast multiply unit (M variants). */
7177 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7178 int *total, bool speed)
7180 enum machine_mode mode = GET_MODE (x);
7184 *total = thumb1_rtx_costs (x, code, outer_code);
7188 /* ??? Should Thumb-2 use different costs? */
7192 /* There is no point basing this on the tuning, since it is always the
7193 fast variant if it exists at all. */
7195 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7196 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7197 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7199 *total = COSTS_N_INSNS (2);
7206 *total = COSTS_N_INSNS (5);
7210 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7212 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7213 & (unsigned HOST_WIDE_INT) 0xffffffff);
7214 int cost, const_ok = const_ok_for_arm (i);
7215 int j, booth_unit_size;
7217 /* Tune as appropriate. */
7218 cost = const_ok ? 4 : 8;
7219 booth_unit_size = 8;
7220 for (j = 0; i && j < 32; j += booth_unit_size)
7222 i >>= booth_unit_size;
7226 *total = COSTS_N_INSNS (cost);
7232 *total = COSTS_N_INSNS (4);
7236 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7238 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7240 *total = COSTS_N_INSNS (1);
7245 /* Requires a library call. */
7246 *total = COSTS_N_INSNS (20);
7250 return arm_rtx_costs_1 (x, outer_code, total, speed);
7255 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any XScale cores,
7256 so it can be ignored. */
7259 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                          int *total, bool speed)
7261 enum machine_mode mode = GET_MODE (x);
7265 *total = thumb1_rtx_costs (x, code, outer_code);
7272 if (GET_CODE (XEXP (x, 0)) != MULT)
7273 return arm_rtx_costs_1 (x, outer_code, total, speed);
7275 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7276 will stall until the multiplication is complete. */
7277 *total = COSTS_N_INSNS (3);
7281 /* There is no point basing this on the tuning, since it is always the
7282 fast variant if it exists at all. */
7284 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7285 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7286 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7288 *total = COSTS_N_INSNS (2);
7295 *total = COSTS_N_INSNS (5);
7299 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7301 /* If operand 1 is a constant we can more accurately
7302 calculate the cost of the multiply. The multiplier can
7303 retire 15 bits on the first cycle and a further 12 on the
7304 second. We do, of course, have to load the constant into
7305 a register first. */
7306 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7307 /* There's a general overhead of one cycle. */
7309 unsigned HOST_WIDE_INT masked_const;
7314 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7316 masked_const = i & 0xffff8000;
7317 if (masked_const != 0)
7320 masked_const = i & 0xf8000000;
7321 if (masked_const != 0)
7324 *total = COSTS_N_INSNS (cost);
7330 *total = COSTS_N_INSNS (3);
7334 /* Requires a library call. */
7335 *total = COSTS_N_INSNS (20);
7339 return arm_rtx_costs_1 (x, outer_code, total, speed);
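/* Read as a standalone estimate, the masked_const tests above charge
   one cycle of general overhead, one more if the (sign-adjusted)
   multiplier does not retire within the first 15 bits, and another if
   it does not retire within 15 + 12 bits (a sketch; names
   hypothetical):

     static int xscale_mul_cycle_estimate (unsigned long i)
     {
       int cost = 1;                    // General one-cycle overhead.
       if (i & 0x80000000UL)            // Negative multipliers are
         i = ~i;                        // costed by their complement.
       i &= 0xffffffffUL;
       if (i & 0xffff8000UL)            // Bits live above bit 14.
         cost++;
       if (i & 0xf8000000UL)            // Bits live above bit 26.
         cost++;
       return cost;
     }

   So a multiplier of 100 (7 significant bits) is costed at a single
   cycle, while 0x12345678 pays all three.  */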
7344 /* RTX costs for 9e (and later) cores. */
7347 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7348 int *total, bool speed)
7350 enum machine_mode mode = GET_MODE (x);
7357 *total = COSTS_N_INSNS (3);
7361 *total = thumb1_rtx_costs (x, code, outer_code);
7369 /* There is no point basing this on the tuning, since it is always the
7370 fast variant if it exists at all. */
7372 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7373 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7374 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7376 *total = COSTS_N_INSNS (2);
7383 *total = COSTS_N_INSNS (5);
7389 *total = COSTS_N_INSNS (2);
7393 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7395 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
7397 *total = COSTS_N_INSNS (1);
7402 *total = COSTS_N_INSNS (20);
7406 return arm_rtx_costs_1 (x, outer_code, total, speed);
7409 /* All address computations that can be done are free, but rtx cost returns
7410 the same for practically all of them. So we weight the different types
7411 of address here in the order (most preferred first):
7412 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7414 arm_arm_address_cost (rtx x)
7416 enum rtx_code c = GET_CODE (x);
7418 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7420 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7425 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7428 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7438 arm_thumb_address_cost (rtx x)
7440 enum rtx_code c = GET_CODE (x);
7445 && GET_CODE (XEXP (x, 0)) == REG
7446 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7453 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7455 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
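/* The intent of the two routines above can be pictured as a single
   ranking, cheapest first, following the ordering comment: the
   auto-increment forms, then sums involving a shift or non-constant
   index, then reg+constant, then a plain register, then memory or
   label forms.  A toy version (weights hypothetical; only their
   relative order matters):

     enum addr_kind { ADDR_AUTOINC, ADDR_SHIFT_SUM, ADDR_INT_SUM,
                      ADDR_REG, ADDR_MEM_OR_LABEL };

     static int addr_cost_rank (enum addr_kind kind)
     {
       return (int) kind;   // Lower is more preferred.
     }

   Keeping the values distinct nudges address selection toward the
   forms the hardware folds into the addressing mode itself.  */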
7459 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7463 /* Some true dependencies can have a higher cost depending
7464 on precisely how certain input operands are used. */
7466 && REG_NOTE_KIND (link) == 0
7467 && recog_memoized (insn) >= 0
7468 && recog_memoized (dep) >= 0)
7470 int shift_opnum = get_attr_shift (insn);
7471 enum attr_type attr_type = get_attr_type (dep);
7473 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7474 operand for INSN. If we have a shifted input operand and the
7475 instruction we depend on is another ALU instruction, then we may
7476 have to account for an additional stall. */
7477 if (shift_opnum != 0
7478 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7480 rtx shifted_operand;
7483 /* Get the shifted operand. */
7484 extract_insn (insn);
7485 shifted_operand = recog_data.operand[shift_opnum];
7487 /* Iterate over all the operands in DEP. If we write an operand
7488 that overlaps with SHIFTED_OPERAND, then we have to increase the
7489 cost of this dependency. */
7491 preprocess_constraints ();
7492 for (opno = 0; opno < recog_data.n_operands; opno++)
7494 /* We can ignore strict inputs. */
7495 if (recog_data.operand_type[opno] == OP_IN)
7498 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7505 /* XXX This is not strictly true for the FPA. */
7506 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7507 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7510 /* Call insns don't incur a stall, even if they follow a load. */
7511 if (REG_NOTE_KIND (link) == 0
7512 && GET_CODE (insn) == CALL_INSN)
7515 if ((i_pat = single_set (insn)) != NULL
7516 && GET_CODE (SET_SRC (i_pat)) == MEM
7517 && (d_pat = single_set (dep)) != NULL
7518 && GET_CODE (SET_DEST (d_pat)) == MEM)
7520 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7521 /* This is a load after a store; there is no conflict if the load reads
7522 from a cached area. Assume that loads from the stack and from the
7523 constant pool are cached, and that others will miss. This is a hack. */
7526 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7527 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7528 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7529 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7536 static int fp_consts_inited = 0;
7538 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7539 static const char * const strings_fp[8] =
7542 "4", "5", "0.5", "10"
7545 static REAL_VALUE_TYPE values_fp[8];
7548 init_fp_table (void)
7554 fp_consts_inited = 1;
7556 fp_consts_inited = 8;
7558 for (i = 0; i < fp_consts_inited; i++)
7560 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7565 /* Return TRUE if rtx X is a valid immediate FP constant. */
7567 arm_const_double_rtx (rtx x)
7572 if (!fp_consts_inited)
7575 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7576 if (REAL_VALUE_MINUS_ZERO (r))
7579 for (i = 0; i < fp_consts_inited; i++)
7580 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7586 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7588 neg_const_double_rtx_ok_for_fpa (rtx x)
7593 if (!fp_consts_inited)
7596 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7597 r = REAL_VALUE_NEGATE (r);
7598 if (REAL_VALUE_MINUS_ZERO (r))
7601 for (i = 0; i < 8; i++)
7602 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7609 /* VFPv3 has a fairly wide range of representable immediates, formed from
7610 "quarter-precision" floating-point values. These can be evaluated using this
7611 formula (with ^ for exponentiation):
7613 valid = (-1)^s * (n/16) * 2^(4 - r)
7615 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7616 16 <= n <= 31 and 0 <= r <= 7.
7618 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7620 - A (most-significant) is the sign bit.
7621 - BCD are the exponent (encoded as r XOR 3).
7622 - EFGH are the mantissa (encoded as n - 16).
7625 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7626 fconst[sd] instruction, or -1 if X isn't suitable. */
7628 vfp3_const_double_index (rtx x)
7630 REAL_VALUE_TYPE r, m;
7632 unsigned HOST_WIDE_INT mantissa, mant_hi;
7633 unsigned HOST_WIDE_INT mask;
7634 HOST_WIDE_INT m1, m2;
7635 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7637 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7640 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7642 /* We can't represent these things, so detect them first. */
7643 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7646 /* Extract sign, exponent and mantissa. */
7647 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7648 r = REAL_VALUE_ABS (r);
7649 exponent = REAL_EXP (&r);
7650 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7651 highest (sign) bit, with a fixed binary point at bit point_pos.
7652 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7653 bits for the mantissa, this may fail (low bits would be lost). */
7654 real_ldexp (&m, &r, point_pos - exponent);
7655 REAL_VALUE_TO_INT (&m1, &m2, m);
7659 /* If there are bits set in the low part of the mantissa, we can't
7660 represent this value. */
7664 /* Now make it so that mantissa contains the most-significant bits, and move
7665 the point_pos to indicate that the least-significant bits have been discarded. */
7667 point_pos -= HOST_BITS_PER_WIDE_INT;
7670 /* We can permit four significant bits of mantissa only, plus a high bit
7671 which is always 1. */
7672 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7673 if ((mantissa & mask) != 0)
7676 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7677 mantissa >>= point_pos - 5;
7679 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7680 floating-point immediate zero with Neon using an integer-zero load, but
7681 that case is handled elsewhere.) */
7685 gcc_assert (mantissa >= 16 && mantissa <= 31);
7687 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7688 normalized significands are in the range [1, 2)). (Our mantissa is shifted
7689 left 4 places at this point relative to normalized IEEE754 values). GCC
7690 internally uses [0.5, 1) (see real.c), so the exponent returned from
7691 REAL_EXP must be altered. */
7692 exponent = 5 - exponent;
7694 if (exponent < 0 || exponent > 7)
7697 /* Sign, mantissa and exponent are now in the correct form to plug into the
7698 formula described in the comment above. */
7699 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
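/* The encoding just returned can be checked by expanding it again with
   plain arithmetic.  A standalone sketch following the ABCDEFGH layout
   described above (name hypothetical):

     static double fconst_expand_imm8 (unsigned imm8)
     {
       unsigned s = (imm8 >> 7) & 1;        // A: the sign bit.
       unsigned r = ((imm8 >> 4) & 7) ^ 3;  // BCD: undo the r XOR 3.
       unsigned n = (imm8 & 15) + 16;       // EFGH: undo the n - 16.
       double v = (double) n / 16.0;        // 1.0 <= v < 2.0.
       int k = 4 - (int) r;                 // 2^(4 - r), -3 <= k <= 4.
       for (; k > 0; k--) v *= 2.0;
       for (; k < 0; k++) v /= 2.0;
       return s ? -v : v;
     }

   For instance, 0x70 expands to 1.0 and 0x3f to 31.0, the largest
   value fconst[sd] can encode.  */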
7702 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7704 vfp3_const_double_rtx (rtx x)
7709 return vfp3_const_double_index (x) != -1;
7712 /* Recognize immediates which can be used in various Neon instructions. Legal
7713 immediates are described by the following table (for VMVN variants, the
7714 bitwise inverse of the constant shown is recognized; in either case, VMOV
7715 is output and the correct instruction to use for a given constant is chosen
7716 by the assembler). The constant shown is replicated across all elements of
7717 the destination vector.
7719 insn elems variant constant (binary)
7720 ---- ----- ------- -----------------
7721 vmov i32 0 00000000 00000000 00000000 abcdefgh
7722 vmov i32 1 00000000 00000000 abcdefgh 00000000
7723 vmov i32 2 00000000 abcdefgh 00000000 00000000
7724 vmov i32 3 abcdefgh 00000000 00000000 00000000
7725 vmov i16 4 00000000 abcdefgh
7726 vmov i16 5 abcdefgh 00000000
7727 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7728 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7729 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7730 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7731 vmvn i16 10 00000000 abcdefgh
7732 vmvn i16 11 abcdefgh 00000000
7733 vmov i32 12 00000000 00000000 abcdefgh 11111111
7734 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7735 vmov i32 14 00000000 abcdefgh 11111111 11111111
7736 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7737 vmov i8 16 abcdefgh
7738 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7739 eeeeeeee ffffffff gggggggg hhhhhhhh
7740 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7742 For case 18, B = !b. Representable values are exactly those accepted by
7743 vfp3_const_double_index, but are output as floating-point numbers rather
7746 Variants 0-5 (inclusive) may also be used as immediates for the second
7747 operand of VORR/VBIC instructions.
7749 The INVERSE argument causes the bitwise inverse of the given operand to be
7750 recognized instead (used for recognizing legal immediates for the VAND/VORN
7751 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7752 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7753 output, rather than the real insns vbic/vorr).
7755 INVERSE makes no difference to the recognition of float vectors.
7757 The return value is the variant of immediate as shown in the above table, or
7758 -1 if the given value doesn't match any of the listed patterns.
7761 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7762 rtx *modconst, int *elementwidth)
7764 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7766 for (i = 0; i < idx; i += (STRIDE)) \
7771 immtype = (CLASS); \
7772 elsize = (ELSIZE); \
7776 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7777 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7778 unsigned char bytes[16];
7779 int immtype = -1, matches;
7780 unsigned int invmask = inverse ? 0xff : 0;
7782 /* Vectors of float constants. */
7783 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7785 rtx el0 = CONST_VECTOR_ELT (op, 0);
7788 if (!vfp3_const_double_rtx (el0))
7791 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7793 for (i = 1; i < n_elts; i++)
7795 rtx elt = CONST_VECTOR_ELT (op, i);
7798 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7800 if (!REAL_VALUES_EQUAL (r0, re))
7805 *modconst = CONST_VECTOR_ELT (op, 0);
7813 /* Splat vector constant out into a byte vector. */
7814 for (i = 0; i < n_elts; i++)
7816 rtx el = CONST_VECTOR_ELT (op, i);
7817 unsigned HOST_WIDE_INT elpart;
7818 unsigned int part, parts;
7820 if (GET_CODE (el) == CONST_INT)
7822 elpart = INTVAL (el);
7825 else if (GET_CODE (el) == CONST_DOUBLE)
7827 elpart = CONST_DOUBLE_LOW (el);
7833 for (part = 0; part < parts; part++)
7836 for (byte = 0; byte < innersize; byte++)
7838 bytes[idx++] = (elpart & 0xff) ^ invmask;
7839 elpart >>= BITS_PER_UNIT;
7841 if (GET_CODE (el) == CONST_DOUBLE)
7842 elpart = CONST_DOUBLE_HIGH (el);
7847 gcc_assert (idx == GET_MODE_SIZE (mode));
7851 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7852 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7854 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7855 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7857 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7858 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7860 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7861 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7863 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7865 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7867 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7868 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7870 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7871 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7873 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7874 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7876 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7877 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7879 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7881 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7883 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7884 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7886 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7887 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7889 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7890 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7892 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7893 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7895 CHECK (1, 8, 16, bytes[i] == bytes[0]);
7897 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7898 && bytes[i] == bytes[(i + 8) % idx]);
7906 *elementwidth = elsize;
7910 unsigned HOST_WIDE_INT imm = 0;
7912 /* Un-invert bytes of recognized vector, if necessary. */
7914 for (i = 0; i < idx; i++)
7915 bytes[i] ^= invmask;
7919 /* FIXME: Broken on 32-bit H_W_I hosts. */
7920 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7922 for (i = 0; i < 8; i++)
7923 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7924 << (i * BITS_PER_UNIT);
7926 *modconst = GEN_INT (imm);
7930 unsigned HOST_WIDE_INT imm = 0;
7932 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7933 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7935 *modconst = GEN_INT (imm);
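/* Each CHECK above runs one such pattern test over the splatted byte
   image built earlier.  A standalone sketch of variant 0 ("i32:
   00000000 00000000 00000000 abcdefgh"), with the bytes stored
   little-endian within each element as in the splat loop above (names
   hypothetical):

     static int matches_variant_0 (const unsigned char *bytes,
                                   unsigned nbytes)
     {
       unsigned i;
       for (i = 0; i < nbytes; i += 4)     // Stride of one i32 element.
         if (!(bytes[i] == bytes[0]        // Same low byte everywhere,
               && bytes[i + 1] == 0        // zeros in the upper three.
               && bytes[i + 2] == 0
               && bytes[i + 3] == 0))
           return 0;
       return 1;
     }

   The real macro additionally records the matching variant number and
   element size so the caller can reconstruct the vmov operand.  */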
7943 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
7944 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
7945 float elements), and a modified constant (whatever should be output for a
7946 VMOV) in *MODCONST. */
7949 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
7950 rtx *modconst, int *elementwidth)
7954 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
7960 *modconst = tmpconst;
7963 *elementwidth = tmpwidth;
7968 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
7969 the immediate is valid, write a constant suitable for using as an operand
7970 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
7971 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
7974 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
7975 rtx *modconst, int *elementwidth)
7979 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
7981 if (retval < 0 || retval > 5)
7985 *modconst = tmpconst;
7988 *elementwidth = tmpwidth;
7993 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
7997 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
7998 int inverse, int quad)
8000 int width, is_valid;
8001 static char templ[40];
8003 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8005 gcc_assert (is_valid != 0);
8008 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8010 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8015 /* Output a sequence of pairwise operations to implement a reduction.
8016 NOTE: We do "too much work" here, because pairwise operations work on two
8017 registers' worth of operands in one go. Unfortunately, I don't think we can
8018 exploit those extra calculations to do the full operation in fewer steps.
8019 Although all vector elements of the result but the first are ignored, we
8020 actually calculate the same result in each of the elements. An alternative
8021 such as initially loading a vector with zero to use as each of the second
8022 operands would use up an additional register and take an extra instruction,
8023 for no particular gain. */
8026 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8027 rtx (*reduc) (rtx, rtx, rtx))
8029 enum machine_mode inner = GET_MODE_INNER (mode);
8030 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8033 for (i = parts / 2; i >= 1; i /= 2)
8035 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8036 emit_insn (reduc (dest, tmpsum, tmpsum));
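/* The same halving scheme on a plain array, for intuition: each pass
   combines adjacent lanes, so 2^k lanes reduce in k passes (a sketch;
   names hypothetical):

     static int pairwise_reduce_add (int *lanes, unsigned nlanes)
     {
       unsigned parts, i;
       for (parts = nlanes / 2; parts >= 1; parts /= 2)
         for (i = 0; i < parts; i++)
           lanes[i] = lanes[2 * i] + lanes[2 * i + 1];
       return lanes[0];
     }

   Eight lanes take three passes; the hardware does a full two-register
   pairwise op in each pass regardless, which is the "too much work"
   the comment above concedes.  */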
8041 /* Initialize a vector with non-constant elements. FIXME: We can do better
8042 than the current implementation (building a vector on the stack and then
8043 loading it) in many cases. See rs6000.c. */
8046 neon_expand_vector_init (rtx target, rtx vals)
8048 enum machine_mode mode = GET_MODE (target);
8049 enum machine_mode inner = GET_MODE_INNER (mode);
8050 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
8053 gcc_assert (VECTOR_MODE_P (mode));
8055 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8056 for (i = 0; i < n_elts; i++)
8057 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
8058 XVECEXP (vals, 0, i));
8060 emit_move_insn (target, mem);
8063 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8064 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8065 reported source locations are bogus. */
8068 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8073 gcc_assert (GET_CODE (operand) == CONST_INT);
8075 lane = INTVAL (operand);
8077 if (lane < low || lane >= high)
8081 /* Bounds-check lanes. */
8084 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8086 bounds_check (operand, low, high, "lane out of range");
8089 /* Bounds-check constants. */
8092 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8094 bounds_check (operand, low, high, "constant out of range");
8098 neon_element_bits (enum machine_mode mode)
8101 return GET_MODE_BITSIZE (mode);
8103 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8107 /* Predicates for `match_operand' and `match_operator'. */
8109 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8111 cirrus_memory_offset (rtx op)
8113 /* Reject eliminable registers. */
8114 if (! (reload_in_progress || reload_completed)
8115 && ( reg_mentioned_p (frame_pointer_rtx, op)
8116 || reg_mentioned_p (arg_pointer_rtx, op)
8117 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8118 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8119 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8120 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8123 if (GET_CODE (op) == MEM)
8129 /* Match: (mem (reg)). */
8130 if (GET_CODE (ind) == REG)
8136 if (GET_CODE (ind) == PLUS
8137 && GET_CODE (XEXP (ind, 0)) == REG
8138 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8139 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8146 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8147 WB is true if full writeback address modes are allowed and is false
8148 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
8152 arm_coproc_mem_operand (rtx op, bool wb)
8156 /* Reject eliminable registers. */
8157 if (! (reload_in_progress || reload_completed)
8158 && ( reg_mentioned_p (frame_pointer_rtx, op)
8159 || reg_mentioned_p (arg_pointer_rtx, op)
8160 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8161 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8162 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8163 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8166 /* Constants are converted into offsets from labels. */
8167 if (GET_CODE (op) != MEM)
8172 if (reload_completed
8173 && (GET_CODE (ind) == LABEL_REF
8174 || (GET_CODE (ind) == CONST
8175 && GET_CODE (XEXP (ind, 0)) == PLUS
8176 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8177 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8180 /* Match: (mem (reg)). */
8181 if (GET_CODE (ind) == REG)
8182 return arm_address_register_rtx_p (ind, 0);
8184 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8185 acceptable in any case (subject to verification by
8186 arm_address_register_rtx_p). We need WB to be true to accept
8187 PRE_INC and POST_DEC. */
8188 if (GET_CODE (ind) == POST_INC
8189 || GET_CODE (ind) == PRE_DEC
8191 && (GET_CODE (ind) == PRE_INC
8192 || GET_CODE (ind) == POST_DEC)))
8193 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8196 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8197 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8198 && GET_CODE (XEXP (ind, 1)) == PLUS
8199 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8200 ind = XEXP (ind, 1);
8205 if (GET_CODE (ind) == PLUS
8206 && GET_CODE (XEXP (ind, 0)) == REG
8207 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8208 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8209 && INTVAL (XEXP (ind, 1)) > -1024
8210 && INTVAL (XEXP (ind, 1)) < 1024
8211 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
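/* The offset test above, isolated: VFP-class loads and stores take a
   word-aligned offset that fits an 8-bit immediate scaled by 4, plus a
   sign (a sketch; name hypothetical):

     static int coproc_offset_in_range (long off)
     {
       return off > -1024 && off < 1024 && (off & 3) == 0;
     }

   neon_vector_mem_operand below caps the positive side at 1016
   instead, presumably so the doubleword starting at the offset still
   lands within the addressable range.  */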
8217 /* Return TRUE if OP is a memory operand which we can load or store a vector
8218 to/from. TYPE is one of the following values:
8219 0 - Vector load/store (vldr)
8220 1 - Core registers (ldm)
8221 2 - Element/structure loads (vld1)
8224 neon_vector_mem_operand (rtx op, int type)
8228 /* Reject eliminable registers. */
8229 if (! (reload_in_progress || reload_completed)
8230 && ( reg_mentioned_p (frame_pointer_rtx, op)
8231 || reg_mentioned_p (arg_pointer_rtx, op)
8232 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8233 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8234 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8235 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8238 /* Constants are converted into offsets from labels. */
8239 if (GET_CODE (op) != MEM)
8244 if (reload_completed
8245 && (GET_CODE (ind) == LABEL_REF
8246 || (GET_CODE (ind) == CONST
8247 && GET_CODE (XEXP (ind, 0)) == PLUS
8248 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8249 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8252 /* Match: (mem (reg)). */
8253 if (GET_CODE (ind) == REG)
8254 return arm_address_register_rtx_p (ind, 0);
8256 /* Allow post-increment with Neon registers. */
8257 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8258 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8260 /* FIXME: vld1 allows register post-modify. */
8266 && GET_CODE (ind) == PLUS
8267 && GET_CODE (XEXP (ind, 0)) == REG
8268 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8269 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8270 && INTVAL (XEXP (ind, 1)) > -1024
8271 && INTVAL (XEXP (ind, 1)) < 1016
8272 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8278 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
8281 neon_struct_mem_operand (rtx op)
8285 /* Reject eliminable registers. */
8286 if (! (reload_in_progress || reload_completed)
8287 && ( reg_mentioned_p (frame_pointer_rtx, op)
8288 || reg_mentioned_p (arg_pointer_rtx, op)
8289 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8290 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8291 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8292 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8295 /* Constants are converted into offsets from labels. */
8296 if (GET_CODE (op) != MEM)
8301 if (reload_completed
8302 && (GET_CODE (ind) == LABEL_REF
8303 || (GET_CODE (ind) == CONST
8304 && GET_CODE (XEXP (ind, 0)) == PLUS
8305 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8306 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8309 /* Match: (mem (reg)). */
8310 if (GET_CODE (ind) == REG)
8311 return arm_address_register_rtx_p (ind, 0);
8316 /* Return true if X is a register that will be eliminated later on. */
8318 arm_eliminable_register (rtx x)
8320 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8321 || REGNO (x) == ARG_POINTER_REGNUM
8322 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8323 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8326 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8327 coprocessor registers. Otherwise return NO_REGS. */
8330 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8334 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8336 return GENERAL_REGS;
8340 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8341 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8342 && neon_vector_mem_operand (x, 0))
8345 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8348 return GENERAL_REGS;
8351 /* Values which must be returned in the most-significant end of the return register. */
8355 arm_return_in_msb (const_tree valtype)
8357 return (TARGET_AAPCS_BASED
8359 && (AGGREGATE_TYPE_P (valtype)
8360 || TREE_CODE (valtype) == COMPLEX_TYPE));
8363 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8364 Used by the Cirrus Maverick code, which has to work around
8365 a hardware bug triggered by such instructions. */
8367 arm_memory_load_p (rtx insn)
8369 rtx body, lhs, rhs;
8371 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8374 body = PATTERN (insn);
8376 if (GET_CODE (body) != SET)
8379 lhs = XEXP (body, 0);
8380 rhs = XEXP (body, 1);
8382 lhs = REG_OR_SUBREG_RTX (lhs);
8384 /* If the destination is not a general purpose
8385 register we do not have to worry. */
8386 if (GET_CODE (lhs) != REG
8387 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8390 /* As well as loads from memory we also have to react
8391 to loads of invalid constants which will be turned
8392 into loads from the minipool. */
8393 return (GET_CODE (rhs) == MEM
8394 || GET_CODE (rhs) == SYMBOL_REF
8395 || note_invalid_constants (insn, -1, false));
8398 /* Return TRUE if INSN is a Cirrus instruction. */
8400 arm_cirrus_insn_p (rtx insn)
8402 enum attr_cirrus attr;
8404 /* get_attr cannot accept USE or CLOBBER. */
8406 || GET_CODE (insn) != INSN
8407 || GET_CODE (PATTERN (insn)) == USE
8408 || GET_CODE (PATTERN (insn)) == CLOBBER)
8411 attr = get_attr_cirrus (insn);
8413 return attr != CIRRUS_NOT;
8416 /* Cirrus reorg for invalid instruction combinations. */
8418 cirrus_reorg (rtx first)
8420 enum attr_cirrus attr;
8421 rtx body = PATTERN (first);
8425 /* Any branch must be followed by 2 non-Cirrus instructions. */
8426 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8429 t = next_nonnote_insn (first);
8431 if (arm_cirrus_insn_p (t))
8434 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8438 emit_insn_after (gen_nop (), first);
8443 /* (float (blah)) is in parallel with a clobber. */
8444 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8445 body = XVECEXP (body, 0, 0);
8447 if (GET_CODE (body) == SET)
8449 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8451 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8452 be followed by a non-Cirrus insn. */
8453 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8455 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8456 emit_insn_after (gen_nop (), first);
8460 else if (arm_memory_load_p (first))
8462 unsigned int arm_regno;
8464 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8465 ldr/cfmv64hr combination where the Rd field is the same
8466 in both instructions must be split with a non-Cirrus
8473 /* Get Arm register number for ldr insn. */
8474 if (GET_CODE (lhs) == REG)
8475 arm_regno = REGNO (lhs);
8478 gcc_assert (GET_CODE (rhs) == REG);
8479 arm_regno = REGNO (rhs);
8483 first = next_nonnote_insn (first);
8485 if (! arm_cirrus_insn_p (first))
8488 body = PATTERN (first);
8490 /* (float (blah)) is in parallel with a clobber. */
8491 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8492 body = XVECEXP (body, 0, 0);
8494 if (GET_CODE (body) == FLOAT)
8495 body = XEXP (body, 0);
8497 if (get_attr_cirrus (first) == CIRRUS_MOVE
8498 && GET_CODE (XEXP (body, 1)) == REG
8499 && arm_regno == REGNO (XEXP (body, 1)))
8500 emit_insn_after (gen_nop (), first);
8506 /* get_attr cannot accept USE or CLOBBER. */
8508 || GET_CODE (first) != INSN
8509 || GET_CODE (PATTERN (first)) == USE
8510 || GET_CODE (PATTERN (first)) == CLOBBER)
8513 attr = get_attr_cirrus (first);
8515 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8516 must be followed by a non-coprocessor instruction. */
8517 if (attr == CIRRUS_COMPARE)
8521 t = next_nonnote_insn (first);
8523 if (arm_cirrus_insn_p (t))
8526 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8530 emit_insn_after (gen_nop (), first);
8536 /* Return TRUE if X references a SYMBOL_REF. */
8538 symbol_mentioned_p (rtx x)
8543 if (GET_CODE (x) == SYMBOL_REF)
8546 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8547 are constant offsets, not symbols. */
8548 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8551 fmt = GET_RTX_FORMAT (GET_CODE (x));
8553 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8559 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8560 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8563 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8570 /* Return TRUE if X references a LABEL_REF. */
8572 label_mentioned_p (rtx x)
8577 if (GET_CODE (x) == LABEL_REF)
8580 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8581 instruction, but they are constant offsets, not symbols. */
8582 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8585 fmt = GET_RTX_FORMAT (GET_CODE (x));
8586 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8592 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8593 if (label_mentioned_p (XVECEXP (x, i, j)))
8596 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8604 tls_mentioned_p (rtx x)
8606 switch (GET_CODE (x))
8609 return tls_mentioned_p (XEXP (x, 0));
8612 if (XINT (x, 1) == UNSPEC_TLS)
8620 /* Must not copy a SET whose source operand is PC-relative. */
8623 arm_cannot_copy_insn_p (rtx insn)
8625 rtx pat = PATTERN (insn);
8627 if (GET_CODE (pat) == SET)
8629 rtx rhs = SET_SRC (pat);
8631 if (GET_CODE (rhs) == UNSPEC
8632 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8635 if (GET_CODE (rhs) == MEM
8636 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8637 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8647 enum rtx_code code = GET_CODE (x);
8664 /* Return 1 if memory locations are adjacent. */
8666 adjacent_mem_locations (rtx a, rtx b)
8668 /* We don't guarantee to preserve the order of these memory refs. */
8669 if (volatile_refs_p (a) || volatile_refs_p (b))
8672 if ((GET_CODE (XEXP (a, 0)) == REG
8673 || (GET_CODE (XEXP (a, 0)) == PLUS
8674 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8675 && (GET_CODE (XEXP (b, 0)) == REG
8676 || (GET_CODE (XEXP (b, 0)) == PLUS
8677 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8679 HOST_WIDE_INT val0 = 0, val1 = 0;
8683 if (GET_CODE (XEXP (a, 0)) == PLUS)
8685 reg0 = XEXP (XEXP (a, 0), 0);
8686 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8691 if (GET_CODE (XEXP (b, 0)) == PLUS)
8693 reg1 = XEXP (XEXP (b, 0), 0);
8694 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8699 /* Don't accept any offset that will require multiple
8700 instructions to handle, since this would cause the
8701 arith_adjacentmem pattern to output an overlong sequence. */
8702 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8705 /* Don't allow an eliminable register: register elimination can make
8706 the offset too large. */
8707 if (arm_eliminable_register (reg0))
8710 val_diff = val1 - val0;
8714 /* If the target has load delay slots, then there's no benefit
8715 to using an ldm instruction unless the offset is zero and
8716 we are optimizing for size. */
8717 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8718 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8719 && (val_diff == 4 || val_diff == -4));
8722 return ((REGNO (reg0) == REGNO (reg1))
8723 && (val_diff == 4 || val_diff == -4));
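/* Reduced to its core, the adjacency test above asks only that the two
   addresses share a base register and that their constant offsets
   differ by exactly one word (a simplified model; names hypothetical):

     static int offsets_adjacent_p (int base0, long off0,
                                    int base1, long off1)
     {
       long diff = off1 - off0;
       return base0 == base1 && (diff == 4 || diff == -4);
     }

   The surrounding checks then veto offsets that would need multiple
   instructions to form, and eliminable base registers.  */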
8730 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8731 HOST_WIDE_INT *load_offset)
8733 int unsorted_regs[4];
8734 HOST_WIDE_INT unsorted_offsets[4];
8739 /* Can only handle 2, 3, or 4 insns at present,
8740 though this could easily be extended if required. */
8741 gcc_assert (nops >= 2 && nops <= 4);
8743 memset (order, 0, 4 * sizeof (int));
8745 /* Loop over the operands and check that the memory references are
8746 suitable (i.e. immediate offsets from the same base register). At
8747 the same time, extract the target register, and the memory offsets. */
8749 for (i = 0; i < nops; i++)
8754 /* Convert a subreg of a mem into the mem itself. */
8755 if (GET_CODE (operands[nops + i]) == SUBREG)
8756 operands[nops + i] = alter_subreg (operands + (nops + i));
8758 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8760 /* Don't reorder volatile memory references; it doesn't seem worth
8761 looking for the case where the order is ok anyway. */
8762 if (MEM_VOLATILE_P (operands[nops + i]))
8765 offset = const0_rtx;
8767 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8768 || (GET_CODE (reg) == SUBREG
8769 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8770 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8771 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8773 || (GET_CODE (reg) == SUBREG
8774 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8775 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8780 base_reg = REGNO (reg);
8781 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8782 ? REGNO (operands[i])
8783 : REGNO (SUBREG_REG (operands[i])));
8788 if (base_reg != (int) REGNO (reg))
8789 /* Not addressed from the same base register. */
8792 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8793 ? REGNO (operands[i])
8794 : REGNO (SUBREG_REG (operands[i])));
8795 if (unsorted_regs[i] < unsorted_regs[order[0]])
8799 /* If it isn't an integer register, or if it overwrites the
8800 base register but isn't the last insn in the list, then
8801 we can't do this. */
8802 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
8803 || (i != nops - 1 && unsorted_regs[i] == base_reg))
8806 unsorted_offsets[i] = INTVAL (offset);
8809 /* Not a suitable memory address. */
8813 /* All the useful information has now been extracted from the
8814 operands into unsorted_regs and unsorted_offsets; additionally,
8815 order[0] has been set to the lowest numbered register in the
8816 list. Sort the registers into order, and check that the memory
8817 offsets are ascending and adjacent. */
8819 for (i = 1; i < nops; i++)
8823 order[i] = order[i - 1];
8824 for (j = 0; j < nops; j++)
8825 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
8826 && (order[i] == order[i - 1]
8827 || unsorted_regs[j] < unsorted_regs[order[i]]))
8830 /* Have we found a suitable register? If not, one must be used more than once. */
8832 if (order[i] == order[i - 1])
8835 /* Are the memory addresses adjacent and ascending? */
8836 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
8844 for (i = 0; i < nops; i++)
8845 regs[i] = unsorted_regs[order[i]];
8847 *load_offset = unsorted_offsets[order[0]];
8850 if (unsorted_offsets[order[0]] == 0)
8851 return 1; /* ldmia */
8853 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
8854 return 2; /* ldmib */
8856 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
8857 return 3; /* ldmda */
8859 if (unsorted_offsets[order[nops - 1]] == -4)
8860 return 4; /* ldmdb */
8862 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
8863 if the offset isn't small enough. The reason 2 ldrs are faster
8864 is because these ARMs are able to do more than one cache access
8865 in a single cycle. The ARM9 and StrongARM have Harvard caches,
8866 whilst the ARM8 has a double bandwidth cache. This means that
8867 these cores can do both an instruction fetch and a data fetch in
8868 a single cycle, so the trick of calculating the address into a
8869 scratch register (one of the result regs) and then doing a load
8870 multiple actually becomes slower (and no smaller in code size).
8871 That is the transformation
8873 ldr rd1, [rbase + offset]
8874 ldr rd2, [rbase + offset + 4]
8878 add rd1, rbase, offset
8879 ldmia rd1, {rd1, rd2}
8881 produces worse code -- '3 cycles + any stalls on rd2' instead of
8882 '2 cycles + any stalls on rd2'. On ARMs with only one cache
8883 access per cycle, the first sequence could never complete in less
8884 than 6 cycles, whereas the ldm sequence would only take 5 and
8885 would make better use of sequential accesses if not hitting the
8888 We cheat here and test 'arm_ld_sched' which we currently know to
8889 only be true for the ARM8, ARM9 and StrongARM. If this ever
8890 changes, then the test below needs to be reworked. */
8891 if (nops == 2 && arm_ld_sched)
8894 /* Can't do it without setting up the offset, so only do this if it takes
8895 no more than one insn. */
8896 return (const_ok_for_arm (unsorted_offsets[order[0]])
8897 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
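/* After sorting, the addressing mode falls out of the first and last
   offsets alone.  A sketch of that final classification, mirroring the
   return codes above (names hypothetical):

     static int classify_load_multiple (long first_off, long last_off,
                                        int is_arm)
     {
       if (first_off == 0)
         return 1;                      // ldmia
       if (is_arm && first_off == 4)
         return 2;                      // ldmib
       if (is_arm && last_off == 0)
         return 3;                      // ldmda
       if (last_off == -4)
         return 4;                      // ldmdb
       return 0;   // The caller may still set up the base (code 5).
     }
  */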
8901 emit_ldm_seq (rtx *operands, int nops)
8905 HOST_WIDE_INT offset;
8909 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8912 strcpy (buf, "ldm%(ia%)\t");
8916 strcpy (buf, "ldm%(ib%)\t");
8920 strcpy (buf, "ldm%(da%)\t");
8924 strcpy (buf, "ldm%(db%)\t");
8929 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8930 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8933 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
8934 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
8936 output_asm_insn (buf, operands);
8938 strcpy (buf, "ldm%(ia%)\t");
8945 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8946 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8948 for (i = 1; i < nops; i++)
8949 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8950 reg_names[regs[i]]);
8952 strcat (buf, "}\t%@ phole ldm");
8954 output_asm_insn (buf, operands);
8959 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8960 HOST_WIDE_INT *load_offset)
8962 int unsorted_regs[4];
8963 HOST_WIDE_INT unsorted_offsets[4];
8968 /* Can only handle 2, 3, or 4 insns at present, though this could easily be
8969 extended if required. */
8970 gcc_assert (nops >= 2 && nops <= 4);
8972 memset (order, 0, 4 * sizeof (int));
8974 /* Loop over the operands and check that the memory references are
8975 suitable (i.e. immediate offsets from the same base register). At
8976 the same time, extract the target register, and the memory offsets. */
8978 for (i = 0; i < nops; i++)
8983 /* Convert a subreg of a mem into the mem itself. */
8984 if (GET_CODE (operands[nops + i]) == SUBREG)
8985 operands[nops + i] = alter_subreg (operands + (nops + i));
8987 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8989 /* Don't reorder volatile memory references; it doesn't seem worth
8990 looking for the case where the order is ok anyway. */
8991 if (MEM_VOLATILE_P (operands[nops + i]))
8994 offset = const0_rtx;
      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
           || (GET_CODE (reg) == SUBREG
               && GET_CODE (reg = SUBREG_REG (reg)) == REG))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
                   == REG)
                  || (GET_CODE (reg) == SUBREG
                      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
              && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
                  == CONST_INT)))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              unsorted_regs[0] = (GET_CODE (operands[i]) == REG
                                  ? REGNO (operands[i])
                                  : REGNO (SUBREG_REG (operands[i])));
              order[0] = 0;
            }
          else
            {
              if (base_reg != (int) REGNO (reg))
                /* Not addressed from the same base register.  */
                return 0;

              unsorted_regs[i] = (GET_CODE (operands[i]) == REG
                                  ? REGNO (operands[i])
                                  : REGNO (SUBREG_REG (operands[i])));
              if (unsorted_regs[i] < unsorted_regs[order[0]])
                order[0] = i;
            }

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }
9039 /* All the useful information has now been extracted from the
9040 operands into unsorted_regs and unsorted_offsets; additionally,
9041 order[0] has been set to the lowest numbered register in the
9042 list. Sort the registers into order, and check that the memory
9043 offsets are ascending and adjacent. */
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
            && (order[i] == order[i - 1]
                || unsorted_regs[j] < unsorted_regs[order[i]]))
          order[i] = j;

      /* Have we found a suitable register?  If not, one must be used more
         than once.  */
      if (order[i] == order[i - 1])
        return 0;

      /* Is the memory address adjacent and ascending?  */
      if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
        return 0;
    }
9070 for (i = 0; i < nops; i++)
9071 regs[i] = unsorted_regs[order[i]];
  *base = base_reg;
  *load_offset = unsorted_offsets[order[0]];
9076 if (unsorted_offsets[order[0]] == 0)
9077 return 1; /* stmia */
9079 if (unsorted_offsets[order[0]] == 4)
9080 return 2; /* stmib */
9082 if (unsorted_offsets[order[nops - 1]] == 0)
9083 return 3; /* stmda */
  if (unsorted_offsets[order[nops - 1]] == -4)
    return 4; /* stmdb */

  return 0;
}
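
/* An illustrative standalone sketch (not used by the compiler; the
   helper is invented for this example) of how the return codes above
   map to ARM store-multiple addressing modes.  FIRST and LAST are the
   sorted first and last memory offsets.  */
static const char *
example_stm_mode_name (HOST_WIDE_INT first, HOST_WIDE_INT last)
{
  if (first == 0)
    return "stmia"; /* Code 1: increment after.  */
  if (first == 4)
    return "stmib"; /* Code 2: increment before.  */
  if (last == 0)
    return "stmda"; /* Code 3: decrement after.  */
  if (last == -4)
    return "stmdb"; /* Code 4: decrement before.  */
  return NULL;      /* Code 0: no single stm form matches.  */
}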
static const char *
emit_stm_seq (rtx *operands, int nops)
{
  int regs[4], base_reg, i;
  HOST_WIDE_INT offset;
  char buf[100];

  switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
    {
    case 1: strcpy (buf, "stm%(ia%)\t"); break;
    case 2: strcpy (buf, "stm%(ib%)\t"); break;
    case 3: strcpy (buf, "stm%(da%)\t"); break;
    case 4: strcpy (buf, "stm%(db%)\t"); break;
    default: gcc_unreachable ();
    }

  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
           reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);

  for (i = 1; i < nops; i++)
    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
             reg_names[regs[i]]);

  strcat (buf, "}\t%@ phole stm");
  output_asm_insn (buf, operands);
  return "";
}
9135 /* Routines for use in generating RTL. */
rtx
arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
                       int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  HOST_WIDE_INT offset = *offsetp;
9144 int sign = up ? 1 : -1;
9147 /* XScale has load-store double instructions, but they have stricter
9148 alignment requirements than load-store multiple, so we cannot
9151 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9152 the pipeline until completion.
     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.
9169 Best case ldr will always win. However, the more ldr instructions
9170 we issue, the less likely we are to be able to schedule them well.
9171 Using ldr instructions also increases code size.
9173 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9174 for counts of 3 or 4 regs. */
9175 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9181 for (i = 0; i < count; i++)
9183 addr = plus_constant (from, i * 4 * sign);
9184 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9185 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9191 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9201 result = gen_rtx_PARALLEL (VOIDmode,
9202 rtvec_alloc (count + (write_back ? 1 : 0)));
9205 XVECEXP (result, 0, 0)
9206 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9211 for (j = 0; i < count; i++, j++)
9213 addr = plus_constant (from, j * 4 * sign);
9214 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9215 XVECEXP (result, 0, i)
9216 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9227 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9228 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9230 HOST_WIDE_INT offset = *offsetp;
9233 int sign = up ? 1 : -1;
9236 /* See arm_gen_load_multiple for discussion of
9237 the pros/cons of ldm/stm usage for XScale. */
9238 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9244 for (i = 0; i < count; i++)
9246 addr = plus_constant (to, i * 4 * sign);
9247 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9248 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9254 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9264 result = gen_rtx_PARALLEL (VOIDmode,
9265 rtvec_alloc (count + (write_back ? 1 : 0)));
9268 XVECEXP (result, 0, 0)
9269 = gen_rtx_SET (VOIDmode, to,
9270 plus_constant (to, count * 4 * sign));
9275 for (j = 0; i < count; i++, j++)
9277 addr = plus_constant (to, j * 4 * sign);
9278 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9279 XVECEXP (result, 0, i)
9280 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9291 arm_gen_movmemqi (rtx *operands)
9293 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9294 HOST_WIDE_INT srcoffset, dstoffset;
9296 rtx src, dst, srcbase, dstbase;
9297 rtx part_bytes_reg = NULL;
9300 if (GET_CODE (operands[2]) != CONST_INT
9301 || GET_CODE (operands[3]) != CONST_INT
9302 || INTVAL (operands[2]) > 64
9303 || INTVAL (operands[3]) & 3)
9306 dstbase = operands[0];
9307 srcbase = operands[1];
9309 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9310 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9312 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9313 out_words_to_go = INTVAL (operands[2]) / 4;
9314 last_bytes = INTVAL (operands[2]) & 3;
9315 dstoffset = srcoffset = 0;
9317 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9318 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9320 for (i = 0; in_words_to_go >= 2; i+=4)
9322 if (in_words_to_go > 4)
9323 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9324 srcbase, &srcoffset));
9326 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9327 FALSE, srcbase, &srcoffset));
9329 if (out_words_to_go)
9331 if (out_words_to_go > 4)
9332 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9333 dstbase, &dstoffset));
9334 else if (out_words_to_go != 1)
9335 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9339 dstbase, &dstoffset));
9342 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9343 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9344 if (last_bytes != 0)
9346 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9352 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9353 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9356 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9357 if (out_words_to_go)
9361 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9362 sreg = copy_to_reg (mem);
9364 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9365 emit_move_insn (mem, sreg);
9368 gcc_assert (!in_words_to_go); /* Sanity check */
  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }
9379 gcc_assert (!last_bytes || part_bytes_reg);
9381 if (BYTES_BIG_ENDIAN && last_bytes)
9383 rtx tmp = gen_reg_rtx (SImode);
9385 /* The bytes we want are in the top end of the word. */
9386 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9387 GEN_INT (8 * (4 - last_bytes))));
9388 part_bytes_reg = tmp;
9392 mem = adjust_automodify_address (dstbase, QImode,
9393 plus_constant (dst, last_bytes - 1),
9394 dstoffset + last_bytes - 1);
9395 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9399 tmp = gen_reg_rtx (SImode);
9400 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9401 part_bytes_reg = tmp;
9410 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9411 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9415 rtx tmp = gen_reg_rtx (SImode);
9416 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9417 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9418 part_bytes_reg = tmp;
      mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
    }

  return 1;
}
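
/* Worked example (illustrative, little-endian): for a 14-byte copy,
   in_words_to_go == 4, out_words_to_go == 3 and last_bytes == 2.  The
   loop moves twelve bytes with ldm/stm, the fourth source word lands
   in part_bytes_reg, and a single halfword store finishes the job.  */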
9433 /* Select a dominance comparison mode if possible for a test of the general
9434 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9435 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9436 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9437 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9438 In all cases OP will be either EQ or NE, but we don't need to know which
9439 here. If we are unable to support a dominance comparison we return
9440 CC mode. This will then fail to match for the RTL expressions that
9441 generate this call. */
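
/* For example (illustrative): for (x == 0 && y == 0), i.e.
   DOM_CC_X_AND_Y with two EQ comparisons, we return CC_DEQmode so the
   pair can become a compare followed by a conditional compare,
   setting Z only if both tests pass.  */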
static enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;
9448 /* Currently we will probably get the wrong result if the individual
9449 comparisons are not simple. This also ensures that it is safe to
9450 reverse a comparison if necessary. */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;
9457 /* The if_then_else variant of this tests the second condition if the
9458 first passes, but is true if the first fails. Reverse the first
9459 condition to get a true "inclusive-or" expression. */
9460 if (cond_or == DOM_CC_NX_OR_Y)
9461 cond1 = reverse_condition (cond1);
9463 /* If the comparisons are not equal, and one doesn't dominate the other,
9464 then we can't do this. */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }
  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;
      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }
9494 if (cond_or == DOM_CC_X_AND_Y)
9510 if (cond_or == DOM_CC_X_AND_Y)
9526 if (cond_or == DOM_CC_X_AND_Y)
9542 if (cond_or == DOM_CC_X_AND_Y)
      /* The remaining cases only occur when both comparisons are the
         same.  */
9560 gcc_assert (cond1 == cond2);
9564 gcc_assert (cond1 == cond2);
9568 gcc_assert (cond1 == cond2);
9572 gcc_assert (cond1 == cond2);
9576 gcc_assert (cond1 == cond2);
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
9587 /* All floating point compares return CCFP if it is an equality
9588 comparison, and CCFPE otherwise. */
9589 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9609 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9618 /* A compare with a shifted operand. Because of canonicalization, the
9619 comparison will have to be swapped when we emit the assembler. */
9620 if (GET_MODE (y) == SImode
9621 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9622 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9623 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;
9627 /* This operation is performed swapped, but since we only rely on the Z
9628 flag we don't need an additional mode. */
9629 if (GET_MODE (y) == SImode
9630 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9631 && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;
9635 /* This is a special case that is used by combine to allow a
9636 comparison of a shifted byte load to be split into a zero-extend
9637 followed by a comparison of the shifted integer (only valid for
9638 equalities and unsigned inequalities). */
9639 if (GET_MODE (x) == SImode
9640 && GET_CODE (x) == ASHIFT
9641 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9642 && GET_CODE (XEXP (x, 0)) == SUBREG
9643 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9644 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9645 && (op == EQ || op == NE
9646 || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;
9650 /* A construct for a conditional compare, if the false arm contains
9651 0, then both conditions must be true, otherwise either condition
9652 must be true. Not all conditions are possible, so CCmode is
9653 returned if it can't be done. */
9654 if (GET_CODE (x) == IF_THEN_ELSE
9655 && (XEXP (x, 2) == const0_rtx
9656 || XEXP (x, 2) == const1_rtx)
9657 && COMPARISON_P (XEXP (x, 0))
9658 && COMPARISON_P (XEXP (x, 1)))
9659 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9660 INTVAL (XEXP (x, 2)));
9662 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
9663 if (GET_CODE (x) == AND
9664 && COMPARISON_P (XEXP (x, 0))
9665 && COMPARISON_P (XEXP (x, 1)))
9666 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9669 if (GET_CODE (x) == IOR
9670 && COMPARISON_P (XEXP (x, 0))
9671 && COMPARISON_P (XEXP (x, 1)))
9672 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9675 /* An operation (on Thumb) where we want to test for a single bit.
9676 This is done by shifting that bit up into the top bit of a
9677 scratch register; we can then branch on the sign bit. */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;
  /* An operation that sets the condition codes as a side-effect (the
     V flag is not set correctly), so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
9689 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
9693 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9694 || GET_CODE (x) == AND || GET_CODE (x) == IOR
9695 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9696 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9697 || GET_CODE (x) == LSHIFTRT
9698 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9699 || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;
  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;
9706 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9707 && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn
   and return the rtx for the cc register in the proper mode.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
9720 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9721 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
9728 /* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the
   program is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
9734 rtx reg = gen_reg_rtx (Pmode);
  emit_insn (gen_return_addr_mask (reg));
  return reg;
}

void
arm_reload_in_hi (rtx *operands)
{
9743 rtx ref = operands[1];
9745 HOST_WIDE_INT offset = 0;
9747 if (GET_CODE (ref) == SUBREG)
9749 offset = SUBREG_BYTE (ref);
9750 ref = SUBREG_REG (ref);
9753 if (GET_CODE (ref) == REG)
      /* We have a pseudo which has been spilled onto the stack; there
9756 are two cases here: the first where there is a simple
9757 stack-slot replacement and a second where the stack-slot is
9758 out of range, or is used as a subreg. */
9759 if (reg_equiv_mem[REGNO (ref)])
9761 ref = reg_equiv_mem[REGNO (ref)];
9762 base = find_replacement (&XEXP (ref, 0));
9765 /* The slot is out of range, or was dressed up in a SUBREG. */
9766 base = reg_equiv_address[REGNO (ref)];
9769 base = find_replacement (&XEXP (ref, 0));
9771 /* Handle the case where the address is too complex to be offset by 1. */
9772 if (GET_CODE (base) == MINUS
9773 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9775 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9777 emit_set_insn (base_plus, base);
9780 else if (GET_CODE (base) == PLUS)
9782 /* The addend must be CONST_INT, or we would have dealt with it above. */
9783 HOST_WIDE_INT hi, lo;
9785 offset += INTVAL (XEXP (base, 1));
9786 base = XEXP (base, 0);
9788 /* Rework the address into a legal sequence of insns. */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));
      /* Corner case: if lo is the max offset, then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;
9800 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9801 ^ (HOST_WIDE_INT) 0x80000000)
9802 - (HOST_WIDE_INT) 0x80000000);
9804 gcc_assert (hi + lo == offset);
9808 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9810 /* Get the base address; addsi3 knows how to handle constants
9811 that require more than one insn. */
9812 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
      /* Operands[2] may overlap operands[0] (though it won't overlap
         operands[1]); that's why we asked for a DImode reg -- so we can
         use the half that does not overlap.  */
9821 if (REGNO (operands[2]) == REGNO (operands[0]))
9822 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9824 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9826 emit_insn (gen_zero_extendqisi2 (scratch,
9827 gen_rtx_MEM (QImode,
9828 plus_constant (base,
9830 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
9831 gen_rtx_MEM (QImode,
9832 plus_constant (base,
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
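
/* A standalone sketch (not used by the compiler; the helper name is
   invented for illustration) of the hi/lo address split performed
   above: LO keeps the sign-preserved low 12 bits, since an ldr/str
   immediate offset must lie in -4095..4095, and HI is the remainder
   sign-extended from 32 bits, so that hi + lo == offset.  */
static HOST_WIDE_INT
example_split_address_offset (HOST_WIDE_INT offset, HOST_WIDE_INT *lo_out)
{
  HOST_WIDE_INT lo = (offset >= 0
                      ? (offset & 0xfff)
                      : -((-offset) & 0xfff));
  HOST_WIDE_INT hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
                       ^ (HOST_WIDE_INT) 0x80000000)
                      - (HOST_WIDE_INT) 0x80000000);

  *lo_out = lo;
  return hi;
}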
9850 /* Handle storing a half-word to memory during reload by synthesizing as two
9851 byte stores. Take care not to clobber the input values until after we
9852 have moved them somewhere safe. This code assumes that if the DImode
9853 scratch in operands[2] overlaps either the input value or output address
9854 in some way, then that value must die in this insn (we absolutely need
9855 two scratch registers for some corner cases). */
9857 arm_reload_out_hi (rtx *operands)
9859 rtx ref = operands[0];
9860 rtx outval = operands[1];
9862 HOST_WIDE_INT offset = 0;
9864 if (GET_CODE (ref) == SUBREG)
9866 offset = SUBREG_BYTE (ref);
9867 ref = SUBREG_REG (ref);
9870 if (GET_CODE (ref) == REG)
      /* We have a pseudo which has been spilled onto the stack; there
9873 are two cases here: the first where there is a simple
9874 stack-slot replacement and a second where the stack-slot is
9875 out of range, or is used as a subreg. */
9876 if (reg_equiv_mem[REGNO (ref)])
9878 ref = reg_equiv_mem[REGNO (ref)];
9879 base = find_replacement (&XEXP (ref, 0));
9882 /* The slot is out of range, or was dressed up in a SUBREG. */
9883 base = reg_equiv_address[REGNO (ref)];
9886 base = find_replacement (&XEXP (ref, 0));
9888 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
9890 /* Handle the case where the address is too complex to be offset by 1. */
9891 if (GET_CODE (base) == MINUS
9892 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9894 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9896 /* Be careful not to destroy OUTVAL. */
9897 if (reg_overlap_mentioned_p (base_plus, outval))
9899 /* Updating base_plus might destroy outval, see if we can
9900 swap the scratch and base_plus. */
9901 if (!reg_overlap_mentioned_p (scratch, outval))
9904 scratch = base_plus;
9909 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9911 /* Be conservative and copy OUTVAL into the scratch now,
9912 this should only be necessary if outval is a subreg
9913 of something larger than a word. */
9914 /* XXX Might this clobber base? I can't see how it can,
9915 since scratch is known to overlap with OUTVAL, and
9916 must be wider than a word. */
9917 emit_insn (gen_movhi (scratch_hi, outval));
9918 outval = scratch_hi;
9922 emit_set_insn (base_plus, base);
9925 else if (GET_CODE (base) == PLUS)
9927 /* The addend must be CONST_INT, or we would have dealt with it above. */
9928 HOST_WIDE_INT hi, lo;
9930 offset += INTVAL (XEXP (base, 1));
9931 base = XEXP (base, 0);
9933 /* Rework the address into a legal sequence of insns. */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case: if lo is the max offset, then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;
9945 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
9946 ^ (HOST_WIDE_INT) 0x80000000)
9947 - (HOST_WIDE_INT) 0x80000000);
9949 gcc_assert (hi + lo == offset);
9953 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9955 /* Be careful not to destroy OUTVAL. */
9956 if (reg_overlap_mentioned_p (base_plus, outval))
9958 /* Updating base_plus might destroy outval, see if we
9959 can swap the scratch and base_plus. */
9960 if (!reg_overlap_mentioned_p (scratch, outval))
9963 scratch = base_plus;
9968 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
9970 /* Be conservative and copy outval into scratch now,
9971 this should only be necessary if outval is a
9972 subreg of something larger than a word. */
              /* XXX Might this clobber base?  I can't see how it
                 can, since scratch is known to overlap with
                 OUTVAL.  */
9976 emit_insn (gen_movhi (scratch_hi, outval));
9977 outval = scratch_hi;
9981 /* Get the base address; addsi3 knows how to handle constants
9982 that require more than one insn. */
9983 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
9989 if (BYTES_BIG_ENDIAN)
9991 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
9992 plus_constant (base, offset + 1)),
9993 gen_lowpart (QImode, outval)));
9994 emit_insn (gen_lshrsi3 (scratch,
9995 gen_rtx_SUBREG (SImode, outval, 0),
9997 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
9998 gen_lowpart (QImode, scratch)));
10002 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10003 gen_lowpart (QImode, outval)));
10004 emit_insn (gen_lshrsi3 (scratch,
10005 gen_rtx_SUBREG (SImode, outval, 0),
10007 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10008 plus_constant (base, offset + 1)),
10009 gen_lowpart (QImode, scratch)));
10013 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10014 (padded to the size of a word) should be passed in a register. */
static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
10019 if (TARGET_AAPCS_BASED)
10020 return must_pass_in_stack_var_size (mode, type);
  return must_pass_in_stack_var_size_or_pad (mode, type);
}
10026 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10027 Return true if an argument passed on the stack should be padded upwards,
10028 i.e. if the least-significant byte has useful data.
10029 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10030 aggregate types are placed in the lowest memory address. */
bool
arm_pad_arg_upward (enum machine_mode mode, const_tree type)
{
10035 if (!TARGET_AAPCS_BASED)
10036 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
10045 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10046 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10047 byte of the register has useful data, and return the opposite if the
10048 most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */
bool
arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
                    tree type, int first ATTRIBUTE_UNUSED)
{
10056 if (TARGET_AAPCS_BASED
10057 && BYTES_BIG_ENDIAN
10058 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
      && int_size_in_bytes (type) <= 4)
    return true;

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
10067 /* Print a symbolic form of X to the debug file, F. */
10069 arm_print_value (FILE *f, rtx x)
10071 switch (GET_CODE (x))
10074 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10078 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10086 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10088 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10089 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10097 fprintf (f, "\"%s\"", XSTR (x, 0));
10101 fprintf (f, "`%s'", XSTR (x, 0));
10105 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10109 arm_print_value (f, XEXP (x, 0));
10113 arm_print_value (f, XEXP (x, 0));
10115 arm_print_value (f, XEXP (x, 1));
10123 fprintf (f, "????");
10128 /* Routines for manipulation of the constant pool. */
10130 /* Arm instructions cannot load a large constant directly into a
10131 register; they have to come from a pc relative load. The constant
10132 must therefore be placed in the addressable range of the pc
10133 relative load. Depending on the precise pc relative load
10134 instruction the range is somewhere between 256 bytes and 4k. This
10135 means that we often have to dump a constant inside a function, and
10136 generate code to branch around it.
10138 It is important to minimize this, since the branches will slow
10139 things down and make the code larger.
10141 Normally we can hide the table after an existing unconditional
10142 branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

        ldr     rn, L1
        ...
        b       L2
        align
        L1:     .long value
        L2:
        ...

10161 We fix this by performing a scan after scheduling, which notices
10162 which instructions need to have their operands fetched from the
10163 constant table and builds the table.
10165 The algorithm starts by building a table of all the constants that
10166 need fixing up and all the natural barriers in the function (places
10167 where a constant table can be dropped without breaking the flow).
10168 For each fixup we note how far the pc-relative replacement will be
10169 able to reach and the offset of the instruction into the function.
10171 Having built the table we then group the fixes together to form
10172 tables that are as large as possible (subject to addressing
10173 constraints) and emit each table of constants after the last
10174 barrier that is within range of all the instructions in the group.
10175 If a group does not contain a barrier, then we forcibly create one
10176 by inserting a jump instruction into the flow. Once the table has
10177 been inserted, the insns are then modified to reference the
10178 relevant entry in the pool.
10180 Possible enhancements to the algorithm (not implemented) are:
10182 1) For some processors and object formats, there may be benefit in
10183 aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
10187 /* These typedefs are located at the start of this file, so that
10188 they can be used in the prototypes there. This comment is to
10189 remind readers of that fact so that the following structures
10190 can be understood more easily.
10192 typedef struct minipool_node Mnode;
10193 typedef struct minipool_fixup Mfix; */
10195 struct minipool_node
10197 /* Doubly linked chain of entries. */
10200 /* The maximum offset into the code that this entry can be placed. While
10201 pushing fixes for forward references, all entries are sorted in order
10202 of increasing max_address. */
10203 HOST_WIDE_INT max_address;
10204 /* Similarly for an entry inserted for a backwards ref. */
10205 HOST_WIDE_INT min_address;
10206 /* The number of fixes referencing this entry. This can become zero
10207 if we "unpush" an entry. In this case we ignore the entry when we
10208 come to emit the code. */
10210 /* The offset from the start of the minipool. */
10211 HOST_WIDE_INT offset;
10212 /* The value in table. */
10214 /* The mode of value. */
10215 enum machine_mode mode;
10216 /* The size of the value. With iWMMXt enabled
10217 sizes > 4 also imply an alignment of 8-bytes. */
10221 struct minipool_fixup
10225 HOST_WIDE_INT address;
10227 enum machine_mode mode;
10231 HOST_WIDE_INT forwards;
10232 HOST_WIDE_INT backwards;
10235 /* Fixes less than a word need padding out to a word boundary. */
10236 #define MINIPOOL_FIX_SIZE(mode) \
10237 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
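
/* For example, MINIPOOL_FIX_SIZE yields 4 for an HImode fix (sub-word
   entries are padded out to a full word) and 8 for DImode or DFmode.  */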
10239 static Mnode * minipool_vector_head;
10240 static Mnode * minipool_vector_tail;
10241 static rtx minipool_vector_label;
10242 static int minipool_pad;
10244 /* The linked list of all minipool fixes required for this function. */
10245 Mfix * minipool_fix_head;
10246 Mfix * minipool_fix_tail;
10247 /* The fix entry for the current minipool, once it has been placed. */
10248 Mfix * minipool_barrier;
10250 /* Determines if INSN is the start of a jump table. Returns the end
10251 of the TABLE or NULL_RTX. */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

10257 if (GET_CODE (insn) == JUMP_INSN
10258 && JUMP_LABEL (insn) != NULL
10259 && ((table = next_real_insn (JUMP_LABEL (insn)))
10260 == next_real_insn (insn))
10262 && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
          || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
10270 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10271 #define JUMP_TABLES_IN_TEXT_SECTION 0
10274 static HOST_WIDE_INT
10275 get_jump_table_size (rtx insn)
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
10279 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10281 rtx body = PATTERN (insn);
10282 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10283 HOST_WIDE_INT size;
10284 HOST_WIDE_INT modesize;
10286 modesize = GET_MODE_SIZE (GET_MODE (body));
10287 size = modesize * XVECLEN (body, elt);
10291 /* Round up size of TBB table to a halfword boundary. */
10292 size = (size + 1) & ~(HOST_WIDE_INT)1;
10295 /* No padding necessary for TBH. */
10298 /* Add two bytes for alignment on Thumb. */
10303 gcc_unreachable ();
10311 /* Move a minipool fix MP from its current location to before MAX_MP.
10312 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10313 constraints may need updating. */
10315 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10316 HOST_WIDE_INT max_address)
10318 /* The code below assumes these are different. */
10319 gcc_assert (mp != max_mp);
10321 if (max_mp == NULL)
10323 if (max_address < mp->max_address)
10324 mp->max_address = max_address;
10328 if (max_address > max_mp->max_address - mp->fix_size)
10329 mp->max_address = max_mp->max_address - mp->fix_size;
10331 mp->max_address = max_address;
10333 /* Unlink MP from its current position. Since max_mp is non-null,
10334 mp->prev must be non-null. */
10335 mp->prev->next = mp->next;
10336 if (mp->next != NULL)
10337 mp->next->prev = mp->prev;
10339 minipool_vector_tail = mp->prev;
10341 /* Re-insert it before MAX_MP. */
10343 mp->prev = max_mp->prev;
10346 if (mp->prev != NULL)
10347 mp->prev->next = mp;
10349 minipool_vector_head = mp;
10352 /* Save the new entry. */
10355 /* Scan over the preceding entries and adjust their addresses as
10357 while (mp->prev != NULL
10358 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10360 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10367 /* Add a constant to the minipool for a forward reference. Returns the
10368 node added or NULL if the constant will not fit in this pool. */
10370 add_minipool_forward_ref (Mfix *fix)
10372 /* If set, max_mp is the first pool_entry that has a lower
10373 constraint than the one we are trying to add. */
10374 Mnode * max_mp = NULL;
10375 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10378 /* If the minipool starts before the end of FIX->INSN then this FIX
10379 can not be placed into the current pool. Furthermore, adding the
10380 new constant pool entry may cause the pool to start FIX_SIZE bytes
10382 if (minipool_vector_head &&
10383 (fix->address + get_attr_length (fix->insn)
10384 >= minipool_vector_head->max_address - fix->fix_size))
10387 /* Scan the pool to see if a constant with the same value has
10388 already been added. While we are doing this, also note the
10389 location where we must insert the constant if it doesn't already
10391 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10393 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10394 && fix->mode == mp->mode
10395 && (GET_CODE (fix->value) != CODE_LABEL
10396 || (CODE_LABEL_NUMBER (fix->value)
10397 == CODE_LABEL_NUMBER (mp->value)))
10398 && rtx_equal_p (fix->value, mp->value))
10400 /* More than one fix references this entry. */
10402 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10405 /* Note the insertion point if necessary. */
10407 && mp->max_address > max_address)
          /* If we are inserting an 8-byte aligned quantity and
10411 we have not already found an insertion point, then
10412 make sure that all such 8-byte aligned quantities are
10413 placed at the start of the pool. */
10414 if (ARM_DOUBLEWORD_ALIGN
10416 && fix->fix_size >= 8
10417 && mp->fix_size < 8)
10420 max_address = mp->max_address;
10424 /* The value is not currently in the minipool, so we need to create
10425 a new entry for it. If MAX_MP is NULL, the entry will be put on
10426 the end of the list since the placement is less constrained than
10427 any existing entry. Otherwise, we insert the new fix before
10428 MAX_MP and, if necessary, adjust the constraints on the other
10431 mp->fix_size = fix->fix_size;
10432 mp->mode = fix->mode;
10433 mp->value = fix->value;
10435 /* Not yet required for a backwards ref. */
10436 mp->min_address = -65536;
10438 if (max_mp == NULL)
10440 mp->max_address = max_address;
10442 mp->prev = minipool_vector_tail;
10444 if (mp->prev == NULL)
10446 minipool_vector_head = mp;
10447 minipool_vector_label = gen_label_rtx ();
10450 mp->prev->next = mp;
10452 minipool_vector_tail = mp;
10456 if (max_address > max_mp->max_address - mp->fix_size)
10457 mp->max_address = max_mp->max_address - mp->fix_size;
10459 mp->max_address = max_address;
10462 mp->prev = max_mp->prev;
10464 if (mp->prev != NULL)
10465 mp->prev->next = mp;
10467 minipool_vector_head = mp;
10470 /* Save the new entry. */
10473 /* Scan over the preceding entries and adjust their addresses as
10475 while (mp->prev != NULL
10476 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10478 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10486 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10487 HOST_WIDE_INT min_address)
10489 HOST_WIDE_INT offset;
10491 /* The code below assumes these are different. */
10492 gcc_assert (mp != min_mp);
10494 if (min_mp == NULL)
10496 if (min_address > mp->min_address)
10497 mp->min_address = min_address;
10501 /* We will adjust this below if it is too loose. */
10502 mp->min_address = min_address;
10504 /* Unlink MP from its current position. Since min_mp is non-null,
10505 mp->next must be non-null. */
10506 mp->next->prev = mp->prev;
10507 if (mp->prev != NULL)
10508 mp->prev->next = mp->next;
10510 minipool_vector_head = mp->next;
10512 /* Reinsert it after MIN_MP. */
10514 mp->next = min_mp->next;
10516 if (mp->next != NULL)
10517 mp->next->prev = mp;
10519 minipool_vector_tail = mp;
10525 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10527 mp->offset = offset;
10528 if (mp->refcount > 0)
10529 offset += mp->fix_size;
10531 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10532 mp->next->min_address = mp->min_address + mp->fix_size;
10538 /* Add a constant to the minipool for a backward reference. Returns the
10539 node added or NULL if the constant will not fit in this pool.
10541 Note that the code for insertion for a backwards reference can be
10542 somewhat confusing because the calculated offsets for each fix do
10543 not take into account the size of the pool (which is still under
10546 add_minipool_backward_ref (Mfix *fix)
10548 /* If set, min_mp is the last pool_entry that has a lower constraint
10549 than the one we are trying to add. */
10550 Mnode *min_mp = NULL;
10551 /* This can be negative, since it is only a constraint. */
10552 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10555 /* If we can't reach the current pool from this insn, or if we can't
10556 insert this entry at the end of the pool without pushing other
10557 fixes out of range, then we don't try. This ensures that we
10558 can't fail later on. */
10559 if (min_address >= minipool_barrier->address
10560 || (minipool_vector_tail->min_address + fix->fix_size
10561 >= minipool_barrier->address))
10564 /* Scan the pool to see if a constant with the same value has
10565 already been added. While we are doing this, also note the
10566 location where we must insert the constant if it doesn't already
10568 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10570 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10571 && fix->mode == mp->mode
10572 && (GET_CODE (fix->value) != CODE_LABEL
10573 || (CODE_LABEL_NUMBER (fix->value)
10574 == CODE_LABEL_NUMBER (mp->value)))
10575 && rtx_equal_p (fix->value, mp->value)
10576 /* Check that there is enough slack to move this entry to the
10577 end of the table (this is conservative). */
10578 && (mp->max_address
10579 > (minipool_barrier->address
10580 + minipool_vector_tail->offset
10581 + minipool_vector_tail->fix_size)))
10584 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10587 if (min_mp != NULL)
10588 mp->min_address += fix->fix_size;
10591 /* Note the insertion point if necessary. */
10592 if (mp->min_address < min_address)
10594 /* For now, we do not allow the insertion of 8-byte alignment
10595 requiring nodes anywhere but at the start of the pool. */
10596 if (ARM_DOUBLEWORD_ALIGN
10597 && fix->fix_size >= 8 && mp->fix_size < 8)
10602 else if (mp->max_address
10603 < minipool_barrier->address + mp->offset + fix->fix_size)
10605 /* Inserting before this entry would push the fix beyond
10606 its maximum address (which can happen if we have
10607 re-located a forwards fix); force the new fix to come
10609 if (ARM_DOUBLEWORD_ALIGN
10610 && fix->fix_size >= 8 && mp->fix_size < 8)
10615 min_address = mp->min_address + fix->fix_size;
10618 /* Do not insert a non-8-byte aligned quantity before 8-byte
10619 aligned quantities. */
10620 else if (ARM_DOUBLEWORD_ALIGN
10621 && fix->fix_size < 8
10622 && mp->fix_size >= 8)
10625 min_address = mp->min_address + fix->fix_size;
10630 /* We need to create a new entry. */
10632 mp->fix_size = fix->fix_size;
10633 mp->mode = fix->mode;
10634 mp->value = fix->value;
10636 mp->max_address = minipool_barrier->address + 65536;
10638 mp->min_address = min_address;
10640 if (min_mp == NULL)
10643 mp->next = minipool_vector_head;
10645 if (mp->next == NULL)
10647 minipool_vector_tail = mp;
10648 minipool_vector_label = gen_label_rtx ();
10651 mp->next->prev = mp;
10653 minipool_vector_head = mp;
10657 mp->next = min_mp->next;
10661 if (mp->next != NULL)
10662 mp->next->prev = mp;
10664 minipool_vector_tail = mp;
10667 /* Save the new entry. */
10675 /* Scan over the following entries and adjust their offsets. */
10676 while (mp->next != NULL)
10678 if (mp->next->min_address < mp->min_address + mp->fix_size)
10679 mp->next->min_address = mp->min_address + mp->fix_size;
10682 mp->next->offset = mp->offset + mp->fix_size;
10684 mp->next->offset = mp->offset;
10693 assign_minipool_offsets (Mfix *barrier)
10695 HOST_WIDE_INT offset = 0;
10698 minipool_barrier = barrier;
10700 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10702 mp->offset = offset;
10704 if (mp->refcount > 0)
10705 offset += mp->fix_size;
10709 /* Output the literal table */
10711 dump_minipool (rtx scan)
10717 if (ARM_DOUBLEWORD_ALIGN)
10718 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10719 if (mp->refcount > 0 && mp->fix_size >= 8)
10726 fprintf (dump_file,
10727 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10728 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10730 scan = emit_label_after (gen_label_rtx (), scan);
10731 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10732 scan = emit_label_after (minipool_vector_label, scan);
10734 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10736 if (mp->refcount > 0)
10740 fprintf (dump_file,
10741 ";; Offset %u, min %ld, max %ld ",
10742 (unsigned) mp->offset, (unsigned long) mp->min_address,
10743 (unsigned long) mp->max_address);
10744 arm_print_value (dump_file, mp->value);
10745 fputc ('\n', dump_file);
10748 switch (mp->fix_size)
10750 #ifdef HAVE_consttable_1
10752 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10756 #ifdef HAVE_consttable_2
10758 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10762 #ifdef HAVE_consttable_4
10764 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10768 #ifdef HAVE_consttable_8
10770 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10774 #ifdef HAVE_consttable_16
10776 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10781 gcc_unreachable ();
10789 minipool_vector_head = minipool_vector_tail = NULL;
10790 scan = emit_insn_after (gen_consttable_end (), scan);
10791 scan = emit_barrier_after (scan);
10794 /* Return the cost of forcibly inserting a barrier after INSN. */
10796 arm_barrier_cost (rtx insn)
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
10801 int base_cost = 50;
10802 rtx next = next_nonnote_insn (insn);
10804 if (next != NULL && GET_CODE (next) == CODE_LABEL)
10807 switch (GET_CODE (insn))
10810 /* It will always be better to place the table before the label, rather
10819 return base_cost - 10;
10822 return base_cost + 10;
10826 /* Find the best place in the insn stream in the range
10827 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
10828 Create the barrier by inserting a jump and add a new fix entry for
10831 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
10833 HOST_WIDE_INT count = 0;
10835 rtx from = fix->insn;
10836 /* The instruction after which we will insert the jump. */
10837 rtx selected = NULL;
10839 /* The address at which the jump instruction will be placed. */
10840 HOST_WIDE_INT selected_address;
10842 HOST_WIDE_INT max_count = max_address - fix->address;
10843 rtx label = gen_label_rtx ();
10845 selected_cost = arm_barrier_cost (from);
10846 selected_address = fix->address;
10848 while (from && count < max_count)
      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
10855 gcc_assert (GET_CODE (from) != BARRIER);
10857 /* Count the length of this insn. */
10858 count += get_attr_length (from);
10860 /* If there is a jump table, add its length. */
10861 tmp = is_jump_table (from);
10864 count += get_jump_table_size (tmp);
10866 /* Jump tables aren't in a basic block, so base the cost on
10867 the dispatch insn. If we select this location, we will
10868 still put the pool after the table. */
10869 new_cost = arm_barrier_cost (from);
10871 if (count < max_count
10872 && (!selected || new_cost <= selected_cost))
10875 selected_cost = new_cost;
10876 selected_address = fix->address + count;
10879 /* Continue after the dispatch table. */
10880 from = NEXT_INSN (tmp);
10884 new_cost = arm_barrier_cost (from);
10886 if (count < max_count
10887 && (!selected || new_cost <= selected_cost))
10890 selected_cost = new_cost;
10891 selected_address = fix->address + count;
10894 from = NEXT_INSN (from);
10897 /* Make sure that we found a place to insert the jump. */
10898 gcc_assert (selected);
10900 /* Create a new JUMP_INSN that branches around a barrier. */
10901 from = emit_jump_insn_after (gen_jump (label), selected);
10902 JUMP_LABEL (from) = label;
10903 barrier = emit_barrier_after (from);
10904 emit_label_after (label, barrier);
10906 /* Create a minipool barrier entry for the new barrier. */
10907 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
10908 new_fix->insn = barrier;
10909 new_fix->address = selected_address;
10910 new_fix->next = fix->next;
10911 fix->next = new_fix;
10916 /* Record that there is a natural barrier in the insn stream at
10919 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
10921 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10924 fix->address = address;
10927 if (minipool_fix_head != NULL)
10928 minipool_fix_tail->next = fix;
10930 minipool_fix_head = fix;
10932 minipool_fix_tail = fix;
10935 /* Record INSN, which will need fixing up to load a value from the
10936 minipool. ADDRESS is the offset of the insn since the start of the
10937 function; LOC is a pointer to the part of the insn which requires
10938 fixing; VALUE is the constant that must be loaded, which is of type
10941 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
10942 enum machine_mode mode, rtx value)
10944 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
10947 fix->address = address;
10950 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
10951 fix->value = value;
10952 fix->forwards = get_attr_pool_range (insn);
10953 fix->backwards = get_attr_neg_pool_range (insn);
10954 fix->minipool = NULL;
10956 /* If an insn doesn't have a range defined for it, then it isn't
10957 expecting to be reworked by this code. Better to stop now than
10958 to generate duff assembly code. */
10959 gcc_assert (fix->forwards || fix->backwards);
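
  /* As a rough illustration (the exact values come from the pool_range
     and neg_pool_range insn attributes): an ARM-mode word load can
     address on the order of 4k bytes either side of the pc, while a
     Thumb-1 pc-relative load reaches only about 1k, and only
     forwards.  */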
10961 /* If an entry requires 8-byte alignment then assume all constant pools
10962 require 4 bytes of padding. Trying to do this later on a per-pool
10963 basis is awkward because existing pool entries have to be modified. */
10964 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
10969 fprintf (dump_file,
10970 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
10971 GET_MODE_NAME (mode),
10972 INSN_UID (insn), (unsigned long) address,
10973 -1 * (long)fix->backwards, (long)fix->forwards);
10974 arm_print_value (dump_file, fix->value);
10975 fprintf (dump_file, "\n");
10978 /* Add it to the chain of fixes. */
10981 if (minipool_fix_head != NULL)
10982 minipool_fix_tail->next = fix;
10984 minipool_fix_head = fix;
10986 minipool_fix_tail = fix;
10989 /* Return the cost of synthesizing a 64-bit constant VAL inline.
10990 Returns the number of insns needed, or 99 if we don't know how to
10993 arm_const_double_inline_cost (rtx val)
10995 rtx lowpart, highpart;
10996 enum machine_mode mode;
10998 mode = GET_MODE (val);
11000 if (mode == VOIDmode)
11003 gcc_assert (GET_MODE_SIZE (mode) == 8);
11005 lowpart = gen_lowpart (SImode, val);
11006 highpart = gen_highpart_mode (SImode, mode, val);
11008 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11009 gcc_assert (GET_CODE (highpart) == CONST_INT);
11011 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11012 NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
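
/* For example (illustrative): the 64-bit constant 0x0000000100000001
   splits into two SImode halves both equal to 1; each half is a single
   mov, so the inline cost is 2 insns.  */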
11017 /* Return true if it is worthwhile to split a 64-bit constant into two
11018 32-bit operations. This is the case if optimizing for size, or
11019 if we have load delay slots, or if one 32-bit part can be done with
11020 a single data operation. */
11022 arm_const_double_by_parts (rtx val)
11024 enum machine_mode mode = GET_MODE (val);
11027 if (optimize_size || arm_ld_sched)
11030 if (mode == VOIDmode)
11033 part = gen_highpart_mode (SImode, mode, val);
11035 gcc_assert (GET_CODE (part) == CONST_INT);
11037 if (const_ok_for_arm (INTVAL (part))
11038 || const_ok_for_arm (~INTVAL (part)))
11041 part = gen_lowpart (SImode, val);
11043 gcc_assert (GET_CODE (part) == CONST_INT);
  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
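
/* Example (illustrative): for 0xff00ff0000000000 the low word is 0,
   which is a valid ARM immediate, so one half needs only a single
   data operation and the two-insn split is considered worthwhile.  */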
11052 /* Scan INSN and note any of its operands that need fixing.
11053 If DO_PUSHES is false we do not actually push any of the fixups
11054 needed. The function returns TRUE if any fixups were needed/pushed.
11055 This is used by arm_memory_load_p() which needs to know about loads
11056 of constants that will be converted into minipool loads. */
11058 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11060 bool result = false;
11063 extract_insn (insn);
11065 if (!constrain_operands (1))
11066 fatal_insn_not_found (insn);
11068 if (recog_data.n_alternatives == 0)
11071 /* Fill in recog_op_alt with information about the constraints of
11073 preprocess_constraints ();
11075 for (opno = 0; opno < recog_data.n_operands; opno++)
11077 /* Things we need to fix can only occur in inputs. */
11078 if (recog_data.operand_type[opno] != OP_IN)
11081 /* If this alternative is a memory reference, then any mention
11082 of constants in this alternative is really to fool reload
11083 into allowing us to accept one there. We need to fix them up
11084 now so that we output the right code. */
11085 if (recog_op_alt[opno][which_alternative].memory_ok)
11087 rtx op = recog_data.operand[opno];
11089 if (CONSTANT_P (op))
11092 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11093 recog_data.operand_mode[opno], op);
11096 else if (GET_CODE (op) == MEM
11097 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11098 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11102 rtx cop = avoid_constant_pool_reference (op);
11104 /* Casting the address of something to a mode narrower
11105 than a word can cause avoid_constant_pool_reference()
11106 to return the pool reference itself. That's no good to
              us here.  Let's just hope that we can use the
11108 constant pool value directly. */
11110 cop = get_pool_constant (XEXP (op, 0));
11112 push_minipool_fix (insn, address,
11113 recog_data.operand_loc[opno],
11114 recog_data.operand_mode[opno], cop);
11125 /* Gcc puts the pool in the wrong place for ARM, since we can only
11126 load addresses a limited distance around the pc. We do some
11127 special munging to move the constant pool values to the correct
11128 point in the code. */
11133 HOST_WIDE_INT address = 0;
11136 minipool_fix_head = minipool_fix_tail = NULL;
11138 /* The first insn must always be a note, or the code below won't
11139 scan it properly. */
11140 insn = get_insns ();
11141 gcc_assert (GET_CODE (insn) == NOTE);
11144 /* Scan all the insns and record the operands that will need fixing. */
11145 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11147 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11148 && (arm_cirrus_insn_p (insn)
11149 || GET_CODE (insn) == JUMP_INSN
11150 || arm_memory_load_p (insn)))
11151 cirrus_reorg (insn);
11153 if (GET_CODE (insn) == BARRIER)
11154 push_minipool_barrier (insn, address);
11155 else if (INSN_P (insn))
11159 note_invalid_constants (insn, address, true);
11160 address += get_attr_length (insn);
11162 /* If the insn is a vector jump, add the size of the table
11163 and skip the table. */
11164 if ((table = is_jump_table (insn)) != NULL)
11166 address += get_jump_table_size (table);
11172 fix = minipool_fix_head;
11174 /* Now scan the fixups and perform the required changes. */
11179 Mfix * last_added_fix;
11180 Mfix * last_barrier = NULL;
11183 /* Skip any further barriers before the next fix. */
11184 while (fix && GET_CODE (fix->insn) == BARRIER)
11187 /* No more fixes. */
11191 last_added_fix = NULL;
11193 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11195 if (GET_CODE (ftmp->insn) == BARRIER)
11197 if (ftmp->address >= minipool_vector_head->max_address)
11200 last_barrier = ftmp;
11202 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11205 last_added_fix = ftmp; /* Keep track of the last fix added. */
11208 /* If we found a barrier, drop back to that; any fixes that we
11209 could have reached but come after the barrier will now go in
11210 the next mini-pool. */
11211 if (last_barrier != NULL)
11213 /* Reduce the refcount for those fixes that won't go into this
11215 for (fdel = last_barrier->next;
11216 fdel && fdel != ftmp;
11219 fdel->minipool->refcount--;
11220 fdel->minipool = NULL;
11223 ftmp = last_barrier;
11227 /* ftmp is first fix that we can't fit into this pool and
11228 there no natural barriers that we could use. Insert a
11229 new barrier in the code somewhere between the previous
11230 fix and this one, and arrange to jump around it. */
11231 HOST_WIDE_INT max_address;
11233 /* The last item on the list of fixes must be a barrier, so
11234 we can never run off the end of the list of fixes without
11235 last_barrier being set. */
11238 max_address = minipool_vector_head->max_address;
11239 /* Check that there isn't another fix that is in range that
11240 we couldn't fit into this pool because the pool was
11241 already too large: we need to put the pool before such an
11242 instruction. The pool itself may come just after the
11243 fix because create_fix_barrier also allows space for a
11244 jump instruction. */
11245 if (ftmp->address < max_address)
11246 max_address = ftmp->address + 1;
11248 last_barrier = create_fix_barrier (last_added_fix, max_address);
11251 assign_minipool_offsets (last_barrier);
11255 if (GET_CODE (ftmp->insn) != BARRIER
11256 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11263 /* Scan over the fixes we have identified for this pool, fixing them
11264 up and adding the constants to the pool itself. */
11265 for (this_fix = fix; this_fix && ftmp != this_fix;
11266 this_fix = this_fix->next)
11267 if (GET_CODE (this_fix->insn) != BARRIER)
11270 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11271 minipool_vector_label),
11272 this_fix->minipool->offset);
11273 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11276 dump_minipool (last_barrier->insn);
11280 /* From now on we must synthesize any constants that we can't handle
11281 directly. This can happen if the RTL gets split during final
11282 instruction generation. */
11283 after_arm_reorg = 1;
11285 /* Free the minipool memory. */
11286 obstack_free (&minipool_obstack, minipool_startobj);
11289 /* Routines to output assembly language. */
11291 /* If the rtx is the correct value then return the string of the number.
11292 In this way we can ensure that valid double constants are generated even
   when cross-compiling.  */
11295 fp_immediate_constant (rtx x)
11300 if (!fp_consts_inited)
11303 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11304 for (i = 0; i < 8; i++)
11305 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11306 return strings_fp[i];
11308 gcc_unreachable ();
11311 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11312 static const char *
11313 fp_const_from_val (REAL_VALUE_TYPE *r)
11317 if (!fp_consts_inited)
11320 for (i = 0; i < 8; i++)
11321 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11322 return strings_fp[i];
11324 gcc_unreachable ();
11327 /* Output the operands of a LDM/STM instruction to STREAM.
11328 MASK is the ARM register set mask of which only bits 0-15 are important.
11329 REG is the base register, either the frame pointer or the stack pointer,
11330 INSTR is the possibly suffixed load or store instruction.
11331 RFE is nonzero if the instruction should also copy spsr to cpsr. */
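
/* For example (illustrative): with INSTR "ldmfd\t%r!, ", REG the stack
   pointer and MASK having bits 4, 5 and 14 set, this prints
   "ldmfd sp!, {r4, r5, lr}"; with RFE nonzero (and the PC in MASK) the
   closing brace becomes "}^".  */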
static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11335 unsigned long mask, int rfe)
11338 bool not_first = FALSE;
11340 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11341 fputc ('\t', stream);
11342 asm_fprintf (stream, instr, reg);
11343 fputc ('{', stream);
11345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11346 if (mask & (1 << i))
11349 fprintf (stream, ", ");
11351 asm_fprintf (stream, "%r", i);
11356 fprintf (stream, "}^\n");
11358 fprintf (stream, "}\n");
11362 /* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
11364 REG and COUNT specify the register range.
11365 Extra registers may be added to avoid hardware bugs.
11367 We output FLDMD even for ARMv5 VFP implementations. Although
11368 FLDMD is technically not supported until ARMv6, it is believed
11369 that all VFP implementations support its use in this context. */
11372 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11376 /* Workaround ARM10 VFPr1 bug. */
11377 if (count == 2 && !arm_arch6)
11384 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11385 load into multiple parts if we have to handle more than 16 registers. */
11388 vfp_output_fldmd (stream, base, reg, 16);
11389 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11393 fputc ('\t', stream);
11394 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11396 for (i = reg; i < reg + count; i++)
11399 fputs (", ", stream);
11400 asm_fprintf (stream, "d%d", i);
  fputs ("}\n", stream);
}
11407 /* Output the assembly for a store multiple. */
11410 vfp_output_fstmd (rtx * operands)
11417 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11418 p = strlen (pattern);
11420 gcc_assert (GET_CODE (operands[1]) == REG);
11422 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11423 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11425 p += sprintf (&pattern[p], ", d%d", base + i);
11427 strcpy (&pattern[p], "}");
11429 output_asm_insn (pattern, operands);
11434 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11435 number of bytes pushed. */
11438 vfp_emit_fstmd (int base_reg, int count)
11445 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11446 register pairs are stored by a store multiple insn. We avoid this
11447 by pushing an extra pair. */
11448 if (count == 2 && !arm_arch6)
11450 if (base_reg == LAST_VFP_REGNUM - 3)
11455 /* FSTMD may not store more than 16 doubleword registers at once. Split
11456 larger stores into multiple parts (up to a maximum of two, in practice). */
11461 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
11463 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11464 saved += vfp_emit_fstmd (base_reg, 16);
11468 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11469 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11471 reg = gen_rtx_REG (DFmode, base_reg);
11474 XVECEXP (par, 0, 0)
11475 = gen_rtx_SET (VOIDmode,
11476 gen_frame_mem (BLKmode,
11477 gen_rtx_PRE_DEC (BLKmode,
11478 stack_pointer_rtx)),
11479 gen_rtx_UNSPEC (BLKmode,
11480 gen_rtvec (1, reg),
11481 UNSPEC_PUSH_MULT));
11483 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11484 plus_constant (stack_pointer_rtx, -(count * 8)));
11485 RTX_FRAME_RELATED_P (tmp) = 1;
11486 XVECEXP (dwarf, 0, 0) = tmp;
11488 tmp = gen_rtx_SET (VOIDmode,
11489 gen_frame_mem (DFmode, stack_pointer_rtx),
11491 RTX_FRAME_RELATED_P (tmp) = 1;
11492 XVECEXP (dwarf, 0, 1) = tmp;
11494 for (i = 1; i < count; i++)
11496 reg = gen_rtx_REG (DFmode, base_reg);
11498 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11500 tmp = gen_rtx_SET (VOIDmode,
11501 gen_frame_mem (DFmode,
11502 plus_constant (stack_pointer_rtx,
11505 RTX_FRAME_RELATED_P (tmp) = 1;
11506 XVECEXP (dwarf, 0, i + 1) = tmp;
11509 par = emit_insn (par);
11510 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11511 RTX_FRAME_RELATED_P (par) = 1;
11516 /* Emit a call instruction with pattern PAT. ADDR is the address of
11517 the call target. */
11520 arm_emit_call_insn (rtx pat, rtx addr)
11524 insn = emit_call_insn (pat);
11526 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11527 If the call might use such an entry, add a use of the PIC register
11528 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11529 if (TARGET_VXWORKS_RTP
11531 && GET_CODE (addr) == SYMBOL_REF
11532 && (SYMBOL_REF_DECL (addr)
11533 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11534 : !SYMBOL_REF_LOCAL_P (addr)))
11536 require_pic_register ();
11537 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11541 /* Output a 'call' insn. */
11543 output_call (rtx *operands)
11545 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11547 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11548 if (REGNO (operands[0]) == LR_REGNUM)
11550 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11551 output_asm_insn ("mov%?\t%0, %|lr", operands);
11554 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11556 if (TARGET_INTERWORK || arm_arch4t)
11557 output_asm_insn ("bx%?\t%0", operands);
11559 output_asm_insn ("mov%?\t%|pc, %0", operands);
11564 /* Output a 'call' insn that is a reference in memory. */
11566 output_call_mem (rtx *operands)
11568 if (TARGET_INTERWORK && !arm_arch5)
11570 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11571 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11572 output_asm_insn ("bx%?\t%|ip", operands);
11574 else if (regno_use_in (LR_REGNUM, operands[0]))
11576 /* LR is used in the memory address. We load the address in the
11577 first instruction. It's safe to use IP as the target of the
11578 load since the call will kill it anyway. */
11579 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11581 output_asm_insn ("blx%?\t%|ip", operands);
11584 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11586 output_asm_insn ("bx%?\t%|ip", operands);
11588 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11593 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11594 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11601 /* Output a move from arm registers to an fpa register.
11602 OPERANDS[0] is an fpa register.
11603 OPERANDS[1] is the first register of an arm register pair. */
11605 output_mov_long_double_fpa_from_arm (rtx *operands)
11607 int arm_reg0 = REGNO (operands[1]);
11610 gcc_assert (arm_reg0 != IP_REGNUM);
11612 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11613 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11614 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11616 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11617 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11622 /* Output a move from an fpa register to arm registers.
11623 OPERANDS[0] is the first register of an arm register pair.
11624 OPERANDS[1] is an fpa register. */
11626 output_mov_long_double_arm_from_fpa (rtx *operands)
11628 int arm_reg0 = REGNO (operands[0]);
11631 gcc_assert (arm_reg0 != IP_REGNUM);
11633 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11634 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11635 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11637 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11638 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11642 /* Output a move of a long double from arm registers to arm registers.
11643 OPERANDS[0] is the destination.
11644 OPERANDS[1] is the source. */
11646 output_mov_long_double_arm_from_arm (rtx *operands)
11648 /* We have to be careful here because the two might overlap. */
11649 int dest_start = REGNO (operands[0]);
11650 int src_start = REGNO (operands[1]);
11654 if (dest_start < src_start)
11656 for (i = 0; i < 3; i++)
11658 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11659 ops[1] = gen_rtx_REG (SImode, src_start + i);
11660 output_asm_insn ("mov%?\t%0, %1", ops);
11665 for (i = 2; i >= 0; i--)
11667 ops[0] = gen_rtx_REG (SImode, dest_start + i);
11668 ops[1] = gen_rtx_REG (SImode, src_start + i);
11669 output_asm_insn ("mov%?\t%0, %1", ops);
11677 arm_emit_movpair (rtx dest, rtx src)
11679 /* If the src is an immediate, simplify it. */
11680 if (CONST_INT_P (src))
11682 HOST_WIDE_INT val = INTVAL (src);
11683 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11684 if ((val >> 16) & 0x0000ffff)
11685 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11687 GEN_INT ((val >> 16) & 0x0000ffff));
11690 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11691 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
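/* Illustrative sketch, not part of the original file: the immediate
   split performed above for a constant source.  The wording of the two
   steps is only suggestive of the patterns the two sets match; the
   high half is skipped entirely when it is zero.  */
static ATTRIBUTE_UNUSED void
example_movpair_const (unsigned HOST_WIDE_INT val)
{
  unsigned int lo = val & 0x0000ffff;
  unsigned int hi = (val >> 16) & 0x0000ffff;

  printf ("set dest to #%u\n", lo);		/* low 16 bits */
  if (hi != 0)
    printf ("insert #%u into bits 16-31\n", hi);  /* via zero_extract */
}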
11694 /* Output a move from arm registers to an fpa register.
11695 OPERANDS[0] is an fpa register.
11696 OPERANDS[1] is the first register of an arm register pair. */
11698 output_mov_double_fpa_from_arm (rtx *operands)
11700 int arm_reg0 = REGNO (operands[1]);
11703 gcc_assert (arm_reg0 != IP_REGNUM);
11705 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11706 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11707 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11708 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11712 /* Output a move from an fpa register to arm registers.
11713 OPERANDS[0] is the first register of an arm register pair.
11714 OPERANDS[1] is an fpa register. */
11716 output_mov_double_arm_from_fpa (rtx *operands)
11718 int arm_reg0 = REGNO (operands[0]);
11721 gcc_assert (arm_reg0 != IP_REGNUM);
11723 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11724 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11725 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11726 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11730 /* Output a move between double words.
11731 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11732 or MEM<-REG and all MEMs must be offsettable addresses. */
11734 output_move_double (rtx *operands)
11736 enum rtx_code code0 = GET_CODE (operands[0]);
11737 enum rtx_code code1 = GET_CODE (operands[1]);
11742 unsigned int reg0 = REGNO (operands[0]);
11744 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11746 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
11748 switch (GET_CODE (XEXP (operands[1], 0)))
11752 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
11753 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11755 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11759 gcc_assert (TARGET_LDRD);
11760 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11765 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11767 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11772 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11774 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11778 gcc_assert (TARGET_LDRD);
11779 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11784 /* Autoincrement addressing modes should never have overlapping
11785 base and destination registers, and overlapping index registers
11786 are already prohibited, so this doesn't need to worry about
11788 otherops[0] = operands[0];
11789 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11790 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11792 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
11794 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
11796 /* Registers overlap so split out the increment. */
11797 output_asm_insn ("add%?\t%1, %1, %2", otherops);
11798 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
11802 /* Use a single insn if we can.
11803 FIXME: IWMMXT allows offsets larger than ldrd can
11804 handle, fix these up with a pair of ldr. */
11806 || GET_CODE (otherops[2]) != CONST_INT
11807 || (INTVAL (otherops[2]) > -256
11808 && INTVAL (otherops[2]) < 256))
11809 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
11812 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
11813 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11819 /* Use a single insn if we can.
11820 FIXME: IWMMXT allows offsets larger than ldrd can handle,
11821 fix these up with a pair of ldr. */
11823 || GET_CODE (otherops[2]) != CONST_INT
11824 || (INTVAL (otherops[2]) > -256
11825 && INTVAL (otherops[2]) < 256))
11826 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
11829 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
11830 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
11837 /* We might be able to use ldrd %0, %1 here. However the range is
11838 different to ldr/adr, and it is broken on some ARMv7-M
11839 implementations. */
11840 /* Use the second register of the pair to avoid problematic conditional execution. */
11842 otherops[1] = operands[1];
11843 output_asm_insn ("adr%?\t%0, %1", otherops);
11844 operands[1] = otherops[0];
11846 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11848 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
11851 /* ??? This needs checking for thumb2. */
11853 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
11854 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
11856 otherops[0] = operands[0];
11857 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
11858 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
11860 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
11862 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
11864 switch ((int) INTVAL (otherops[2]))
11867 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
11872 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
11877 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
11881 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
11882 operands[1] = otherops[0];
11884 && (GET_CODE (otherops[2]) == REG
11886 || (GET_CODE (otherops[2]) == CONST_INT
11887 && INTVAL (otherops[2]) > -256
11888 && INTVAL (otherops[2]) < 256)))
11890 if (reg_overlap_mentioned_p (operands[0],
11894 /* Swap base and index registers over to
11895 avoid a conflict. */
11897 otherops[1] = otherops[2];
11900 /* If both registers conflict, it will usually
11901 have been fixed by a splitter. */
11902 if (reg_overlap_mentioned_p (operands[0], otherops[2])
11903 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
11905 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11906 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
11910 otherops[0] = operands[0];
11911 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
11916 if (GET_CODE (otherops[2]) == CONST_INT)
11918 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
11919 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
11921 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11924 output_asm_insn ("add%?\t%0, %1, %2", otherops);
11927 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
11930 return "ldr%(d%)\t%0, [%1]";
11932 return "ldm%(ia%)\t%1, %M0";
11936 otherops[1] = adjust_address (operands[1], SImode, 4);
11937 /* Take care of overlapping base/data reg. */
11938 if (reg_mentioned_p (operands[0], operands[1]))
11940 output_asm_insn ("ldr%?\t%0, %1", otherops);
11941 output_asm_insn ("ldr%?\t%0, %1", operands);
11945 output_asm_insn ("ldr%?\t%0, %1", operands);
11946 output_asm_insn ("ldr%?\t%0, %1", otherops);
11953 /* Constraints should ensure this. */
11954 gcc_assert (code0 == MEM && code1 == REG);
11955 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
11957 switch (GET_CODE (XEXP (operands[0], 0)))
11961 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
11963 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11967 gcc_assert (TARGET_LDRD);
11968 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
11973 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
11975 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
11980 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
11982 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
11986 gcc_assert (TARGET_LDRD);
11987 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
11992 otherops[0] = operands[1];
11993 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
11994 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
11996 /* IWMMXT allows offsets larger than ldrd can handle,
11997 fix these up with a pair of ldr. */
11999 && GET_CODE (otherops[2]) == CONST_INT
12000 && (INTVAL(otherops[2]) <= -256
12001 || INTVAL(otherops[2]) >= 256))
12003 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12005 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12006 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12010 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12011 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12014 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12015 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12017 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12021 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12022 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12024 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12027 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12033 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12039 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12044 && (GET_CODE (otherops[2]) == REG
12046 || (GET_CODE (otherops[2]) == CONST_INT
12047 && INTVAL (otherops[2]) > -256
12048 && INTVAL (otherops[2]) < 256)))
12050 otherops[0] = operands[1];
12051 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12052 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12058 otherops[0] = adjust_address (operands[0], SImode, 4);
12059 otherops[1] = operands[1];
12060 output_asm_insn ("str%?\t%1, %0", operands);
12061 output_asm_insn ("str%?\t%H1, %0", otherops);
12068 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12069 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12072 output_move_quad (rtx *operands)
12074 if (REG_P (operands[0]))
12076 /* Load, or reg->reg move. */
12078 if (MEM_P (operands[1]))
12080 switch (GET_CODE (XEXP (operands[1], 0)))
12083 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12088 output_asm_insn ("adr%?\t%0, %1", operands);
12089 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12093 gcc_unreachable ();
12101 gcc_assert (REG_P (operands[1]));
12103 dest = REGNO (operands[0]);
12104 src = REGNO (operands[1]);
12106 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
12109 for (i = 0; i < 4; i++)
12111 ops[0] = gen_rtx_REG (SImode, dest + i);
12112 ops[1] = gen_rtx_REG (SImode, src + i);
12113 output_asm_insn ("mov%?\t%0, %1", ops);
12116 for (i = 3; i >= 0; i--)
12118 ops[0] = gen_rtx_REG (SImode, dest + i);
12119 ops[1] = gen_rtx_REG (SImode, src + i);
12120 output_asm_insn ("mov%?\t%0, %1", ops);
12126 gcc_assert (MEM_P (operands[0]));
12127 gcc_assert (REG_P (operands[1]));
12128 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12130 switch (GET_CODE (XEXP (operands[0], 0)))
12133 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12137 gcc_unreachable ();
12144 /* Output a VFP load or store instruction. */
12147 output_move_vfp (rtx *operands)
12149 rtx reg, mem, addr, ops[2];
12150 int load = REG_P (operands[0]);
12151 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12152 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12155 enum machine_mode mode;
12157 reg = operands[!load];
12158 mem = operands[load];
12160 mode = GET_MODE (reg);
12162 gcc_assert (REG_P (reg));
12163 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12164 gcc_assert (mode == SFmode
12168 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12169 gcc_assert (MEM_P (mem));
12171 addr = XEXP (mem, 0);
12173 switch (GET_CODE (addr))
12176 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12177 ops[0] = XEXP (addr, 0);
12182 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12183 ops[0] = XEXP (addr, 0);
12188 templ = "f%s%c%%?\t%%%s0, %%1%s";
12194 sprintf (buff, templ,
12195 load ? "ld" : "st",
12198 integer_p ? "\t%@ int" : "");
12199 output_asm_insn (buff, ops);
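/* Illustrative sketch, not part of the original file: what the sprintf
   above produces for a doubleword store with pre-decrement addressing.
   The "%?" and "%P" sequences that survive in the result are expanded
   later by the operand printer.  */
static ATTRIBUTE_UNUSED void
example_vfp_template (void)
{
  char buff[50];

  /* load == 0 (a store), dp == 1, integer_p == 0, PRE_DEC address.  */
  sprintf (buff, "f%smdb%c%%?\t%%0!, {%%%s1}%s", "st", 'd', "P", "");
  /* buff now holds: fstmdbd%?	%0!, {%P1}  */
  puts (buff);
}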
12204 /* Output a Neon quad-word load or store, or a load or store for
12205 larger structure modes.
12207 WARNING: The ordering of elements is weird in big-endian mode,
12208 because we use VSTM, as required by the EABI. GCC RTL defines
12209 element ordering based on in-memory order. This can differ
12210 from the architectural ordering of elements within a NEON register.
12211 The intrinsics defined in arm_neon.h use the NEON register element
12212 ordering, not the GCC RTL element ordering.
12214 For example, the in-memory ordering of a big-endian quadword
12215 vector with 16-bit elements when stored from register pair {d0,d1}
12216 will be (lowest address first, d0[N] is NEON register element N):
12218 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12220 When necessary, quadword registers (dN, dN+1) are moved to ARM
12221 registers from rN in the order:
12223 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12225 So that STM/LDM can be used on vectors in ARM registers, and the
12226 same memory layout will result as if VSTM/VLDM were used. */
12229 output_move_neon (rtx *operands)
12231 rtx reg, mem, addr, ops[2];
12232 int regno, load = REG_P (operands[0]);
12235 enum machine_mode mode;
12237 reg = operands[!load];
12238 mem = operands[load];
12240 mode = GET_MODE (reg);
12242 gcc_assert (REG_P (reg));
12243 regno = REGNO (reg);
12244 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12245 || NEON_REGNO_OK_FOR_QUAD (regno));
12246 gcc_assert (VALID_NEON_DREG_MODE (mode)
12247 || VALID_NEON_QREG_MODE (mode)
12248 || VALID_NEON_STRUCT_MODE (mode));
12249 gcc_assert (MEM_P (mem));
12251 addr = XEXP (mem, 0);
12253 /* Strip off const from addresses like (const (plus (...))). */
12254 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12255 addr = XEXP (addr, 0);
12257 switch (GET_CODE (addr))
12260 templ = "v%smia%%?\t%%0!, %%h1";
12261 ops[0] = XEXP (addr, 0);
12266 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12267 templ = "v%smdb%%?\t%%0!, %%h1";
12268 ops[0] = XEXP (addr, 0);
12273 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12274 gcc_unreachable ();
12279 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12282 for (i = 0; i < nregs; i++)
12284 /* We're only using DImode here because it's a convenient size. */
12285 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12286 ops[1] = adjust_address (mem, DImode, 8 * i);
12287 if (reg_overlap_mentioned_p (ops[0], mem))
12289 gcc_assert (overlap == -1);
12294 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12295 output_asm_insn (buff, ops);
12300 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12301 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12302 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12303 output_asm_insn (buff, ops);
12310 templ = "v%smia%%?\t%%m0, %%h1";
12315 sprintf (buff, templ, load ? "ld" : "st");
12316 output_asm_insn (buff, ops);
12321 /* Output an ADD r, s, #n where n may be too big for one instruction.
12322 If adding zero to one register, output nothing. */
12324 output_add_immediate (rtx *operands)
12326 HOST_WIDE_INT n = INTVAL (operands[2]);
12328 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12331 output_multi_immediate (operands,
12332 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12335 output_multi_immediate (operands,
12336 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12343 /* Output a multiple immediate operation.
12344 OPERANDS is the vector of operands referred to in the output patterns.
12345 INSTR1 is the output pattern to use for the first constant.
12346 INSTR2 is the output pattern to use for subsequent constants.
12347 IMMED_OP is the index of the constant slot in OPERANDS.
12348 N is the constant value. */
12349 static const char *
12350 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12351 int immed_op, HOST_WIDE_INT n)
12353 #if HOST_BITS_PER_WIDE_INT > 32
12359 /* Quick and easy output. */
12360 operands[immed_op] = const0_rtx;
12361 output_asm_insn (instr1, operands);
12366 const char * instr = instr1;
12368 /* Note that n is never zero here (which would give no output). */
12369 for (i = 0; i < 32; i += 2)
12373 operands[immed_op] = GEN_INT (n & (255 << i));
12374 output_asm_insn (instr, operands);
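/* Illustrative sketch, not part of the original file: the chunking loop
   above in isolation.  Each nonzero byte at an even rotation becomes one
   instruction; the first chunk uses INSTR1, subsequent chunks INSTR2,
   which is how output_add_immediate builds an oversized add/sub.  */
static ATTRIBUTE_UNUSED void
example_split_immediate (unsigned int n)
{
  const char *instr = "first";
  int i;

  for (i = 0; i < 32; i += 2)
    if (n & (255u << i))
      {
	printf ("%s chunk: #%u\n", instr, n & (255u << i));
	instr = "next";
	i += 6;			/* step past this byte; the loop adds 2.  */
      }
}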
12384 /* Return the name of a shifter operation. */
12385 static const char *
12386 arm_shift_nmem (enum rtx_code code)
12391 return ARM_LSL_NAME;
12407 /* Return the appropriate ARM instruction for the operation code.
12408 The returned result should not be overwritten. OP is the rtx of the
12409 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12412 arithmetic_instr (rtx op, int shift_first_arg)
12414 switch (GET_CODE (op))
12420 return shift_first_arg ? "rsb" : "sub";
12435 return arm_shift_nmem (GET_CODE (op));
12438 gcc_unreachable ();
12442 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12443 for the operation code. The returned result should not be overwritten.
12444 OP is the rtx code of the shift.
12445 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12447 static const char *
12448 shift_op (rtx op, HOST_WIDE_INT *amountp)
12451 enum rtx_code code = GET_CODE (op);
12453 switch (GET_CODE (XEXP (op, 1)))
12461 *amountp = INTVAL (XEXP (op, 1));
12465 gcc_unreachable ();
12471 gcc_assert (*amountp != -1);
12472 *amountp = 32 - *amountp;
12475 /* Fall through. */
12481 mnem = arm_shift_nmem (code);
12485 /* We never have to worry about the amount being other than a
12486 power of 2, since this case can never be reloaded from a reg. */
12487 gcc_assert (*amountp != -1);
12488 *amountp = int_log2 (*amountp);
12489 return ARM_LSL_NAME;
12492 gcc_unreachable ();
12495 if (*amountp != -1)
12497 /* This is not 100% correct, but follows from the desire to merge
12498 multiplication by a power of 2 with the recognizer for a
12499 shift. >=32 is not a valid shift for "lsl", so we must try and
12500 output a shift that produces the correct arithmetical result.
12501 Using lsr #32 is identical except for the fact that the carry bit
12502 is not set correctly if we set the flags; but we never use the
12503 carry bit from such an operation, so we can ignore that. */
12504 if (code == ROTATERT)
12505 /* Rotate is just modulo 32. */
12507 else if (*amountp != (*amountp & 31))
12509 if (code == ASHIFT)
12514 /* Shifts of 0 are no-ops. */
12522 /* Obtain the shift from the POWER of two. */
12524 static HOST_WIDE_INT
12525 int_log2 (HOST_WIDE_INT power)
12527 HOST_WIDE_INT shift = 0;
12529 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12531 gcc_assert (shift <= 31);
12538 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12539 because /bin/as is horribly restrictive. The judgement about
12540 whether or not each character is 'printable' (and can be output as
12541 is) or not (and must be printed with an octal escape) must be made
12542 with reference to the *host* character set -- the situation is
12543 similar to that discussed in the comments above pp_c_char in
12544 c-pretty-print.c. */
12546 #define MAX_ASCII_LEN 51
12549 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12552 int len_so_far = 0;
12554 fputs ("\t.ascii\t\"", stream);
12556 for (i = 0; i < len; i++)
12560 if (len_so_far >= MAX_ASCII_LEN)
12562 fputs ("\"\n\t.ascii\t\"", stream);
12568 if (c == '\\' || c == '\"')
12570 putc ('\\', stream);
12578 fprintf (stream, "\\%03o", c);
12583 fputs ("\"\n", stream);
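/* Illustrative sketch, not part of the original file: the escaping rule
   above applied to one character, returning how many bytes were written
   so the caller can track len_so_far.  ISPRINT is GCC's host-charset
   test from safe-ctype.h, assumed available via the usual includes.  */
static ATTRIBUTE_UNUSED int
example_ascii_escape_char (FILE *stream, int c)
{
  if (c == '\\' || c == '\"')
    {
      putc ('\\', stream);
      putc (c, stream);
      return 2;
    }
  if (ISPRINT (c))
    {
      putc (c, stream);
      return 1;
    }
  fprintf (stream, "\\%03o", c);	/* three-digit octal escape */
  return 4;
}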
12586 /* Compute the register save mask for registers 0 through 12
12587 inclusive. This code is used by arm_compute_save_reg_mask. */
12589 static unsigned long
12590 arm_compute_save_reg0_reg12_mask (void)
12592 unsigned long func_type = arm_current_func_type ();
12593 unsigned long save_reg_mask = 0;
12596 if (IS_INTERRUPT (func_type))
12598 unsigned int max_reg;
12599 /* Interrupt functions must not corrupt any registers,
12600 even call clobbered ones. If this is a leaf function
12601 we can just examine the registers used by the RTL, but
12602 otherwise we have to assume that whatever function is
12603 called might clobber anything, and so we have to save
12604 all the call-clobbered registers as well. */
12605 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12606 /* FIQ handlers have registers r8 - r12 banked, so
12607 we only need to check r0 - r7. Normal ISRs only
12608 bank r14 and r15, so we must check up to r12.
12609 r13 is the stack pointer which is always preserved,
12610 so we do not need to consider it here. */
12615 for (reg = 0; reg <= max_reg; reg++)
12616 if (df_regs_ever_live_p (reg)
12617 || (! current_function_is_leaf && call_used_regs[reg]))
12618 save_reg_mask |= (1 << reg);
12620 /* Also save the pic base register if necessary. */
12622 && !TARGET_SINGLE_PIC_BASE
12623 && arm_pic_register != INVALID_REGNUM
12624 && crtl->uses_pic_offset_table)
12625 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12629 /* In the normal case we only need to save those registers
12630 which are call saved and which are used by this function. */
12631 for (reg = 0; reg <= 11; reg++)
12632 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12633 save_reg_mask |= (1 << reg);
12635 /* Handle the frame pointer as a special case. */
12636 if (frame_pointer_needed)
12637 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12639 /* If we aren't loading the PIC register,
12640 don't stack it even though it may be live. */
12642 && !TARGET_SINGLE_PIC_BASE
12643 && arm_pic_register != INVALID_REGNUM
12644 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12645 || crtl->uses_pic_offset_table))
12646 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12648 /* The prologue will copy SP into R0, so save it. */
12649 if (IS_STACKALIGN (func_type))
12650 save_reg_mask |= 1;
12653 /* Save registers so the exception handler can modify them. */
12654 if (crtl->calls_eh_return)
12660 reg = EH_RETURN_DATA_REGNO (i);
12661 if (reg == INVALID_REGNUM)
12663 save_reg_mask |= 1 << reg;
12667 return save_reg_mask;
12671 /* Compute the number of bytes used to store the static chain register on the
12672 stack, above the stack frame. We need to know this accurately to get the
12673 alignment of the rest of the stack frame correct. */
12675 static int
arm_compute_static_chain_stack_bytes (void)
12677 unsigned long func_type = arm_current_func_type ();
12678 int static_chain_stack_bytes = 0;
12680 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
12681     && IS_NESTED (func_type)
12682     && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12683 static_chain_stack_bytes = 4;
12685 return static_chain_stack_bytes;
12689 /* Compute a bit mask of which registers need to be
12690 saved on the stack for the current function.
12691 This is used by arm_get_frame_offsets, which may add extra registers. */
12693 static unsigned long
12694 arm_compute_save_reg_mask (void)
12696 unsigned int save_reg_mask = 0;
12697 unsigned long func_type = arm_current_func_type ();
12700 if (IS_NAKED (func_type))
12701 /* This should never really happen. */
12704 /* If we are creating a stack frame, then we must save the frame pointer,
12705 IP (which will hold the old stack pointer), LR and the PC. */
12706 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12708 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12711 | (1 << PC_REGNUM);
12713 /* Volatile functions do not return, so there
12714 is no need to save any other registers. */
12715 if (IS_VOLATILE (func_type))
12716 return save_reg_mask;
12718 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12720 /* Decide if we need to save the link register.
12721 Interrupt routines have their own banked link register,
12722 so they never need to save it.
12723 Otherwise if we do not use the link register we do not need to save
12724 it. If we are pushing other registers onto the stack, however, we
12725 can save an instruction in the epilogue by pushing the link register
12726 now and then popping it back into the PC. This incurs extra memory
12727 accesses though, so we only do it when optimizing for size, and only
12728 if we know that we will not need a fancy return sequence. */
12729 if (df_regs_ever_live_p (LR_REGNUM)
12732 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12733 && !crtl->calls_eh_return))
12734 save_reg_mask |= 1 << LR_REGNUM;
12736 if (cfun->machine->lr_save_eliminated)
12737 save_reg_mask &= ~ (1 << LR_REGNUM);
12739 if (TARGET_REALLY_IWMMXT
12740 && ((bit_count (save_reg_mask)
12741 + ARM_NUM_INTS (crtl->args.pretend_args_size +
12742 arm_compute_static_chain_stack_bytes())
12745 /* The total number of registers that are going to be pushed
12746 onto the stack is odd. We need to ensure that the stack
12747 is 64-bit aligned before we start to save iWMMXt registers,
12748 and also before we start to create locals. (A local variable
12749 might be a double or long long which we will load/store using
12750 an iWMMXt instruction). Therefore we need to push another
12751 ARM register, so that the stack will be 64-bit aligned. We
12752 try to avoid using the arg registers (r0 -r3) as they might be
12753 used to pass values in a tail call. */
12754 for (reg = 4; reg <= 12; reg++)
12755 if ((save_reg_mask & (1 << reg)) == 0)
12759 save_reg_mask |= (1 << reg);
12762 cfun->machine->sibcall_blocked = 1;
12763 save_reg_mask |= (1 << 3);
12767 /* We may need to push an additional register for use initializing the
12768 PIC base register. */
12769 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
12770 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
12772 reg = thumb_find_work_register (1 << 4);
12773 if (!call_used_regs[reg])
12774 save_reg_mask |= (1 << reg);
12777 return save_reg_mask;
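/* Illustrative sketch, not part of the original file: the 64-bit
   alignment rule above in isolation.  If the total number of words
   pushed would be odd, one more register is added to the mask,
   preferring r4-r12 so the argument registers stay free for tail
   calls; falling back to r3 blocks sibcalls, as noted above.  */
static ATTRIBUTE_UNUSED unsigned long
example_pad_push_mask (unsigned long mask, unsigned int pretend_words)
{
  unsigned int reg;

  if ((bit_count (mask) + pretend_words) % 2 == 0)
    return mask;		/* already 64-bit aligned */

  for (reg = 4; reg <= 12; reg++)
    if ((mask & (1UL << reg)) == 0)
      return mask | (1UL << reg);

  return mask | (1UL << 3);	/* last resort: r3 */
}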
12781 /* Compute a bit mask of which registers need to be
12782 saved on the stack for the current function. */
12783 static unsigned long
12784 thumb1_compute_save_reg_mask (void)
12786 unsigned long mask;
12790 for (reg = 0; reg < 12; reg ++)
12791 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12795 && !TARGET_SINGLE_PIC_BASE
12796 && arm_pic_register != INVALID_REGNUM
12797 && crtl->uses_pic_offset_table)
12798 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12800 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
12801 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
12802 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12804 /* LR will also be pushed if any lo regs are pushed. */
12805 if (mask & 0xff || thumb_force_lr_save ())
12806 mask |= (1 << LR_REGNUM);
12808 /* Make sure we have a low work register if we need one.
12809 We will need one if we are going to push a high register,
12810 but we are not currently intending to push a low register. */
12811 if ((mask & 0xff) == 0
12812 && ((mask & 0x0f00) || TARGET_BACKTRACE))
12814 /* Use thumb_find_work_register to choose which register
12815 we will use. If the register is live then we will
12816 have to push it. Use LAST_LO_REGNUM as our fallback
12817 choice for the register to select. */
12818 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
12819 /* Make sure the register returned by thumb_find_work_register is
12820 not part of the return value. */
12821 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
12822 reg = LAST_LO_REGNUM;
12824 if (! call_used_regs[reg])
12828 /* The 504 below is 8 bytes less than 512 because there are two possible
12829 alignment words. We can't tell here if they will be present or not so we
12830 have to play it safe and assume that they are. */
12831 if ((CALLER_INTERWORKING_SLOT_SIZE +
12832 ROUND_UP_WORD (get_frame_size ()) +
12833 crtl->outgoing_args_size) >= 504)
12835 /* This is the same as the code in thumb1_expand_prologue() which
12836 determines which register to use for stack decrement. */
12837 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
12838 if (mask & (1 << reg))
12841 if (reg > LAST_LO_REGNUM)
12843 /* Make sure we have a register available for stack decrement. */
12844 mask |= 1 << LAST_LO_REGNUM;
12852 /* Return the number of bytes required to save VFP registers. */
12854 arm_get_vfp_saved_size (void)
12856 unsigned int regno;
12861 /* Space for saved VFP registers. */
12862 if (TARGET_HARD_FLOAT && TARGET_VFP)
12865 for (regno = FIRST_VFP_REGNUM;
12866 regno < LAST_VFP_REGNUM;
12869 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
12870 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
12874 /* Workaround ARM10 VFPr1 bug. */
12875 if (count == 2 && !arm_arch6)
12877 saved += count * 8;
12886 if (count == 2 && !arm_arch6)
12888 saved += count * 8;
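/* Illustrative sketch, not part of the original file: the computation
   above rewritten over a hypothetical live[] array standing in for the
   df_regs_ever_live_p/call_used_regs test on each register pair.  Runs
   of live pairs are measured, and two-register runs are padded to three
   on pre-v6 cores, mirroring vfp_emit_fstmd.  */
static ATTRIBUTE_UNUSED int
example_vfp_saved_size (const unsigned char *live, int num_pairs,
			int arch6)
{
  int i, count = 0, saved = 0;

  for (i = 0; i < num_pairs; i++)
    {
      if (!live[i])
	{
	  if (count == 2 && !arch6)
	    count++;		/* ARM10 VFPr1 errata padding */
	  saved += count * 8;
	  count = 0;
	}
      else
	count++;
    }

  if (count == 2 && !arch6)
    count++;
  saved += count * 8;
  return saved;
}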
12895 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
12896 everything bar the final return instruction. */
12898 output_return_instruction (rtx operand, int really_return, int reverse)
12900 char conditional[10];
12903 unsigned long live_regs_mask;
12904 unsigned long func_type;
12905 arm_stack_offsets *offsets;
12907 func_type = arm_current_func_type ();
12909 if (IS_NAKED (func_type))
12912 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12914 /* If this function was declared non-returning, and we have
12915 found a tail call, then we have to trust that the called
12916 function won't return. */
12921 /* Otherwise, trap an attempted return by aborting. */
12923 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
12925 assemble_external_libcall (ops[1]);
12926 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
12932 gcc_assert (!cfun->calls_alloca || really_return);
12934 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
12936 cfun->machine->return_used_this_function = 1;
12938 offsets = arm_get_frame_offsets ();
12939 live_regs_mask = offsets->saved_regs_mask;
12941 if (live_regs_mask)
12943 const char * return_reg;
12945 /* If we do not have any special requirements for function exit
12946 (e.g. interworking) then we can load the return address
12947 directly into the PC. Otherwise we must load it into LR. */
12949 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
12950 return_reg = reg_names[PC_REGNUM];
12952 return_reg = reg_names[LR_REGNUM];
12954 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
12956 /* There are three possible reasons for the IP register
12957 being saved. 1) a stack frame was created, in which case
12958 IP contains the old stack pointer, or 2) an ISR routine
12959 corrupted it, or 3) it was saved to align the stack on
12960 iWMMXt. In case 1, restore IP into SP, otherwise just ignore it. */
12962 if (frame_pointer_needed)
12964 live_regs_mask &= ~ (1 << IP_REGNUM);
12965 live_regs_mask |= (1 << SP_REGNUM);
12968 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
12971 /* On some ARM architectures it is faster to use LDR rather than
12972 LDM to load a single register. On other architectures, the
12973 cost is the same. In 26 bit mode, or for exception handlers,
12974 we have to use LDM to load the PC so that the CPSR is also restored. */
12976 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12977 if (live_regs_mask == (1U << reg))
12980 if (reg <= LAST_ARM_REGNUM
12981 && (reg != LR_REGNUM
12983 || ! IS_INTERRUPT (func_type)))
12985 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
12986 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
12993 /* Generate the load multiple instruction to restore the
12994 registers. Note we can get here, even if
12995 frame_pointer_needed is true, but only if sp already
12996 points to the base of the saved core registers. */
12997 if (live_regs_mask & (1 << SP_REGNUM))
12999 unsigned HOST_WIDE_INT stack_adjust;
13001 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13002 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13004 if (stack_adjust && arm_arch5 && TARGET_ARM)
13005 if (TARGET_UNIFIED_ASM)
13006 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13008 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13011 /* If we can't use ldmib (SA110 bug),
13012 then try to pop r3 instead. */
13014 live_regs_mask |= 1 << 3;
13016 if (TARGET_UNIFIED_ASM)
13017 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13019 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13023 if (TARGET_UNIFIED_ASM)
13024 sprintf (instr, "pop%s\t{", conditional);
13026 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13028 p = instr + strlen (instr);
13030 for (reg = 0; reg <= SP_REGNUM; reg++)
13031 if (live_regs_mask & (1 << reg))
13033 int l = strlen (reg_names[reg]);
13039 memcpy (p, ", ", 2);
13043 memcpy (p, "%|", 2);
13044 memcpy (p + 2, reg_names[reg], l);
13048 if (live_regs_mask & (1 << LR_REGNUM))
13050 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13051 /* If returning from an interrupt, restore the CPSR. */
13052 if (IS_INTERRUPT (func_type))
13059 output_asm_insn (instr, & operand);
13061 /* See if we need to generate an extra instruction to
13062 perform the actual function return. */
13064 && func_type != ARM_FT_INTERWORKED
13065 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13067 /* The return has already been handled
13068 by loading the LR into the PC. */
13075 switch ((int) ARM_FUNC_TYPE (func_type))
13079 /* ??? This is wrong for unified assembly syntax. */
13080 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13083 case ARM_FT_INTERWORKED:
13084 sprintf (instr, "bx%s\t%%|lr", conditional);
13087 case ARM_FT_EXCEPTION:
13088 /* ??? This is wrong for unified assembly syntax. */
13089 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13093 /* Use bx if it's available. */
13094 if (arm_arch5 || arm_arch4t)
13095 sprintf (instr, "bx%s\t%%|lr", conditional);
13097 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13101 output_asm_insn (instr, & operand);
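/* Illustrative sketch, not part of the original file: the return
   instructions selected by the switch above, stripped of the condition
   suffix plumbing.  The flags here are placeholders for the ARM_FT_*
   function-type tests.  */
static ATTRIBUTE_UNUSED const char *
example_return_insn (int is_isr, int is_exception, int interworked,
		     int have_bx)
{
  if (is_isr)
    return "subs\tpc, lr, #4";	/* also restores the CPSR */
  if (is_exception)
    return "movs\tpc, lr";
  if (interworked || have_bx)
    return "bx\tlr";
  return "mov\tpc, lr";
}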
13107 /* Write the function name into the code section, directly preceding
13108 the function prologue.
13110 Code will be output similar to this:
13112 .ascii "arm_poke_function_name", 0
13115 .word 0xff000000 + (t1 - t0)
13116 arm_poke_function_name
13118 stmfd sp!, {fp, ip, lr, pc}
13121 When performing a stack backtrace, code can inspect the value
13122 of 'pc' stored at 'fp' + 0. If the trace function then looks
13123 at location pc - 12 and the top 8 bits are set, then we know
13124 that there is a function name embedded immediately preceding this
13125 location, whose length is given by ((pc[-3]) & 0x00ffffff).
13127 We assume that pc is declared as a pointer to an unsigned long.
13129 It is of no benefit to output the function name if we are assembling
13130 a leaf function. These function types will not contain a stack
13131 backtrace structure; therefore it is not possible to determine the function name. */
13134 arm_poke_function_name (FILE *stream, const char *name)
13136 unsigned long alignlength;
13137 unsigned long length;
13140 length = strlen (name) + 1;
13141 alignlength = ROUND_UP_WORD (length);
13143 ASM_OUTPUT_ASCII (stream, name, length);
13144 ASM_OUTPUT_ALIGN (stream, 2);
13145 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13146 assemble_aligned_integer (UNITS_PER_WORD, x);
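/* Illustrative sketch, not part of the original file: how a backtracer
   could recover the poked name.  PC is assumed to point just past the
   marker word, as described above; the low 24 bits of the marker hold
   the word-aligned name length, so the name starts that many bytes
   before the marker.  */
static ATTRIBUTE_UNUSED const char *
example_read_poked_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* the word at pc - 12 */

  if ((marker & 0xff000000UL) != 0xff000000UL)
    return NULL;			/* no name embedded here */

  return (const char *) (pc - 3) - (marker & 0x00ffffffUL);
}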
13149 /* Place some comments into the assembler stream
13150 describing the current function. */
13152 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13154 unsigned long func_type;
13158 thumb1_output_function_prologue (f, frame_size);
13162 /* Sanity check. */
13163 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13165 func_type = arm_current_func_type ();
13167 switch ((int) ARM_FUNC_TYPE (func_type))
13170 case ARM_FT_NORMAL:
13172 case ARM_FT_INTERWORKED:
13173 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13176 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13179 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13181 case ARM_FT_EXCEPTION:
13182 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13186 if (IS_NAKED (func_type))
13187 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13189 if (IS_VOLATILE (func_type))
13190 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13192 if (IS_NESTED (func_type))
13193 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13194 if (IS_STACKALIGN (func_type))
13195 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13197 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13199 crtl->args.pretend_args_size, frame_size);
13201 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13202 frame_pointer_needed,
13203 cfun->machine->uses_anonymous_args);
13205 if (cfun->machine->lr_save_eliminated)
13206 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13208 if (crtl->calls_eh_return)
13209 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13214 arm_output_epilogue (rtx sibling)
13217 unsigned long saved_regs_mask;
13218 unsigned long func_type;
13219 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13220 frame that is $fp + 4 for a non-variadic function. */
13221 int floats_offset = 0;
13223 FILE * f = asm_out_file;
13224 unsigned int lrm_count = 0;
13225 int really_return = (sibling == NULL);
13227 arm_stack_offsets *offsets;
13229 /* If we have already generated the return instruction
13230 then it is futile to generate anything else. */
13231 if (use_return_insn (FALSE, sibling)
13232     && (cfun->machine->return_used_this_function != 0))
13235 func_type = arm_current_func_type ();
13237 if (IS_NAKED (func_type))
13238 /* Naked functions don't have epilogues. */
13241 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13245 /* A volatile function should never return. Call abort. */
13246 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13247 assemble_external_libcall (op);
13248 output_asm_insn ("bl\t%a0", &op);
13253 /* If we are throwing an exception, then we really must be doing a
13254 return, so we can't tail-call. */
13255 gcc_assert (!crtl->calls_eh_return || really_return);
13257 offsets = arm_get_frame_offsets ();
13258 saved_regs_mask = offsets->saved_regs_mask;
13261 lrm_count = bit_count (saved_regs_mask);
13263 floats_offset = offsets->saved_args;
13264 /* Compute how far away the floats will be. */
13265 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13266 if (saved_regs_mask & (1 << reg))
13267 floats_offset += 4;
13269 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13271 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13272 int vfp_offset = offsets->frame;
13274 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13276 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13277 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13279 floats_offset += 12;
13280 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13281 reg, FP_REGNUM, floats_offset - vfp_offset);
13286 start_reg = LAST_FPA_REGNUM;
13288 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13290 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13292 floats_offset += 12;
13294 /* We can't unstack more than four registers at once. */
13295 if (start_reg - reg == 3)
13297 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13298 reg, FP_REGNUM, floats_offset - vfp_offset);
13299 start_reg = reg - 1;
13304 if (reg != start_reg)
13305 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13306 reg + 1, start_reg - reg,
13307 FP_REGNUM, floats_offset - vfp_offset);
13308 start_reg = reg - 1;
13312 /* Just in case the last register checked also needs unstacking. */
13313 if (reg != start_reg)
13314 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13315 reg + 1, start_reg - reg,
13316 FP_REGNUM, floats_offset - vfp_offset);
13319 if (TARGET_HARD_FLOAT && TARGET_VFP)
13323 /* The fldmd insns do not have base+offset addressing
13324 modes, so we use IP to hold the address. */
13325 saved_size = arm_get_vfp_saved_size ();
13327 if (saved_size > 0)
13329 floats_offset += saved_size;
13330 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13331 FP_REGNUM, floats_offset - vfp_offset);
13333 start_reg = FIRST_VFP_REGNUM;
13334 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13336 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13337 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13339 if (start_reg != reg)
13340 vfp_output_fldmd (f, IP_REGNUM,
13341 (start_reg - FIRST_VFP_REGNUM) / 2,
13342 (reg - start_reg) / 2);
13343 start_reg = reg + 2;
13346 if (start_reg != reg)
13347 vfp_output_fldmd (f, IP_REGNUM,
13348 (start_reg - FIRST_VFP_REGNUM) / 2,
13349 (reg - start_reg) / 2);
13354 /* The frame pointer is guaranteed to be non-double-word aligned.
13355 This is because it is set to (old_stack_pointer - 4) and the
13356 old_stack_pointer was double word aligned. Thus the offset to
13357 the iWMMXt registers to be loaded must also be non-double-word
13358 sized, so that the resultant address *is* double-word aligned.
13359 We can ignore floats_offset since that was already included in
13360 the live_regs_mask. */
13361 lrm_count += (lrm_count % 2 ? 2 : 1);
13363 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13364 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13366 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13367 reg, FP_REGNUM, lrm_count * 4);
13372 /* saved_regs_mask should contain the IP, which at the time of stack
13373 frame generation actually contains the old stack pointer. So a
13374 quick way to unwind the stack is just pop the IP register directly
13375 into the stack pointer. */
13376 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13377 saved_regs_mask &= ~ (1 << IP_REGNUM);
13378 saved_regs_mask |= (1 << SP_REGNUM);
13380 /* There are two registers left in saved_regs_mask - LR and PC. We
13381 only need to restore the LR register (the return address), but to
13382 save time we can load it directly into the PC, unless we need a
13383 special function exit sequence, or we are not really returning. */
13385 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13386 && !crtl->calls_eh_return)
13387 /* Delete the LR from the register mask, so that the LR on
13388 the stack is loaded into the PC in the register mask. */
13389 saved_regs_mask &= ~ (1 << LR_REGNUM);
13391 saved_regs_mask &= ~ (1 << PC_REGNUM);
13393 /* We must use SP as the base register, because SP is one of the
13394 registers being restored. If an interrupt or page fault
13395 happens in the ldm instruction, the SP might or might not
13396 have been restored. That would be bad, as then SP will no
13397 longer indicate the safe area of stack, and we can get stack
13398 corruption. Using SP as the base register means that it will
13399 be reset correctly to the original value, should an interrupt
13400 occur. If the stack pointer already points at the right
13401 place, then omit the subtraction. */
13402 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13403 || cfun->calls_alloca)
13404 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13405 4 * bit_count (saved_regs_mask));
13406 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13408 if (IS_INTERRUPT (func_type))
13409 /* Interrupt handlers will have pushed the
13410 IP onto the stack, so restore it now. */
13411 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13415 /* This branch is executed for ARM mode (non-apcs frames) and
13416 Thumb-2 mode. Frame layout is essentially the same for those
13417 cases, except that in ARM mode the frame pointer points to the
13418 first saved register, while in Thumb-2 mode it points to the
13419 last saved register.
13421 It is possible to make the frame pointer point to the last saved
13422 register in both cases, and remove some of the conditionals below.
13423 That means the fp setup in the prologue would be just "mov fp, sp"
13424 and the sp restore in the epilogue would be just "mov sp, fp", whereas
13425 now we have to use add/sub in those cases. However, the value
13426 of that would be marginal, as both mov and add/sub are 32-bit
13427 in ARM mode, and it would require extra conditionals
13428 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13429 (where the frame pointer is required to point at the first register)
13430 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
13431 until a real need arises. */
13432 unsigned HOST_WIDE_INT amount;
13434 /* Restore stack pointer if necessary. */
13435 if (TARGET_ARM && frame_pointer_needed)
13437 operands[0] = stack_pointer_rtx;
13438 operands[1] = hard_frame_pointer_rtx;
13440 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13441 output_add_immediate (operands);
13445 if (frame_pointer_needed)
13447 /* For Thumb-2 restore sp from the frame pointer.
13448 Operand restrictions mean we have to increment FP, then copy it to SP. */
13450 amount = offsets->locals_base - offsets->saved_regs;
13451 operands[0] = hard_frame_pointer_rtx;
13455 unsigned long count;
13456 operands[0] = stack_pointer_rtx;
13457 amount = offsets->outgoing_args - offsets->saved_regs;
13458 /* Pop call-clobbered registers if it avoids a
13459 separate stack adjustment. */
13460 count = offsets->saved_regs - offsets->saved_args;
13463 && !crtl->calls_eh_return
13464 && bit_count (saved_regs_mask) * 4 == count
13465 && !IS_INTERRUPT (func_type)
13466 && !crtl->tail_call_emit)
13468 unsigned long mask;
13469 mask = (1 << (arm_size_return_regs () / 4)) - 1;
13471 mask &= ~saved_regs_mask;
13473 while (bit_count (mask) * 4 > amount)
13475 while ((mask & (1 << reg)) == 0)
13477 mask &= ~(1 << reg);
13479 if (bit_count (mask) * 4 == amount) {
13481 saved_regs_mask |= mask;
13488 operands[1] = operands[0];
13489 operands[2] = GEN_INT (amount);
13490 output_add_immediate (operands);
13492 if (frame_pointer_needed)
13493 asm_fprintf (f, "\tmov\t%r, %r\n",
13494 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13497 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13499 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13500 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13501 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13506 start_reg = FIRST_FPA_REGNUM;
13508 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13510 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13512 if (reg - start_reg == 3)
13514 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13515 start_reg, SP_REGNUM);
13516 start_reg = reg + 1;
13521 if (reg != start_reg)
13522 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13523 start_reg, reg - start_reg,
13526 start_reg = reg + 1;
13530 /* Just in case the last register checked also needs unstacking. */
13531 if (reg != start_reg)
13532 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13533 start_reg, reg - start_reg, SP_REGNUM);
13536 if (TARGET_HARD_FLOAT && TARGET_VFP)
13538 start_reg = FIRST_VFP_REGNUM;
13539 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13541 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13542 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13544 if (start_reg != reg)
13545 vfp_output_fldmd (f, SP_REGNUM,
13546 (start_reg - FIRST_VFP_REGNUM) / 2,
13547 (reg - start_reg) / 2);
13548 start_reg = reg + 2;
13551 if (start_reg != reg)
13552 vfp_output_fldmd (f, SP_REGNUM,
13553 (start_reg - FIRST_VFP_REGNUM) / 2,
13554 (reg - start_reg) / 2);
13557 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13558 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13559 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13561 /* If we can, restore the LR into the PC. */
13562 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13563 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13564 && !IS_STACKALIGN (func_type)
13566 && crtl->args.pretend_args_size == 0
13567 && saved_regs_mask & (1 << LR_REGNUM)
13568 && !crtl->calls_eh_return)
13570 saved_regs_mask &= ~ (1 << LR_REGNUM);
13571 saved_regs_mask |= (1 << PC_REGNUM);
13572 rfe = IS_INTERRUPT (func_type);
13577 /* Load the registers off the stack. If we only have one register
13578 to load, use the LDR instruction - it is faster. For Thumb-2,
13579 always use pop and the assembler will pick the best instruction. */
13580 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13581 && !IS_INTERRUPT (func_type))
13583 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13585 else if (saved_regs_mask)
13587 if (saved_regs_mask & (1 << SP_REGNUM))
13588 /* Note - write back to the stack register is not enabled
13589 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13590 in the list of registers and if we add writeback the
13591 instruction becomes UNPREDICTABLE. */
13592 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13594 else if (TARGET_ARM)
13595 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13598 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13601 if (crtl->args.pretend_args_size)
13603 /* Unwind the pre-pushed regs. */
13604 operands[0] = operands[1] = stack_pointer_rtx;
13605 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13606 output_add_immediate (operands);
13610 /* We may have already restored PC directly from the stack. */
13611 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13614 /* Stack adjustment for exception handler. */
13615 if (crtl->calls_eh_return)
13616 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13617 ARM_EH_STACKADJ_REGNUM);
13619 /* Generate the return instruction. */
13620 switch ((int) ARM_FUNC_TYPE (func_type))
13624 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13627 case ARM_FT_EXCEPTION:
13628 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13631 case ARM_FT_INTERWORKED:
13632 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13636 if (IS_STACKALIGN (func_type))
13638 /* See comment in arm_expand_prologue. */
13639 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13641 if (arm_arch5 || arm_arch4t)
13642 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13644 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
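/* Illustrative sketch, not part of the original file: the
   pop-instead-of-add trick used above when optimizing for size.
   Starting from the argument registers that neither carry the return
   value nor are already being restored, registers are dropped from the
   bottom until popping the remainder soaks up exactly AMOUNT bytes of
   stack adjustment; if that works out, they are folded into the
   restore mask, otherwise the mask is left alone and an explicit
   "add sp, sp, #n" is emitted instead.  */
static ATTRIBUTE_UNUSED unsigned long
example_fold_adjust_into_pop (unsigned long saved_regs_mask,
			      unsigned long amount,
			      unsigned int return_bytes)
{
  /* Keep only the r0-r3 registers that do not carry the return value.  */
  unsigned long mask = ((1UL << (return_bytes / 4)) - 1) ^ 0xfUL;
  unsigned int reg = 0;

  mask &= ~saved_regs_mask;
  while (bit_count (mask) * 4 > amount)
    {
      while ((mask & (1UL << reg)) == 0)
	reg++;
      mask &= ~(1UL << reg);
    }

  if (bit_count (mask) * 4 == amount)
    return saved_regs_mask | mask;	/* pop covers the adjustment */
  return saved_regs_mask;
}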
13652 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13653 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13655 arm_stack_offsets *offsets;
13661 /* Emit any call-via-reg trampolines that are needed for v4t support
13662 of call_reg and call_value_reg type insns. */
13663 for (regno = 0; regno < LR_REGNUM; regno++)
13665 rtx label = cfun->machine->call_via[regno];
13669 switch_to_section (function_section (current_function_decl));
13670 targetm.asm_out.internal_label (asm_out_file, "L",
13671 CODE_LABEL_NUMBER (label));
13672 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13676 /* ??? Probably not safe to set this here, since it assumes that a
13677 function will be emitted as assembly immediately after we generate
13678 RTL for it. This does not happen for inline functions. */
13679 cfun->machine->return_used_this_function = 0;
13681 else /* TARGET_32BIT */
13683 /* We need to take into account any stack-frame rounding. */
13684 offsets = arm_get_frame_offsets ();
13686 gcc_assert (!use_return_insn (FALSE, NULL)
13687 || (cfun->machine->return_used_this_function != 0)
13688 || offsets->saved_regs == offsets->outgoing_args
13689 || frame_pointer_needed);
13691 /* Reset the ARM-specific per-function variables. */
13692 after_arm_reorg = 0;
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect the actual semantics
   of the operation very well, we need to annotate the insn for the
   benefit of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;
  int i, j;
13711 for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);
13717 /* We don't record the PC in the dwarf frame information. */
13718 num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;
13722 /* For the body of the insn we are going to generate an UNSPEC in
13723 parallel with several USEs. This allows the insn to be recognized
13724 by the push_multi pattern in the arm.md file. The insn looks
     something like this:

       (parallel [
           (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI 11 fp))
           (use (reg:SI 12 ip))
           (use (reg:SI 14 lr))
           (use (reg:SI 15 pc))
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
        ])

     This sequence is used both by the code to support stack unwinding for
     exception handlers and the code to generate dwarf2 frame debugging.  */
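  /* For example, emit_multi_reg_push ((1 << 4) | (1 << LR_REGNUM))
     pushes {r4, lr} and attaches a note recording the two word stores
     plus a single 8-byte decrement of the stack pointer.  */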
13754 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13755 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13756 dwarf_par_index = 1;
13758 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13760 if (mask & (1 << i))
13762 reg = gen_rtx_REG (SImode, i);
13764 XVECEXP (par, 0, 0)
13765 = gen_rtx_SET (VOIDmode,
13766 gen_frame_mem (BLKmode,
13767 gen_rtx_PRE_DEC (BLKmode,
13768 stack_pointer_rtx)),
13769 gen_rtx_UNSPEC (BLKmode,
13770 gen_rtvec (1, reg),
13771 UNSPEC_PUSH_MULT));
13773 if (i != PC_REGNUM)
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
              dwarf_par_index++;
            }
          break;
13787 for (j = 1, i++; j < num_regs; i++)
13789 if (mask & (1 << i))
13791 reg = gen_rtx_REG (SImode, i);
13793 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
          if (i != PC_REGNUM)
            {
              tmp
                = gen_rtx_SET (VOIDmode,
                               gen_frame_mem (SImode,
                                              plus_constant (stack_pointer_rtx,
                                                             4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }
          j++;
13811 par = emit_insn (par);
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -4 * num_regs));
13816 RTX_FRAME_RELATED_P (tmp) = 1;
13817 XVECEXP (dwarf, 0, 0) = tmp;
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
13824 /* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
{
13828 enum machine_mode mode;
13830 if (crtl->return_rtx != 0)
13831 mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));
  return GET_MODE_SIZE (mode);
}
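/* Emit a single SFM instruction that stores COUNT FPA registers
   starting at BASE_REG, together with a REG_FRAME_RELATED_EXPR note
   describing the individual 12-byte stores and the stack decrement, in
   the same style as emit_multi_reg_push above.  */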
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;
13846 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13847 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13849 reg = gen_rtx_REG (XFmode, base_reg++);
13851 XVECEXP (par, 0, 0)
13852 = gen_rtx_SET (VOIDmode,
13853 gen_frame_mem (BLKmode,
13854 gen_rtx_PRE_DEC (BLKmode,
13855 stack_pointer_rtx)),
13856 gen_rtx_UNSPEC (BLKmode,
13857 gen_rtvec (1, reg),
13858 UNSPEC_PUSH_MULT));
13859 tmp = gen_rtx_SET (VOIDmode,
13860 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
13861 RTX_FRAME_RELATED_P (tmp) = 1;
13862 XVECEXP (dwarf, 0, 1) = tmp;
13864 for (i = 1; i < count; i++)
13866 reg = gen_rtx_REG (XFmode, base_reg++);
13867 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (XFmode,
                                        plus_constant (stack_pointer_rtx,
                                                       i * 12)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
13875 XVECEXP (dwarf, 0, i + 1) = tmp;
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -12 * count));
13882 RTX_FRAME_RELATED_P (tmp) = 1;
13883 XVECEXP (dwarf, 0, 0) = tmp;
13885 par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  return par;
}
13892 /* Return true if the current function needs to save/restore LR. */
static bool
thumb_force_lr_save (void)
{
13897 return !cfun->machine->lr_save_eliminated
13898 && (!leaf_function_p ()
13899 || thumb_far_jump_used_p ()
13900 || df_regs_ever_live_p (LR_REGNUM));
13904 /* Compute the distance from register FROM to register TO.
13905 These can be the arg pointer (26), the soft frame pointer (25),
13906 the stack pointer (13) or the hard frame pointer (11).
13907 In thumb mode r7 is used as the soft frame pointer, if needed.
13908 Typical stack layout looks like this:
        old stack pointer -> |    |
                              ----
                             |    |  saved arguments for
                             |    |  vararg functions
                              ----
    hard FP & arg pointer -> |    |  stack
                             |    |  frame
                              ----
                             |    |  call saved
      soft frame pointer  -> |    |  registers
                              ----
                             |    |  local
      locals base pointer -> |    |  variables
                              ----
                             |    |  outgoing
    current stack pointer -> |    |  arguments
                              ----
13938 For a given function some or all of these stack components
13939 may not be needed, giving rise to the possibility of
13940 eliminating some of the registers.
13942 The values returned by this function must reflect the behavior
13943 of arm_expand_prologue() and arm_compute_save_reg_mask().
13945 The sign of the number returned reflects the direction of stack
13946 growth, so the values are positive for all eliminations except
13947 from the soft frame pointer to the hard frame pointer.
   SFP may point just inside the local variables block to ensure correct
   alignment.  */
13953 /* Calculate stack offsets. These are used to calculate register elimination
13954 offsets and in prologue/epilogue code. Also calculates which registers
13955 should be saved. */
13957 static arm_stack_offsets *
13958 arm_get_frame_offsets (void)
13960 struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;
13968 offsets = &cfun->machine->stack_offsets;
13970 /* We need to know if we are a leaf function. Unfortunately, it
13971 is possible to be called after start_sequence has been called,
13972 which causes get_insns to return the insns for the sequence,
13973 not the function, which will cause leaf_function_p to return
     the incorrect result.

     We only need to know about leaf functions once reload has
     completed, and the frame size cannot be changed after that time,
     so we can safely use the cached value.  */
13977 frame size cannot be changed after that time, so we can safely
13978 use the cached value. */
  if (reload_completed)
    return offsets;
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
13985 frame_size = ROUND_UP_WORD (get_frame_size ());
13987 leaf = leaf_function_p ();
13989 /* Space for variadic functions. */
13990 offsets->saved_args = crtl->args.pretend_args_size;
13992 /* In Thumb mode this is incorrect, but never used. */
13993 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
13994 arm_compute_static_chain_stack_bytes();
  if (TARGET_32BIT)
    {
      unsigned int regno;
14000 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14001 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14002 saved = core_saved;
14004 /* We know that SP will be doubleword aligned on entry, and we must
14005 preserve that condition at any subroutine call. We also require the
14006 soft frame pointer to be doubleword aligned. */
14008 if (TARGET_REALLY_IWMMXT)
14010 /* Check for the call-saved iWMMXt registers. */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }
14018 func_type = arm_current_func_type ();
14019 if (! IS_VOLATILE (func_type))
14021 /* Space for saved FPA registers. */
          for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 12;
14026 /* Space for saved VFP registers. */
14027 if (TARGET_HARD_FLOAT && TARGET_VFP)
14028 saved += arm_get_vfp_saved_size ();
14031 else /* TARGET_THUMB1 */
14033 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14034 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14035 saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }
14040 /* Saved registers include the stack frame. */
14041 offsets->saved_regs = offsets->saved_args + saved +
14042 arm_compute_static_chain_stack_bytes();
14043 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
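  /* For instance, a 32-bit function with 8 bytes of pretend args that
     saves r4-r11 and lr (36 bytes of core registers) ends up with
     saved_regs = 44 at this point, before any alignment padding is
     added below.  */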
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
14048 offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
14053 /* Ensure SFP has the correct alignment. */
14054 if (ARM_DOUBLEWORD_ALIGN
14055 && (offsets->soft_frame & 7))
14057 offsets->soft_frame += 4;
14058 /* Try to align stack by pushing an extra reg. Don't bother doing this
14059 when there is a stack frame as the alignment will be rolled into
14060 the normal stack adjustment. */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (!crtl->tail_call_emit
              && arm_size_return_regs () <= 12)
            reg = 3;
          else
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
              if ((offsets->saved_regs_mask & (1 << i)) == 0)
                {
                  reg = i;
                  break;
                }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
14091 offsets->locals_base = offsets->soft_frame + frame_size;
14092 offsets->outgoing_args = (offsets->locals_base
14093 + crtl->outgoing_args_size);
14095 if (ARM_DOUBLEWORD_ALIGN)
14097 /* Ensure SP remains doubleword aligned. */
14098 if (offsets->outgoing_args & 7)
14099 offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
14107 /* Calculate the relative offsets for the different stack pointers. Positive
14108 offsets are in the direction of stack growth. */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
14113 arm_stack_offsets *offsets;
14115 offsets = arm_get_frame_offsets ();
14117 /* OK, now we have enough information to compute the distances.
14118 There must be an entry in these switch tables for each pair
14119 of registers in ELIMINABLE_REGS, even if some of the entries
14120 seem to be redundant or useless. */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;
14134 case ARM_HARD_FRAME_POINTER_REGNUM:
14135 /* This is only non-zero in the case where the static chain register
14136 is stored above the frame. */
14137 return offsets->frame - offsets->saved_args - 4;
14139 case STACK_POINTER_REGNUM:
14140 /* If nothing has been pushed on the stack at all
14141 then this will return -4. This *is* correct! */
14142 return offsets->outgoing_args - (offsets->saved_args + 4);
        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;
14155 case ARM_HARD_FRAME_POINTER_REGNUM:
14156 /* The hard frame pointer points to the top entry in the
14157 stack frame. The soft frame pointer to the bottom entry
14158 in the stack frame. If there is no stack frame at all,
14159 then they are identical. */
14161 return offsets->frame - offsets->soft_frame;
14163 case STACK_POINTER_REGNUM:
14164 return offsets->outgoing_args - offsets->soft_frame;
        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
14172 /* You cannot eliminate from the stack pointer.
14173 In theory you could eliminate from the hard frame
14174 pointer to the stack pointer, but this will never
14175 happen, since if a stack frame is not needed the
14176 hard frame pointer will never be used. */
14177 gcc_unreachable ();
14181 /* Given FROM and TO register numbers, say whether this elimination is
14182 allowed. Frame pointer elimination is automatically handled.
14184 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14185 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14186 pointer, we must eliminate FRAME_POINTER_REGNUM into
14187 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14188 ARG_POINTER_REGNUM. */
static bool
arm_can_eliminate (const int from, const int to)
{
14193 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14194 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14195 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
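/* For instance, once reload decides that a frame pointer is needed,
   eliminating into STACK_POINTER_REGNUM is rejected above and the
   eliminations fall back to the hard frame pointer instead.  */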
14200 /* Emit RTL to save coprocessor registers on function entry. Returns the
14201 number of bytes pushed. */
static int
arm_save_coproc_regs (void)
{
14206 int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;
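  /* Save any call-saved iWMMXt registers with pre-decrement stores;
     each register is 64 bits wide, hence the 8 bytes counted per
     save.  */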
14211 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14212 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14214 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
14215 insn = gen_rtx_MEM (V2SImode, insn);
14216 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
  /* Save any floating point call-saved registers used by this
     function.  */
14223 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
14225 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14226 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14228 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
14229 insn = gen_rtx_MEM (XFmode, insn);
14230 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
          RTX_FRAME_RELATED_P (insn) = 1;
          saved_size += 12;
        }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;
14239 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14241 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14243 if (start_reg - reg == 3)
              insn = emit_sfm (reg, 4);
              RTX_FRAME_RELATED_P (insn) = 1;
              saved_size += 48;

              start_reg = reg - 1;
          else
            {
              if (start_reg != reg)
14255 insn = emit_sfm (reg + 1, start_reg - reg);
14256 RTX_FRAME_RELATED_P (insn) = 1;
14257 saved_size += (start_reg - reg) * 12;
14259 start_reg = reg - 1;
14263 if (start_reg != reg)
14265 insn = emit_sfm (reg + 1, start_reg - reg);
14266 saved_size += (start_reg - reg) * 12;
14267 RTX_FRAME_RELATED_P (insn) = 1;
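  /* Save any call-saved VFP registers in runs of consecutive even-odd
     register pairs, so that each run can be stored with a single FSTMD
     and gets its own unwinder annotation from vfp_emit_fstmd.  */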
14270 if (TARGET_HARD_FLOAT && TARGET_VFP)
14272 start_reg = FIRST_VFP_REGNUM;
14274 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14276 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14277 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14279 if (start_reg != reg)
14280 saved_size += vfp_emit_fstmd (start_reg,
14281 (reg - start_reg) / 2);
14282 start_reg = reg + 2;
14285 if (start_reg != reg)
14286 saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }

  return saved_size;
}
14293 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;
  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      stack_pointer_rtx,
                                      hard_frame_pointer_rtx));
      else
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      hard_frame_pointer_rtx,
                                      stack_pointer_rtx));
14322 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14323 plus_constant (stack_pointer_rtx, amount));
14324 RTX_FRAME_RELATED_P (dwarf) = 1;
14325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
14343 int saved_regs = 0;
14344 unsigned HOST_WIDE_INT args_to_push;
14345 arm_stack_offsets *offsets;
14347 func_type = arm_current_func_type ();
14349 /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))
    return;
14353 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14354 args_to_push = crtl->args.pretend_args_size;
14356 /* Compute which register we will have to save onto the stack. */
14357 offsets = arm_get_frame_offsets ();
14358 live_regs_mask = offsets->saved_regs_mask;
14360 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14362 if (IS_STACKALIGN (func_type))
      /* Handle a word-aligned stack pointer.  We generate the following:

             mov r0, sp
             bic r1, r0, #7
             mov sp, r1
             <save and restore r0 in normal prologue/epilogue>
             mov sp, r0
             bx lr
14376 The unwinder doesn't need to know about the stack realignment.
14377 Just tell it we saved SP in r0. */
14378 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14380 r0 = gen_rtx_REG (SImode, 0);
14381 r1 = gen_rtx_REG (SImode, 1);
14382 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14383 compiler won't choke. */
14384 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14385 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14386 insn = gen_movsi (r0, stack_pointer_rtx);
14387 RTX_FRAME_RELATED_P (insn) = 1;
14388 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14390 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14391 emit_insn (gen_movsi (stack_pointer_rtx, r1));
  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
14397 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14399 if (IS_INTERRUPT (func_type))
14401 /* Interrupt functions must not corrupt any registers.
14402 Creating a frame pointer however, corrupts the IP
14403 register, so we must push it first. */
14404 insn = emit_multi_reg_push (1 << IP_REGNUM);
14406 /* Do not set RTX_FRAME_RELATED_P on this insn.
14407 The dwarf stack unwinding code only wants to see one
14408 stack decrement per function, and this is not it. If
14409 this instruction is labeled as being part of the frame
14410 creation sequence then dwarf2out_frame_debug_expr will
14411 die when it encounters the assignment of IP to FP
14412 later on, since the use of SP here establishes SP as
14413 the CFA register and not IP.
14415 Anyway this instruction is not really part of the stack
14416 frame creation although it is part of the prologue. */
14418 else if (IS_NESTED (func_type))
14420 /* The Static chain register is the same as the IP register
14421 used as a scratch register during stack frame creation.
14422 To get around this need to find somewhere to store IP
14423 whilst the frame is being created. We try the following
14426 1. The last argument register.
14427 2. A slot on the stack above the frame. (This only
14428 works if the function is not a varargs function).
             3. Register r3, after pushing the argument registers
                onto the stack.
14432 Note - we only need to tell the dwarf2 backend about the SP
14433 adjustment in the second variant; the static chain register
14434 doesn't need to be unwound, as it doesn't contain a value
14435 inherited from the caller. */
14437 if (df_regs_ever_live_p (3) == false)
14438 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14439 else if (args_to_push == 0)
            {
              rtx dwarf;

              gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
              saved_regs += 4;

              insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
              fp_offset = 4;
14450 /* Just tell the dwarf backend that we adjusted SP. */
14451 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (stack_pointer_rtx,
                                                  -fp_offset));
14454 RTX_FRAME_RELATED_P (insn) = 1;
14455 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14459 /* Store the args on the stack. */
14460 if (cfun->machine->uses_anonymous_args)
14461 insn = emit_multi_reg_push
14462 ((0xf0 >> (args_to_push / 4)) & 0xf);
              else
                insn = emit_insn
                  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (- args_to_push)));
14468 RTX_FRAME_RELATED_P (insn) = 1;
14470 saved_pretend_args = 1;
              fp_offset = args_to_push;
              args_to_push = 0;
14474 /* Now reuse r3 to preserve IP. */
14475 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14479 insn = emit_set_insn (ip_rtx,
14480 plus_constant (stack_pointer_rtx, fp_offset));
14481 RTX_FRAME_RELATED_P (insn) = 1;
14486 /* Push the argument registers, or reserve space for them. */
14487 if (cfun->machine->uses_anonymous_args)
14488 insn = emit_multi_reg_push
14489 ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
14494 RTX_FRAME_RELATED_P (insn) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating the extra push
     of IP (needed when a frame pointer is created under the APCS
     frame layout), then subtracting four from LR now means that the
     function return can be done with a single instruction.  */
14502 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14503 && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
14507 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14509 emit_set_insn (lr, plus_constant (lr, -4));
14512 if (live_regs_mask)
14514 saved_regs += bit_count (live_regs_mask) * 4;
14515 if (optimize_size && !frame_pointer_needed
14516 && saved_regs == offsets->saved_regs - offsets->saved_args)
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so it is independent of the epilogue.  */
          int n;
          int frame;

          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
14535 insn = emit_multi_reg_push (live_regs_mask);
14536 RTX_FRAME_RELATED_P (insn) = 1;
14539 if (! IS_VOLATILE (func_type))
14540 saved_regs += arm_save_coproc_regs ();
14542 if (frame_pointer_needed && TARGET_ARM)
14544 /* Create the new frame pointer. */
14545 if (TARGET_APCS_FRAME)
14547 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14548 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14549 RTX_FRAME_RELATED_P (insn) = 1;
14551 if (IS_NESTED (func_type))
14553 /* Recover the static chain register. */
14554 if (!df_regs_ever_live_p (3)
14555 || saved_pretend_args)
14556 insn = gen_rtx_REG (SImode, 3);
14557 else /* if (crtl->args.pretend_args_size == 0) */
14559 insn = plus_constant (hard_frame_pointer_rtx, 4);
14560 insn = gen_frame_mem (SImode, insn);
14562 emit_set_insn (ip_rtx, insn);
14563 /* Add a USE to stop propagate_one_insn() from barfing. */
14564 emit_insn (gen_prologue_use (ip_rtx));
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
14570 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14571 stack_pointer_rtx, insn));
14572 RTX_FRAME_RELATED_P (insn) = 1;
14576 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14578 /* This add can produce multiple insns for a large constant, so we
14579 need to get tricky. */
14580 rtx last = get_last_insn ();
14582 amount = GEN_INT (offsets->saved_args + saved_regs
14583 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);
14594 /* If the frame pointer is needed, emit a special barrier that
14595 will prevent the scheduler from moving stores to the frame
14596 before the stack adjustment. */
14597 if (frame_pointer_needed)
14598 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14599 hard_frame_pointer_rtx));
14603 if (frame_pointer_needed && TARGET_THUMB2)
14604 thumb_set_frame_pointer (offsets);
14606 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14608 unsigned long mask;
14610 mask = live_regs_mask;
14611 mask &= THUMB2_WORK_REGS;
14612 if (!IS_NESTED (func_type))
14613 mask |= (1 << IP_REGNUM);
14614 arm_load_pic_register (mask);
14617 /* If we are profiling, make sure no instructions are scheduled before
14618 the call to mcount. Similarly if the user has requested no
14619 scheduling in the prolog. Similarly if we want non-call exceptions
14620 using the EABI unwinder, to prevent faulting instructions from being
14621 swapped with a stack adjustment. */
14622 if (crtl->profile || !TARGET_SCHED_PROLOG
14623 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14624 emit_insn (gen_blockage ());
14626 /* If the link register is being kept alive, with the return address in it,
14627 then make sure that it does not get reused by the ce2 pass. */
14628 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14629 cfun->machine->lr_save_eliminated = 1;
14632 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
14636 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
14653 else if (current_insn_predicate)
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
14663 code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
14669 /* If CODE is 'd', then the X is a condition operand and the instruction
14670 should only be executed if the condition is true.
14671 if CODE is 'D', then the X is a condition operand and the instruction
14672 should only be executed if the condition is false: however, if the mode
14673 of the comparison is CCFPEmode, then always execute the instruction -- we
14674 do this because in these circumstances !GE does not necessarily imply LT;
14675 in these cases the instruction pattern will take care to make sure that
14676 an instruction containing %d will follow, thereby undoing the effects of
14677 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
14681 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
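/* For example, on a little-endian target, if X is (reg:DI 0) then %Q
   prints "r0" (the least significant word) and %R prints "r1".  */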
void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
14692 fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
14700 arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
14705 if (!TARGET_UNIFIED_ASM)
14706 arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
14711 if (TARGET_UNIFIED_ASM)
14712 arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
14717 Preceded by 's' in unified syntax, otherwise followed by 's'. */
14718 if (TARGET_UNIFIED_ASM)
14720 fputc('s', stream);
14721 arm_print_condition (stream);
14725 arm_print_condition (stream);
14726 fputc('s', stream);
      break;

    case '!':
      /* If the instruction is conditionally executed then print
14732 the current condition code, otherwise print 's'. */
14733 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
14734 if (current_insn_predicate)
14735 arm_print_condition (stream);
14737 fputc('s', stream);
14740 /* %# is a "break" sequence. It doesn't output anything, but is used to
14741 separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;

        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14751 r = REAL_VALUE_NEGATE (r);
14752 fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case CONST:
          output_addr_const (stream, x);
          break;

        default:
          gcc_unreachable ();
        }
      return;

    case 'B':
14774 if (GET_CODE (x) == CONST_INT)
14777 val = ARM_SIGN_EXTEND (~INTVAL (x));
14778 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
14782 putc ('~', stream);
14783 output_addr_const (stream, x);
14788 /* The low 16 bits of an immediate constant. */
14789 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
14793 fprintf (stream, "%s", arithmetic_instr (x, 1));
14796 /* Truncate Cirrus shift counts. */
14798 if (GET_CODE (x) == CONST_INT)
14800 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
14803 arm_print_operand (stream, x, 0);
14807 fprintf (stream, "%s", arithmetic_instr (x, 0));
14815 if (!shift_operator (x, SImode))
14817 output_operand_lossage ("invalid shift operand");
14821 shift = shift_op (x, &val);
14825 fprintf (stream, ", %s ", shift);
14827 arm_print_operand (stream, XEXP (x, 1), 0);
14829 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
14834 /* An explanation of the 'Q', 'R' and 'H' register operands:
14836 In a pair of registers containing a DI or DF value the 'Q'
14837 operand returns the register number of the register containing
14838 the least significant part of the value. The 'R' operand returns
14839 the register number of the register containing the most
14840 significant part of the value.
14842 The 'H' operand returns the higher of the two register numbers.
14843 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
14844 same as the 'Q' operand, since the most significant part of the
14845 value is held in the lower number register. The reverse is true
14846 on systems where WORDS_BIG_ENDIAN is false.
14848 The purpose of these operands is to distinguish between cases
14849 where the endian-ness of the values is important (for example
14850 when they are added together), and cases where the endian-ness
14851 is irrelevant, but the order of register operations is important.
14852 For example when loading a value from memory into a register
14853 pair, the endian-ness does not matter. Provided that the value
14854 from the lower memory address is put into the lower numbered
14855 register, and the value from the higher address is put into the
14856 higher numbered register, the load will work regardless of whether
14857 the value being loaded is big-wordian or little-wordian. The
14858 order of the two register loads can matter however, if the address
14859 of the memory location is actually held in one of the registers
14860 being overwritten by the load. */
    case 'Q':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14864 output_operand_lossage ("invalid operand for code '%c'", code);
14868 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
    case 'R':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14874 output_operand_lossage ("invalid operand for code '%c'", code);
14878 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14884 output_operand_lossage ("invalid operand for code '%c'", code);
14888 asm_fprintf (stream, "%r", REGNO (x) + 1);
14892 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14894 output_operand_lossage ("invalid operand for code '%c'", code);
14898 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
14902 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
14904 output_operand_lossage ("invalid operand for code '%c'", code);
14908 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
14912 asm_fprintf (stream, "%r",
14913 GET_CODE (XEXP (x, 0)) == REG
14914 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
14918 asm_fprintf (stream, "{%r-%r}",
14920 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
14923 /* Like 'M', but writing doubleword vector registers, for use by Neon
14927 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
14928 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
14930 asm_fprintf (stream, "{d%d}", regno);
14932 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;
14941 if (!COMPARISON_P (x))
14943 output_operand_lossage ("invalid operand for code '%c'", code);
14947 fputs (arm_condition_codes[get_arm_condition_code (x)],
    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
14959 if (!COMPARISON_P (x))
14961 output_operand_lossage ("invalid operand for code '%c'", code);
14965 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
14966 (get_arm_condition_code (x))],
14970 /* Cirrus registers can be accessed in a variety of ways:
14971 single floating point (f)
14972 double floating point (d)
14974 64bit integer (dx). */
14975 case 'W': /* Cirrus register in F mode. */
14976 case 'X': /* Cirrus register in D mode. */
14977 case 'Y': /* Cirrus register in FX mode. */
14978 case 'Z': /* Cirrus register in DX mode. */
14979 gcc_assert (GET_CODE (x) == REG
14980 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
14982 fprintf (stream, "mv%s%s",
14984 : code == 'X' ? "d"
14985 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
14989 /* Print cirrus register in the mode specified by the register's mode. */
14992 int mode = GET_MODE (x);
14994 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
14996 output_operand_lossage ("invalid operand for code '%c'", code);
15000 fprintf (stream, "mv%s%s",
15001 mode == DFmode ? "d"
15002 : mode == SImode ? "fx"
15003 : mode == DImode ? "dx"
15004 : "f", reg_names[REGNO (x)] + 2);
15010 if (GET_CODE (x) != REG
15011 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15012 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15013 /* Bad value for wCG register number. */
15015 output_operand_lossage ("invalid operand for code '%c'", code);
15020 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15023 /* Print an iWMMXt control register name. */
      if (GET_CODE (x) != CONST_INT
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
15028 /* Bad value for wC register number. */
15030 output_operand_lossage ("invalid operand for code '%c'", code);
15036 static const char * wc_reg_names [16] =
15038 "wCID", "wCon", "wCSSF", "wCASF",
15039 "wC4", "wC5", "wC6", "wC7",
15040 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15041 "wC12", "wC13", "wC14", "wC15"
15044 fprintf (stream, wc_reg_names [INTVAL (x)]);
15048 /* Print a VFP/Neon double precision or quad precision register name. */
15052 int mode = GET_MODE (x);
15053 int is_quad = (code == 'q');
15056 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15058 output_operand_lossage ("invalid operand for code '%c'", code);
15062 if (GET_CODE (x) != REG
15063 || !IS_VFP_REGNUM (REGNO (x)))
15065 output_operand_lossage ("invalid operand for code '%c'", code);
15070 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15071 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15073 output_operand_lossage ("invalid operand for code '%c'", code);
15077 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15078 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15082 /* These two codes print the low/high doubleword register of a Neon quad
15083 register, respectively. For pair-structure types, can also print
15084 low/high quadword registers. */
15088 int mode = GET_MODE (x);
15091 if ((GET_MODE_SIZE (mode) != 16
15092 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15094 output_operand_lossage ("invalid operand for code '%c'", code);
15099 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15101 output_operand_lossage ("invalid operand for code '%c'", code);
15105 if (GET_MODE_SIZE (mode) == 16)
15106 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15107 + (code == 'f' ? 1 : 0));
15109 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15110 + (code == 'f' ? 1 : 0));
15114 /* Print a VFPv3 floating-point constant, represented as an integer
15118 int index = vfp3_const_double_index (x);
15119 gcc_assert (index != -1);
15120 fprintf (stream, "%d", index);
15124 /* Print bits representing opcode features for Neon.
15126 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15127 and polynomials as unsigned.
15129 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15131 Bit 2 is 1 for rounding functions, 0 otherwise. */
15133 /* Identify the type as 's', 'u', 'p' or 'f'. */
15136 HOST_WIDE_INT bits = INTVAL (x);
15137 fputc ("uspf"[bits & 3], stream);
15141 /* Likewise, but signed and unsigned integers are both 'i'. */
15144 HOST_WIDE_INT bits = INTVAL (x);
15145 fputc ("iipf"[bits & 3], stream);
15149 /* As for 'T', but emit 'u' instead of 'p'. */
15152 HOST_WIDE_INT bits = INTVAL (x);
15153 fputc ("usuf"[bits & 3], stream);
15157 /* Bit 2: rounding (vs none). */
15160 HOST_WIDE_INT bits = INTVAL (x);
15161 fputs ((bits & 4) != 0 ? "r" : "", stream);
15165 /* Memory operand for vld1/vst1 instruction. */
15169 bool postinc = FALSE;
15170 gcc_assert (GET_CODE (x) == MEM);
15171 addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
15177 asm_fprintf (stream, "[%r]", REGNO (addr));
15179 fputs("!", stream);
15183 /* Register specifier for vld1.16/vst1.16. Translate the S register
15184 number into a D register number and element index. */
15187 int mode = GET_MODE (x);
15190 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15192 output_operand_lossage ("invalid operand for code '%c'", code);
15197 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15199 output_operand_lossage ("invalid operand for code '%c'", code);
15203 regno = regno - FIRST_VFP_REGNUM;
15204 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15211 output_operand_lossage ("missing operand");
15215 switch (GET_CODE (x))
15218 asm_fprintf (stream, "%r", REGNO (x));
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          if (TARGET_NEON)
            {
              char fpstr[20];
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                               sizeof (fpstr), 0, 1);
              fprintf (stream, "#%s", fpstr);
            }
          else
            fprintf (stream, "#%s", fp_immediate_constant (x));
          break;
15239 gcc_assert (GET_CODE (x) != NEG);
15240 fputc ('#', stream);
15241 if (GET_CODE (x) == HIGH)
15243 fputs (":lower16:", stream);
15247 output_addr_const (stream, x);
15253 /* Target hook for assembling integer objects. The ARM version needs to
15254 handle word-sized values specially. */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
15258 enum machine_mode mode;
15260 if (size == UNITS_PER_WORD && aligned_p)
15262 fputs ("\t.word\t", asm_out_file);
15263 output_addr_const (asm_out_file, x);
15265 /* Mark symbols as position independent. We only do this in the
15266 .text segment, not in the .data segment. */
15267 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15268 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15270 /* See legitimize_pic_address for an explanation of the
15271 TARGET_VXWORKS_RTP check. */
15272 if (TARGET_VXWORKS_RTP
15273 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15274 fputs ("(GOT)", asm_out_file);
15276 fputs ("(GOTOFF)", asm_out_file);
      fputc ('\n', asm_out_file);
      return true;
    }
15282 mode = GET_MODE (x);
15284 if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);
15290 units = CONST_VECTOR_NUNITS (x);
15291 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15293 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15294 for (i = 0; i < units; i++)
15296 rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15301 for (i = 0; i < units; i++)
15303 rtx elt = CONST_VECTOR_ELT (x, i);
15304 REAL_VALUE_TYPE rval;
15306 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);

      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
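/* Shared worker for the constructor/destructor hooks below.  On AAPCS
   targets the entry is placed in .init_array/.fini_array with a
   "(target1)" relocation; otherwise the default named-section handling
   is used.  */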
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }
15332 /* Put these in the .init_array section, using a special relocation. */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];

      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
15346 switch_to_section (s);
15347 assemble_align (POINTER_SIZE);
15348 fputs ("\t.word\t", asm_out_file);
15349 output_addr_const (asm_out_file, symbol);
15350 fputs ("(target1)\n", asm_out_file);
15353 /* Add a function to the list of static constructors. */
static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
15358 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15361 /* Add a function to the list of static destructors. */
static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
15366 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15369 /* A finite state machine takes care of noticing whether or not instructions
15370 can be conditionally executed, and thus decrease execution time and code
15371 size by deleting branch instructions. The fsm is controlled by
15372 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
/* The states of the fsm controlling condition codes are:
15375 0: normal, do nothing special
15376 1: make ASM_OUTPUT_OPCODE not output this instruction
15377 2: make ASM_OUTPUT_OPCODE not output this instruction
15378 3: make instructions conditional
15379 4: make instructions conditional
15381 State transitions (state->state by whom under condition):
15382 0 -> 1 final_prescan_insn if the `target' is a label
15383 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15384 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15385 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15386 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15387 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15388 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15389 (the target insn is arm_target_insn).
15391 If the jump clobbers the conditions then we use states 2 and 4.
15393 A similar thing can be done with conditional return insns.
15395 XXX In case the `target' is an unconditional branch, this conditionalising
15396 of the instructions always reduces code size, but not always execution
15397 time. But then, I want to reduce the code size to somewhere near what
15398 /bin/cc produces. */
15400 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15401 instructions. When a COND_EXEC instruction is seen the subsequent
15402 instructions are scanned so that multiple conditional instructions can be
15403 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15404 specify the length and true/false mask for the IT block. These will be
15405 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
15407 /* Returns the index of the ARM condition code string in
15408 `arm_condition_codes'. COMPARISON should be an rtx like
15409 `(eq (...) (...))'. */
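/* For example, a plain CCmode (ge ...) comparison yields ARM_GE, while
   the same comparison in CC_SWPmode (used when the operands were
   swapped at compare time) yields ARM_LE; see the tables below.  */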
15410 static enum arm_cond_code
15411 get_arm_condition_code (rtx comparison)
15413 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15414 enum arm_cond_code code;
15415 enum rtx_code comp_code = GET_CODE (comparison);
15417 if (GET_MODE_CLASS (mode) != MODE_CC)
15418 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15419 XEXP (comparison, 1));
  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
15424 case CC_DEQmode: code = ARM_EQ; goto dominance;
15425 case CC_DGEmode: code = ARM_GE; goto dominance;
15426 case CC_DGTmode: code = ARM_GT; goto dominance;
15427 case CC_DLEmode: code = ARM_LE; goto dominance;
15428 case CC_DLTmode: code = ARM_LT; goto dominance;
15429 case CC_DGEUmode: code = ARM_CS; goto dominance;
15430 case CC_DGTUmode: code = ARM_HI; goto dominance;
15431 case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);
15437 if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      return code;
    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
15445 case EQ: return ARM_EQ;
15446 case GE: return ARM_PL;
15447 case LT: return ARM_MI;
15448 default: gcc_unreachable ();
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
15455 case EQ: return ARM_EQ;
15456 default: gcc_unreachable ();
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
15463 case EQ: return ARM_PL;
15464 default: gcc_unreachable ();
        }

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
         byte.  This allows us to handle all cases except UNEQ and
         LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
15475 case GT: return ARM_GT;
15476 case LE: return ARM_LS;
15477 case LT: return ARM_MI;
15478 case NE: return ARM_NE;
15479 case EQ: return ARM_EQ;
15480 case ORDERED: return ARM_VC;
15481 case UNORDERED: return ARM_VS;
15482 case UNLT: return ARM_LT;
15483 case UNLE: return ARM_LE;
15484 case UNGT: return ARM_HI;
15485 case UNGE: return ARM_PL;
15486 /* UNEQ and LTGT do not have a representation. */
15487 case UNEQ: /* Fall through. */
15488 case LTGT: /* Fall through. */
15489 default: gcc_unreachable ();
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
15496 case EQ: return ARM_EQ;
15497 case GE: return ARM_LE;
15498 case GT: return ARM_LT;
15499 case LE: return ARM_GE;
15500 case LT: return ARM_GT;
15501 case GEU: return ARM_LS;
15502 case GTU: return ARM_CC;
15503 case LEU: return ARM_CS;
15504 case LTU: return ARM_HI;
15505 default: gcc_unreachable ();
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
15512 case GEU: return ARM_CC;
15513 default: gcc_unreachable ();
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
15520 case EQ: return ARM_EQ;
15521 case GE: return ARM_GE;
15522 case GT: return ARM_GT;
15523 case LE: return ARM_LE;
15524 case LT: return ARM_LT;
15525 case GEU: return ARM_CS;
15526 case GTU: return ARM_HI;
15527 case LEU: return ARM_LS;
15528 case LTU: return ARM_CC;
15529 default: gcc_unreachable ();
        }

    default: gcc_unreachable ();
    }
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
{
15541 rtx first_insn = insn;
15542 rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
15548 /* Remove the previous insn from the count of insns to be output. */
15549 if (arm_condexec_count)
15550 arm_condexec_count--;
15552 /* Nothing to do if we are already inside a conditional block. */
  if (arm_condexec_count)
    return;
  if (GET_CODE (body) != COND_EXEC)
    return;
15559 /* Conditional jumps are implemented directly. */
  if (GET_CODE (insn) == JUMP_INSN)
    return;
15563 predicate = COND_EXEC_TEST (body);
15564 arm_current_cc = get_arm_condition_code (predicate);
15566 n = get_attr_ce_count (insn);
15567 arm_condexec_count = 1;
15568 arm_condexec_mask = (1 << n) - 1;
15569 arm_condexec_masklen = n;
15570 /* See if subsequent instructions can be combined into the same block. */
  for (;;)
    {
      insn = next_nonnote_insn (insn);
15575 /* Jumping into the middle of an IT block is illegal, so a label or
15576 barrier terminates the block. */
      if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
        break;
15580 body = PATTERN (insn);
15581 /* USE and CLOBBER aren't really insns, so just skip them. */
15582 if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;
15586 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
      if (GET_CODE (body) != COND_EXEC)
        break;
15589 /* Allow up to 4 conditionally executed instructions in a block. */
15590 n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
        break;
15594 predicate = COND_EXEC_TEST (body);
15595 code = get_arm_condition_code (predicate);
15596 mask = (1 << n) - 1;
15597 if (arm_current_cc == code)
15598 arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
        break;
15602 arm_condexec_count++;
15603 arm_condexec_masklen += n;
15605 /* A jump must be the last instruction in a conditional block. */
      if (GET_CODE (insn) == JUMP_INSN)
        break;
    }
15609 /* Restore recog_data (getting the attributes of other insns can
15610 destroy this array, but final.c assumes that it remains intact
15611 across this call). */
  extract_constrain_insn_cached (first_insn);
}
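/* Implement the ccfsm described above for ARM state: called by
   FINAL_PRESCAN_INSN for each insn, it spots short conditional (or
   return-seeking) branches and arranges for the insns they skip to be
   conditionalised instead, driving states 1-4.  */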
void
arm_final_prescan_insn (rtx insn)
{
15618 /* BODY will hold the body of INSN. */
15619 rtx body = PATTERN (insn);
15621 /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;
15625 /* If we start with a return insn, we only succeed if we find another one. */
15626 int seeking_return = 0;
15628 /* START_INSN will hold the insn from where we start looking. This is the
15629 first insn after the following code_label if REVERSE is true. */
15630 rtx start_insn = insn;
15632 /* If in state 4, check if the target branch is reached, in order to
15633 change back to state 0. */
15634 if (arm_ccfsm_state == 4)
15636 if (insn == arm_target_insn)
15638 arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }
15644 /* If in state 3, it is possible to repeat the trick, if this insn is an
15645 unconditional branch to a label, and immediately following this branch
15646 is the previous target label which is only used once, and the label this
15647 branch jumps to is not too far off. */
15648 if (arm_ccfsm_state == 3)
15650 if (simplejump_p (insn))
15652 start_insn = next_nonnote_insn (start_insn);
15653 if (GET_CODE (start_insn) == BARRIER)
15655 /* XXX Isn't this always a barrier? */
15656 start_insn = next_nonnote_insn (start_insn);
15658 if (GET_CODE (start_insn) == CODE_LABEL
15659 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
15665 else if (GET_CODE (body) == RETURN)
15667 start_insn = next_nonnote_insn (start_insn);
15668 if (GET_CODE (start_insn) == BARRIER)
15669 start_insn = next_nonnote_insn (start_insn);
15670 if (GET_CODE (start_insn) == CODE_LABEL
15671 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
15672 && LABEL_NUSES (start_insn) == 1)
15675 seeking_return = 1;
15684 gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
15690 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
15691 body = XVECEXP (body, 0, 0);
  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
15695 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
15698 int fail = FALSE, succeed = FALSE;
15699 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
15700 int then_not_else = TRUE;
15701 rtx this_insn = start_insn, label = 0;
15703 /* Register the insn jumped to. */
15706 if (!seeking_return)
15707 label = XEXP (SET_SRC (body), 0);
15709 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
15710 label = XEXP (XEXP (SET_SRC (body), 1), 0);
15711 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
15713 label = XEXP (XEXP (SET_SRC (body), 2), 0);
15714 then_not_else = FALSE;
15716 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
15717 seeking_return = 1;
15718 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
15720 seeking_return = 1;
15721 then_not_else = FALSE;
15724 gcc_unreachable ();
15726 /* See how many insns this branch skips, and what kind of insns. If all
15727 insns are okay, and the label or unconditional branch to the same
15728 label is not too far away, succeed. */
15729 for (insns_skipped = 0;
15730 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;
15738 switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
15742 control falls in from somewhere else. */
15743 if (this_insn == label)
15745 arm_ccfsm_state = 1;
15753 /* Succeed if the following insn is the target label.
15755 If return insns are used then the last insn in a function
15756 will be a barrier. */
15757 this_insn = next_nonnote_insn (this_insn);
15758 if (this_insn && this_insn == label)
15760 arm_ccfsm_state = 1;
15768 /* The AAPCS says that conditional calls should not be
15769 used since they make interworking inefficient (the
15770 linker can't transform BL<cond> into BLX). That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }
15778 /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
15781 this_insn = next_nonnote_insn (this_insn);
15782 if (this_insn && GET_CODE (this_insn) == BARRIER)
15783 this_insn = next_nonnote_insn (this_insn);
15785 if (this_insn && this_insn == label
15786 && insns_skipped < max_insns_skipped)
15788 arm_ccfsm_state = 1;
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */
15802 scanbody = PATTERN (this_insn);
15803 if (GET_CODE (scanbody) == SET
15804 && GET_CODE (SET_DEST (scanbody)) == PC)
15806 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
15807 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
15809 arm_ccfsm_state = 2;
15812 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
15815 /* Fail if a conditional return is undesirable (e.g. on a
15816 StrongARM), but still allow this if optimizing for size. */
15817 else if (GET_CODE (scanbody) == RETURN
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
15821 else if (GET_CODE (scanbody) == RETURN
15824 arm_ccfsm_state = 2;
15827 else if (GET_CODE (scanbody) == PARALLEL)
15829 switch (get_attr_conds (this_insn))
15839 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
15847 if (!(GET_CODE (scanbody) == SET
15848 || GET_CODE (scanbody) == PARALLEL)
              || get_attr_conds (this_insn) != CONDS_NOCOND)
            fail = TRUE;
15852 /* A conditional cirrus instruction must be followed by
15853 a non Cirrus instruction. However, since we
15854 conditionalize instructions in this function and by
15855 the time we get here we can't add instructions
15856 (nops), because shorten_branches() has already been
15857 called, we will disable conditionalizing Cirrus
15858 instructions to be safe. */
15859 if (GET_CODE (scanbody) != USE
15860 && GET_CODE (scanbody) != CLOBBER
15861 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
15871 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
15872 arm_target_label = CODE_LABEL_NUMBER (label);
15875 gcc_assert (seeking_return || arm_ccfsm_state == 2);
15877 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
15879 this_insn = next_nonnote_insn (this_insn);
15880 gcc_assert (!this_insn
15881 || (GET_CODE (this_insn) != BARRIER
15882 && GET_CODE (this_insn) != CODE_LABEL));
              if (!this_insn)
                {
                  /* Oh, dear!  We ran off the end; give up.  */
15887 extract_constrain_insn_cached (insn);
15888 arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }
          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
15900 if (reverse || then_not_else)
15901 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
15904 /* Restore recog_data (getting the attributes of other insns can
15905 destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (insn);
}
15911 /* Output IT instructions. */
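/* For instance, with arm_current_cc == ARM_EQ and a three-insn block
   whose mask bits are {1, 0, 1}, this prints "itet eq" before the first
   instruction: 't' for the EQ insns and 'e' for the NE one.  */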
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;
15918 if (arm_condexec_mask)
15920 for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
15923 asm_fprintf(stream, "i%s\t%s\n\t", buff,
15924 arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
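/* Worked example (illustration only): for a three-insn block in which the
   first two instructions execute when the condition holds and the third on
   its inverse, arm_condexec_mask is 0b011, buff becomes "tte", and the
   code above emits "itte<TAB>eq" (when arm_current_cc is ARM_EQ) ahead of
   the first conditional instruction.  */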
15929 /* Returns true if REGNO is a valid register
15930 for holding a quantity of type MODE. */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
15934 if (GET_MODE_CLASS (mode) == MODE_CC)
15935 return (regno == CC_REGNUM
15936 || (TARGET_HARD_FLOAT && TARGET_VFP
15937 && regno == VFPCC_REGNUM));
  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
15945 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
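  /* Illustrative note (an assumption spelled out, not original text): a
     DImode value needs ARM_NUM_REGS (DImode) == 2 consecutive registers,
     so allowing it to start no higher than r6 keeps both halves within
     the low registers r0-r7 that Thumb-1 instructions can address.  */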
15947 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
15948 && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes GCC all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64 bits -- aldyh.  */
15954 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
15956 if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
15959 if (mode == SFmode || mode == SImode)
15960 return VFP_REGNO_OK_FOR_SINGLE (regno);
15962 if (mode == DFmode)
15963 return VFP_REGNO_OK_FOR_DOUBLE (regno);
15965 /* VFP registers can hold HFmode values, but there is no point in
15966 putting them there unless we have the NEON extensions for
15967 loading/storing them, too. */
15968 if (mode == HFmode)
15969 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
15973 || (VALID_NEON_QREG_MODE (mode)
15974 && NEON_REGNO_OK_FOR_QUAD (regno))
15975 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
15976 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
15977 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
15978 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }
  if (TARGET_REALLY_IWMMXT)
    {
15986 if (IS_IWMMXT_GR_REGNUM (regno))
15987 return mode == SImode;
15989 if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }
15993 /* We allow almost any value to be stored in the general registers.
15994 Restrict doubleword quantities to even register pairs so that we can
15995 use ldrd. Do not allow very large Neon structure opaque modes in
15996 general registers; they would use too many. */
15997 if (regno <= LAST_ARM_REGNUM)
15998 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
15999 && ARM_NUM_REGS (mode) <= 4;
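  /* A concrete instance (sketch, not original text): with TARGET_LDRD,
     "ldrd r4, r5, [rN]" is a legal doubleword load, while a doubleword
     starting in an odd register such as r3 is not, so DImode values are
     refused a home beginning in an odd-numbered core register here.  */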
16001 if (regno == FRAME_POINTER_REGNUM
16002 || regno == ARG_POINTER_REGNUM)
16003 /* We only allow integers in the fake hard registers. */
16004 return GET_MODE_CLASS (mode) == MODE_INT;
16006 /* The only registers left are the FPA registers
16007 which we only allow to hold FP values. */
16008 return (TARGET_HARD_FLOAT && TARGET_FPA
16009 && GET_MODE_CLASS (mode) == MODE_FLOAT
16010 && regno >= FIRST_FPA_REGNUM
16011 && regno <= LAST_FPA_REGNUM);
16014 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16015 not used in arm mode. */
enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;
16034 if ( regno <= LAST_ARM_REGNUM
16035 || regno == FRAME_POINTER_REGNUM
16036 || regno == ARG_POINTER_REGNUM)
16037 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16039 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16040 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16042 if (IS_CIRRUS_REGNUM (regno))
16043 return CIRRUS_REGS;
  if (IS_VFP_REGNUM (regno))
    {
16047 if (regno <= D7_VFP_REGNUM)
16048 return VFP_D0_D7_REGS;
16049 else if (regno <= LAST_LO_VFP_REGNUM)
16050 return VFP_LO_REGS;
      return VFP_HI_REGS;
    }
16055 if (IS_IWMMXT_REGNUM (regno))
16056 return IWMMXT_REGS;
16058 if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
16064 /* Handle a special case when computing the offset
16065 of an argument from the frame pointer. */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms () failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;
16079 /* If we are using the frame pointer to point at the argument, then
16080 an offset of 0 is correct. */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;
16084 /* If we are using the stack pointer to point at the
16085 argument, then an offset of 0 is correct. */
16086 /* ??? Check this is consistent with thumb2 frame layout. */
16087 if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;
16091 /* Oh dear. The argument is pointed to by a register rather
16092 than being held in a register, or being stored at a known
16093 offset from the frame pointer. Since GDB only understands
16094 those two kinds of argument we must translate the address
16095 held in the register into an offset from the frame pointer.
16096 We do this by searching through the insns for the function
16097 looking to see where this register gets its value. If the
16098 register is initialized from the frame pointer plus an offset
16099 then we are in luck and we can continue, otherwise we give up.
16101 This code is exercised by producing debugging information
16102 for a function with arguments like this:
16104 double func (double a, double b, int c, double d) {return d;}
16106 Without this code the stab for parameter 'd' will be set to
16107 an offset of 0 from the frame pointer, rather than 8. */
16109 /* The if() statement says:
16111 If the insn is a normal instruction
16112 and if the insn is setting the value in a register
16113 and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
16115 that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
16124 && GET_CODE (PATTERN (insn)) == SET
16125 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16126 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16127 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16128 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
	  )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	add_builtin_function ((NAME), (TYPE), (CODE),			\
			      BUILT_IN_MD, NULL, NULL_TREE);		\
    }									\
  while (0)
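/* For illustration (hypothetical invocation, assuming FL_IWMMXT is set in
   insn_flags for the selected CPU):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
		   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   registers the builtin only when the target actually has iWMMXt.  */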
struct builtin_description
{
16159 const unsigned int mask;
16160 const enum insn_code icode;
16161 const char * const name;
16162 const enum arm_builtins code;
16163 const enum rtx_code comparison;
  const unsigned int flag;
};
static const struct builtin_description bdesc_2arg[] =
{
16169 #define IWMMXT_BUILTIN(code, string, builtin) \
16170 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16171 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
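/* Illustrative expansion (sketch, not original text): the first entry
   below, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the builtin's user-visible name to the insn pattern that
   implements it.  */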
16173 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16174 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16175 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16176 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16177 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16178 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16179 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16180 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16181 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16182 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16183 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16184 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16185 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16186 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16187 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16188 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16189 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16190 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16191 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16192 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16193 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16194 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16195 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16196 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16197 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16198 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16199 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16200 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16201 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16202 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16203 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16204 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16205 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16206 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16207 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16208 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16209 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16210 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16211 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16212 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16213 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16214 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16215 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16216 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16217 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16218 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16219 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16220 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16221 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16222 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16223 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16224 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16225 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16226 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16227 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16228 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16229 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16230 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16232 #define IWMMXT_BUILTIN2(code, builtin) \
16233 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16235 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16236 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16237 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16238 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16239 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16240 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16241 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16242 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16243 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16244 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16245 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16246 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16247 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16248 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16249 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16250 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16251 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16252 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16253 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16254 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16255 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16256 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16257 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16258 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16259 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16260 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16261 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16262 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16263 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16264 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16265 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16266 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
16271 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16272 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16273 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16274 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16275 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16276 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16277 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16278 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16279 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16280 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16281 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16282 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16283 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16284 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16285 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16286 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16287 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
16291 /* Set up all the iWMMXt builtins. This is
16292 not called if TARGET_IWMMXT is zero. */
static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;
16301 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16302 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16303 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree int_ftype_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 tree_cons (NULL_TREE,
							    V4HI_type_node,
							    endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
16493 /* Use one of the operands; the target can have a different mode for
16494 mask-generating compares. */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
16525 /* Add the remaining MMX insns with somewhat more complicated types. */
16526 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16532 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16534 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16535 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16537 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16538 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16539 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16540 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16541 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16542 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16544 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16545 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16546 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16547 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16548 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16551 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16552 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16553 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16554 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16555 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16556 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16558 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16560 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16561 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16562 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16565 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16567 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16568 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16572 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16577 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16581 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16585 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16586 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16592 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16593 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16595 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16598 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16600 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16601 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16603 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16604 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16605 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16608 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16610 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16612 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16614 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
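/* Hedged usage sketch (user code, not part of this file):

     void *tp = __builtin_thread_pointer ();

   On ARM this typically lowers to a call to __aeabi_read_tp or to a
   direct CP15 thread-register read, depending on the target.  */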
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI	  = 0x0400,
  T_EI	  = 0x0800,
  T_OI	  = 0x1000
};

#define T_MAX 13
16646 #define v8qi_UP T_V8QI
16647 #define v4hi_UP T_V4HI
16648 #define v2si_UP T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
16651 #define v16qi_UP T_V16QI
16652 #define v8hi_UP T_V8HI
16653 #define v4si_UP T_V4SI
16654 #define v4sf_UP T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP	 T_TI
#define ei_UP	 T_EI
#define oi_UP	 T_OI
16660 #define UP(X) X##_UP
16695 NEON_LOADSTRUCTLANE,
16697 NEON_STORESTRUCTLANE,
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
16708 const enum insn_code codes[T_MAX];
16709 const unsigned int num_vars;
16710 unsigned int base_fcode;
16711 } neon_builtin_datum;
16713 #define CF(N,X) CODE_FOR_neon_##N##X
16715 #define VAR1(T, N, A) \
16716 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
16717 #define VAR2(T, N, A, B) \
16718 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
16719 #define VAR3(T, N, A, B, C) \
16720 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
16721 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
16722 #define VAR4(T, N, A, B, C, D) \
16723 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
16724 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
16725 #define VAR5(T, N, A, B, C, D, E) \
16726 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
16727 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
16728 #define VAR6(T, N, A, B, C, D, E, F) \
16729 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
16730 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
16731 #define VAR7(T, N, A, B, C, D, E, F, G) \
16732 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
16735 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
      | UP (H), \
16738 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16739 CF (N, G), CF (N, H) }, 8, 0
16740 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
16741 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16742 | UP (H) | UP (I), \
16743 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16744 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
16745 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
16746 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
16747 | UP (H) | UP (I) | UP (J), \
16748 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
16749 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
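/* Illustrative expansion (sketch): VAR2 (BINOP, vqdmull, v4hi, v2si)
   produces the fields

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. a single table entry covering both element widths of the insn.  */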
16751 /* The mode entries in the following table correspond to the "key" type of the
16752 instruction variant, i.e. equivalent to that which would be specified after
16753 the assembler mnemonic, which usually refers to the last vector operand.
   (Signed, unsigned and polynomial types are not distinguished, and are
   all mapped onto the same mode for a given element size.)  The modes
16756 listed per instruction should be the same as those defined for that
16757 instruction's pattern in neon.md.
16758 WARNING: Variants should be listed in the same increasing order as
16759 neon_builtin_type_bits. */
16761 static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
16764 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16765 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
16766 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
16767 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16768 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16769 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
16770 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16771 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16772 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
16773 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16774 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
16775 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
16776 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
16777 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
16778 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
16779 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
16780 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
16781 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
16782 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
16783 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
16784 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
16785 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
16786 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16787 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16788 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16789 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
16790 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
16791 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
16792 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16793 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16794 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16795 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
16796 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16797 { VAR10 (BINOP, vsub,
16798 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16799 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
16800 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
16801 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16802 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16803 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
16804 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16805 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16806 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16807 { VAR2 (BINOP, vcage, v2sf, v4sf) },
16808 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
16809 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16810 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16811 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
16812 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16813 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
16814 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16815 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16816 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
16817 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16818 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16819 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
16820 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
16821 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
16822 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
16823 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16824 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
16825 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16826 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16827 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16828 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16829 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16830 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16831 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
16832 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
16833 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
16834 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
16835 /* FIXME: vget_lane supports more variants than this! */
16836 { VAR10 (GETLANE, vget_lane,
16837 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16838 { VAR10 (SETLANE, vset_lane,
16839 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16840 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
16841 { VAR10 (DUP, vdup_n,
16842 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16843 { VAR10 (DUPLANE, vdup_lane,
16844 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16845 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
16846 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
16847 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
16848 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
16849 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
16850 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
16851 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
16852 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16853 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16854 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
16855 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
16856 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16857 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
16858 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
16859 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16860 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16861 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
16862 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
16863 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16864 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
16865 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
16866 { VAR10 (BINOP, vext,
16867 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16868 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16869 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
16870 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
16871 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
16872 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
16873 { VAR10 (SELECT, vbsl,
16874 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16875 { VAR1 (VTBL, vtbl1, v8qi) },
16876 { VAR1 (VTBL, vtbl2, v8qi) },
16877 { VAR1 (VTBL, vtbl3, v8qi) },
16878 { VAR1 (VTBL, vtbl4, v8qi) },
16879 { VAR1 (VTBX, vtbx1, v8qi) },
16880 { VAR1 (VTBX, vtbx2, v8qi) },
16881 { VAR1 (VTBX, vtbx3, v8qi) },
16882 { VAR1 (VTBX, vtbx4, v8qi) },
16883 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16884 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16885 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
16886 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
16887 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
16888 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
16889 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
16890 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
16891 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
16892 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
16893 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
16894 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
16895 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
16896 { VAR10 (LOAD1, vld1,
16897 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16898 { VAR10 (LOAD1LANE, vld1_lane,
16899 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16900 { VAR10 (LOAD1, vld1_dup,
16901 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16902 { VAR10 (STORE1, vst1,
16903 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16904 { VAR10 (STORE1LANE, vst1_lane,
16905 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16906 { VAR9 (LOADSTRUCT,
16907 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16908 { VAR7 (LOADSTRUCTLANE, vld2_lane,
16909 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16910 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
16911 { VAR9 (STORESTRUCT, vst2,
16912 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16913 { VAR7 (STORESTRUCTLANE, vst2_lane,
16914 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16915 { VAR9 (LOADSTRUCT,
16916 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16917 { VAR7 (LOADSTRUCTLANE, vld3_lane,
16918 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16919 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
16920 { VAR9 (STORESTRUCT, vst3,
16921 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16922 { VAR7 (STORESTRUCTLANE, vst3_lane,
16923 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16924 { VAR9 (LOADSTRUCT, vld4,
16925 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16926 { VAR7 (LOADSTRUCTLANE, vld4_lane,
16927 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16928 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
16929 { VAR9 (STORESTRUCT, vst4,
16930 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
16931 { VAR7 (STORESTRUCTLANE, vst4_lane,
16932 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
16933 { VAR10 (LOGICBINOP, vand,
16934 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16935 { VAR10 (LOGICBINOP, vorr,
16936 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16937 { VAR10 (BINOP, veor,
16938 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16939 { VAR10 (LOGICBINOP, vbic,
16940 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
16941 { VAR10 (LOGICBINOP, vorn,
16942 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};

#undef CF
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10

static void
arm_init_neon_builtins (void)
{
16960 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
16962 tree neon_intQI_type_node;
16963 tree neon_intHI_type_node;
16964 tree neon_polyQI_type_node;
16965 tree neon_polyHI_type_node;
16966 tree neon_intSI_type_node;
16967 tree neon_intDI_type_node;
16968 tree neon_float_type_node;
16970 tree intQI_pointer_node;
16971 tree intHI_pointer_node;
16972 tree intSI_pointer_node;
16973 tree intDI_pointer_node;
16974 tree float_pointer_node;
16976 tree const_intQI_node;
16977 tree const_intHI_node;
16978 tree const_intSI_node;
16979 tree const_intDI_node;
16980 tree const_float_node;
16982 tree const_intQI_pointer_node;
16983 tree const_intHI_pointer_node;
16984 tree const_intSI_pointer_node;
16985 tree const_intDI_pointer_node;
16986 tree const_float_pointer_node;
16988 tree V8QI_type_node;
16989 tree V4HI_type_node;
16990 tree V2SI_type_node;
16991 tree V2SF_type_node;
16992 tree V16QI_type_node;
16993 tree V8HI_type_node;
16994 tree V4SI_type_node;
16995 tree V4SF_type_node;
16996 tree V2DI_type_node;
16998 tree intUQI_type_node;
16999 tree intUHI_type_node;
17000 tree intUSI_type_node;
17001 tree intUDI_type_node;
17003 tree intEI_type_node;
17004 tree intOI_type_node;
17005 tree intCI_type_node;
17006 tree intXI_type_node;
17008 tree V8QI_pointer_node;
17009 tree V4HI_pointer_node;
17010 tree V2SI_pointer_node;
17011 tree V2SF_pointer_node;
17012 tree V16QI_pointer_node;
17013 tree V8HI_pointer_node;
17014 tree V4SI_pointer_node;
17015 tree V4SF_pointer_node;
17016 tree V2DI_pointer_node;
17018 tree void_ftype_pv8qi_v8qi_v8qi;
17019 tree void_ftype_pv4hi_v4hi_v4hi;
17020 tree void_ftype_pv2si_v2si_v2si;
17021 tree void_ftype_pv2sf_v2sf_v2sf;
17022 tree void_ftype_pdi_di_di;
17023 tree void_ftype_pv16qi_v16qi_v16qi;
17024 tree void_ftype_pv8hi_v8hi_v8hi;
17025 tree void_ftype_pv4si_v4si_v4si;
17026 tree void_ftype_pv4sf_v4sf_v4sf;
17027 tree void_ftype_pv2di_v2di_v2di;
17029 tree reinterp_ftype_dreg[5][5];
17030 tree reinterp_ftype_qreg[5][5];
17031 tree dreg_types[5], qreg_types[5];
17033 /* Create distinguished type nodes for NEON vector element types,
17034 and pointers to values of such types, so we can detect them later. */
17035 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17036 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17037 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17038 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17039 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17040 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17041 neon_float_type_node = make_node (REAL_TYPE);
17042 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17043 layout_type (neon_float_type_node);
17045 /* Define typedefs which exactly correspond to the modes we are basing vector
17046 types on. If you change these names you'll need to change
17047 the table used by arm_mangle_type too. */
17048 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17049 "__builtin_neon_qi");
17050 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17051 "__builtin_neon_hi");
17052 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17053 "__builtin_neon_si");
17054 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17055 "__builtin_neon_sf");
17056 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17057 "__builtin_neon_di");
17058 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17059 "__builtin_neon_poly8");
17060 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17061 "__builtin_neon_poly16");
17063 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17064 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17065 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17066 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17067 float_pointer_node = build_pointer_type (neon_float_type_node);
17069 /* Next create constant-qualified versions of the above types. */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);
17081 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17082 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17083 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17084 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17085 const_float_pointer_node = build_pointer_type (const_float_node);
17087 /* Now create vector types based on our NEON element types. */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17109 /* Unsigned integer types for various mode sizes. */
17110 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17111 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17112 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17113 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17115 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17116 "__builtin_neon_uqi");
17117 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17118 "__builtin_neon_uhi");
17119 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17120 "__builtin_neon_usi");
17121 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17122 "__builtin_neon_udi");
17124 /* Opaque integer types for structures of vectors. */
17125 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17126 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17127 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17128 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17130 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17131 "__builtin_neon_ti");
17132 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17133 "__builtin_neon_ei");
17134 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17135 "__builtin_neon_oi");
17136 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17137 "__builtin_neon_ci");
17138 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17139 "__builtin_neon_xi");
17141 /* Pointers to vector types. */
17142 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17143 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17144 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17145 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17146 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17147 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17148 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17149 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17150 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17152 /* Operations which return results as pairs. */
17153 void_ftype_pv8qi_v8qi_v8qi =
17154 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17155 V8QI_type_node, NULL);
17156 void_ftype_pv4hi_v4hi_v4hi =
17157 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17158 V4HI_type_node, NULL);
17159 void_ftype_pv2si_v2si_v2si =
17160 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17161 V2SI_type_node, NULL);
17162 void_ftype_pv2sf_v2sf_v2sf =
17163 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17164 V2SF_type_node, NULL);
17165 void_ftype_pdi_di_di =
17166 build_function_type_list (void_type_node, intDI_pointer_node,
17167 neon_intDI_type_node, neon_intDI_type_node, NULL);
17168 void_ftype_pv16qi_v16qi_v16qi =
17169 build_function_type_list (void_type_node, V16QI_pointer_node,
17170 V16QI_type_node, V16QI_type_node, NULL);
17171 void_ftype_pv8hi_v8hi_v8hi =
17172 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17173 V8HI_type_node, NULL);
17174 void_ftype_pv4si_v4si_v4si =
17175 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17176 V4SI_type_node, NULL);
17177 void_ftype_pv4sf_v4sf_v4sf =
17178 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17179 V4SF_type_node, NULL);
17180 void_ftype_pv2di_v2di_v2di =
17181 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17182 V2DI_type_node, NULL);
17184 dreg_types[0] = V8QI_type_node;
17185 dreg_types[1] = V4HI_type_node;
17186 dreg_types[2] = V2SI_type_node;
17187 dreg_types[3] = V2SF_type_node;
17188 dreg_types[4] = neon_intDI_type_node;
17190 qreg_types[0] = V16QI_type_node;
17191 qreg_types[1] = V8HI_type_node;
17192 qreg_types[2] = V4SI_type_node;
17193 qreg_types[3] = V4SF_type_node;
17194 qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
17210 neon_builtin_datum *d = &neon_builtin_data[i];
17211 unsigned int j, codeidx = 0;
17213 d->base_fcode = fcode;
      for (j = 0; j < T_MAX; j++)
	{
17217 const char* const modenames[] = {
17218 "v8qi", "v4hi", "v2si", "v2sf", "di",
17219 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17223 enum insn_code icode;
17224 int is_load = 0, is_store = 0;
	  if ((d->bits & (1 << j)) == 0)
	    continue;
	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
17234 case NEON_LOAD1LANE:
17235 case NEON_LOADSTRUCT:
17236 case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
17240 case NEON_STORE1LANE:
17241 case NEON_STORESTRUCT:
17242 case NEON_STORESTRUCTLANE:
	      is_store = 1;
	      /* Fall through.  */
17248 case NEON_LOGICBINOP:
17249 case NEON_SHIFTINSERT:
17256 case NEON_SHIFTIMM:
17257 case NEON_SHIFTACC:
17263 case NEON_LANEMULL:
17264 case NEON_LANEMULH:
17266 case NEON_SCALARMUL:
17267 case NEON_SCALARMULL:
17268 case NEON_SCALARMULH:
17269 case NEON_SCALARMAC:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;
17277 /* Build a function type directly from the insn_data for this
17278 builtin. The build_function_type() function takes care of
17279 removing duplicates for us. */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
17286 /* Neon load patterns always have the memory operand
17287 (a SImode pointer) in the operand 1 position. We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
17324 /* Similarly, Neon store patterns use operand 0 as
17325 the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
17364 case VOIDmode: eltype = void_type_node; break;
17366 case QImode: eltype = neon_intQI_type_node; break;
17367 case HImode: eltype = neon_intHI_type_node; break;
17368 case SImode: eltype = neon_intSI_type_node; break;
17369 case SFmode: eltype = neon_float_type_node; break;
17370 case DImode: eltype = neon_intDI_type_node; break;
17371 case TImode: eltype = intTI_type_node; break;
17372 case EImode: eltype = intEI_type_node; break;
17373 case OImode: eltype = intOI_type_node; break;
17374 case CImode: eltype = intCI_type_node; break;
17375 case XImode: eltype = intXI_type_node; break;
17376 /* 64-bit vectors. */
17377 case V8QImode: eltype = V8QI_type_node; break;
17378 case V4HImode: eltype = V4HI_type_node; break;
17379 case V2SImode: eltype = V2SI_type_node; break;
17380 case V2SFmode: eltype = V2SF_type_node; break;
17381 /* 128-bit vectors. */
17382 case V16QImode: eltype = V16QI_type_node; break;
17383 case V8HImode: eltype = V8HI_type_node; break;
17384 case V4SImode: eltype = V4SI_type_node; break;
17385 case V4SFmode: eltype = V4SF_type_node; break;
17386 case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

17391 if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
17405 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17406 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17407 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17408 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17409 case DImode: ftype = void_ftype_pdi_di_di; break;
17410 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17411 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17412 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17413 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17414 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
17427 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17428 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17429 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17430 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17431 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17432 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17433 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17434 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17435 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17436 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }
17446 gcc_assert (ftype != NULL);
17448 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}
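/* For example (sketch): the BINOP entry for "vadd" in V8QImode yields a
   builtin named __builtin_neon_vaddv8qi; arm_neon.h is expected to wrap
   such builtins in the user-level intrinsics (vadd_s8 and friends).  */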
static void
arm_init_fp16_builtins (void)
{
17459 tree fp16_type = make_node (REAL_TYPE);
17460 TYPE_PRECISION (fp16_type) = 16;
17461 layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}

static void
arm_init_builtins (void)
{
17468 arm_init_tls_builtins ();
17470 if (TARGET_REALLY_IWMMXT)
17471 arm_init_iwmmxt_builtins ();
17474 arm_init_neon_builtins ();
17476 if (arm_fp16_format)
17477 arm_init_fp16_builtins ();
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go via an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
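/* Illustrative sketch, not part of GCC: the condition arm_convert_to_type
   tests above, extracted as a standalone helper.  A conversion takes the
   intermediate float step exactly when one side is the 16-bit type and
   the other is wider than 32 bits (i.e. double or long double).  */
#if 0
static int
needs_float_intermediate (int from_bits, int to_bits)
{
  return (from_bits == 16 && to_bits > 32)
	 || (to_bits == 16 && from_bits > 32);
  /* needs_float_intermediate (16, 64) == 1   (__fp16 <-> double)
     needs_float_intermediate (16, 32) == 0   (__fp16 <-> float)  */
}
#endif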
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}
static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key, *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}
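/* Illustrative sketch, not part of GCC: the ranged-bsearch idiom used by
   locate_neon_builtin_icode, reduced to a standalone program.  Each record
   covers the half-open code range [base, base+count), and the comparator
   reports "equal" for any key inside a record's range, so one bsearch
   finds both the record and (by subtraction) the variant index.  All
   names here are hypothetical.  */
#if 0
#include <stdlib.h>

struct range { unsigned base; unsigned count; };

static int
range_compare (const void *a, const void *b)
{
  const struct range *key = (const struct range *) a;
  const struct range *memb = (const struct range *) b;
  if (key->base >= memb->base && key->base < memb->base + memb->count)
    return 0;			/* key falls inside this record */
  return key->base < memb->base ? -1 : 1;
}

int
main (void)
{
  static const struct range table[] = { {0, 4}, {4, 2}, {6, 8} };
  struct range key = { 7, 0 };
  struct range *hit = bsearch (&key, table, 3, sizeof table[0],
			       range_compare);
  return hit ? (int) (key.base - hit->base) : -1;	/* index 1 in {6,8} */
}
#endif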
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      tree exp, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  int argc = 0;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, exp);

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;

      arg[argc] = CALL_EXPR_ARG (exp, argc);
      op[argc] = expand_normal (arg[argc]);
      mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

      switch (thisarg)
	{
	case NEON_ARG_COPY_TO_REG:
	  /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	  if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		(op[argc], mode[argc]))
	    op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	  break;

	case NEON_ARG_CONSTANT:
	  /* FIXME: This error message is somewhat unhelpful.  */
	  if (!(*insn_data[icode].operand[argc + have_retval].predicate)
		(op[argc], mode[argc]))
	    error ("argument must be a constant");
	  break;

	case NEON_ARG_STOP:
	  gcc_unreachable ();
	}

      argc++;
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1: pat = GEN_FCN (icode) (target, op[0]); break;
      case 2: pat = GEN_FCN (icode) (target, op[0], op[1]); break;
      case 3: pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); break;
      case 4: pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;
      case 5: pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3],
				     op[4]);
	break;
      default: gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1: pat = GEN_FCN (icode) (op[0]); break;
      case 2: pat = GEN_FCN (icode) (op[0], op[1]); break;
      case 3: pat = GEN_FCN (icode) (op[0], op[1], op[2]); break;
      case 4: pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); break;
      case 5: pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); break;
      default: gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have
   symbolic constants defined per-instruction or per instruction-variant.
   Instead, the required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_itype itype;
  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, exp,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering
   bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
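/* Illustrative sketch, not part of GCC: at the C level, the effect of
   neon_reinterpret corresponds to a memcpy-based bit cast - the bytes are
   unchanged, only the type under which they are viewed differs.  */
#if 0
#include <stdint.h>
#include <string.h>

static uint32_t
bits_of_float (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);	/* reinterpret without altering bits */
  return u;
}
#endif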
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2, op2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;
  unsigned int i, j;

  while (copied != done)
    {
      for (i = 0; i < count; i++)
	{
	  int good = 1;
	  for (j = 0; good && j < count; j++)
	    if (i != j && (copied & (1 << j)) == 0
		&& reg_overlap_mentioned_p (src[j], dest[i]))
	      good = 0;
	  if (good && ((copied & (1 << i)) == 0))
	    {
	      operands[opctr++] = dest[i];
	      operands[opctr++] = src[i];
	      copied |= 1 << i;
	    }
	}
    }

  gcc_assert (opctr == count * 2);
}
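/* Illustrative sketch, not part of GCC: a host-side model of the
   scheduling loop above.  "Registers" are small integers and overlap is
   plain equality; a copy is emitted only once no pending copy still reads
   the register it overwrites.  Like the real function, this assumes the
   copy set contains no cycle (a pure swap would spin forever).  */
#if 0
#include <stdio.h>

static void
order_copies (const int *dest, const int *src, int count)
{
  unsigned copied = 0, done = (1u << count) - 1;

  while (copied != done)
    for (int i = 0; i < count; i++)
      {
	int good = 1;

	if (copied & (1u << i))
	  continue;
	for (int j = 0; good && j < count; j++)
	  if (i != j && (copied & (1u << j)) == 0 && src[j] == dest[i])
	    good = 0;		/* dest[i] is still needed as a source */
	if (good)
	  {
	    printf ("mov\tr%d, r%d\n", dest[i], src[i]);
	    copied |= 1u << i;
	  }
      }
}

int
main (void)
{
  int dest[] = { 2, 3 }, src[] = { 1, 2 };
  order_copies (dest, src, 2);	/* emits "mov r3, r2" before "mov r2, r1" */
  return 0;
}
#endif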
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;
    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
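/* Illustrative note, not part of GCC: on a nonzero mask the loop above
   computes a count of trailing zeros, so a modern equivalent could simply
   use the GCC intrinsic shown below.  */
#if 0
static int
first_set_bit (unsigned mask)
{
  return __builtin_ctz (mask);	/* e.g. first_set_bit (0x8) == 3 */
}
#endif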
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
	       unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  if (ARM_EABI_UNWIND_TABLES && push)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
	{
	  if (real_regs & (1 << regno))
	    {
	      if (real_regs & ((1 << regno) - 1))
		fprintf (f, ", ");
	      asm_fprintf (f, "%r", regno);
	    }
	}
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);
	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	  pushed_words++;
	}
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
	fprintf (f, ", ");
      asm_fprintf (f, "%r", LR_REGNUM);
      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");
	  thumb_exit (f, -1);
	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");
	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
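/* Illustrative sketch, not part of GCC: the CFA bookkeeping above, run on
   a hypothetical "push {r4, r5, lr}" starting from a CFA offset of 0.
   The CFA moves up by 4 bytes per word pushed, and each saved register
   lands at a negative offset from the new CFA, exactly as computed by the
   4 * pushed_words++ - *cfa_offset expression.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned real_regs = (1u << 4) | (1u << 5) | (1u << 14); /* r4, r5, lr */
  int cfa_offset = 0, pushed_words = 3, words = 0;

  cfa_offset += pushed_words * 4;	/* new CFA is sp + 12 */
  for (int regno = 0; regno <= 14; regno++, real_regs >>= 1)
    if (real_regs & 1)
      printf ("r%d saved at CFA%+d\n", regno, 4 * words++ - cfa_offset);
  /* prints: r4 at CFA-12, r5 at CFA-8, r14 at CFA-4 */
  return 0;
}
#endif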
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* Deduce the registers used from the function's return value.
	 This is more reliable than examining df_regs_ever_live_p ()
	 because that will be set if the register is ever used in the
	 function, not just if the register is used to hold a return
	 value.  */
      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register for the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
static void
thumb1_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
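/* Illustrative sketch, not part of GCC: a standalone version of the test
   above.  A constant is "shiftable" when it is an 8-bit value shifted
   left by 0..24 bits, i.e. materialisable as a MOV of a byte followed by
   a left shift on Thumb-1.  */
#if 0
#include <stdio.h>

static int
shiftable (unsigned val)
{
  unsigned mask = 0xff;
  if (val == 0)
    return 0;
  for (int i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;
  return 0;
}

int
main (void)
{
  /* 0xff00 fits one 8-bit window shifted by 8; 0x101 spans 9 bits.  */
  printf ("%d %d\n", shiftable (0xff00), shiftable (0x101)); /* "1 0" */
  return 0;
}
#endif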
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ());  */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
	  /* Ignore tablejump patterns.  */
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* Deduce the registers used from the function's return value.  This is
     more reliable than examining df_regs_ever_live_p () because that will
     be set if the register is ever used in the function, not just if the
     register is used to hold a return value.  */
  size = arm_size_return_regs ();

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
			 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
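/* Illustrative sketch, not part of GCC: a worked example of the offset
   arithmetic above, using a hypothetical frame layout (all numbers made
   up).  Each elimination is just the distance between two recorded frame
   boundaries.  */
#if 0
#include <stdio.h>

struct offsets { int saved_args, saved_regs, soft_frame,
		     locals_base, outgoing_args; };

int
main (void)
{
  /* Hypothetical frame: 8 bytes of saved registers, 16 bytes of locals,
     8 bytes of outgoing argument space.  */
  struct offsets o = { 0, 8, 8, 24, 32 };

  printf ("ARG_POINTER -> STACK_POINTER: %d\n",
	  o.outgoing_args - o.saved_args);	/* 32 */
  printf ("FRAME_POINTER -> STACK_POINTER: %d\n",
	  o.outgoing_args - o.soft_frame);	/* 24 */
  return 0;
}
#endif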
/* Generate the rest of a function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx insn, dwarf;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  amount = offsets->outgoing_args - offsets->saved_regs;
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (stack_pointer_rtx,
					      -amount));
	  RTX_FRAME_RELATED_P (dwarf) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }

  gcc_assert (amount >= 0);
  if (amount)
    {
      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
static void
thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;
  unsigned long live_regs_mask = 0;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  int cfa_offset = 0;
  int regno;

  if (IS_NAKED (arm_current_func_type ()))
    return;

  if (is_called_in_ARM_mode (current_function_decl))
    {
      const char * name;

      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
		  == SYMBOL_REF);
      name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      /* Generate code sequence to switch us into Thumb mode.  */
      /* The .code 32 directive has already been emitted by
	 ASM_DECLARE_FUNCTION_NAME.  */
      asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

      /* Generate a label, so that the debugger will notice the
	 change in instruction sets.  This label is also used by
	 the assembler to bypass the ARM code when this function
	 is called from a Thumb encoded function elsewhere in the
	 same file.  Hence the definition of STUB_NAME here must
	 agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

      fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
      if (arm_dllexport_name_p (name))
	name = arm_strip_name_encoding (name);
#endif
      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
      fprintf (f, "\t.thumb_func\n");
      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
    }

  if (crtl->args.pretend_args_size)
    {
      /* Output unwind directive for the stack adjustment.  */
      if (ARM_EABI_UNWIND_TABLES)
	fprintf (f, "\t.pad #%d\n",
		 crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes;

	  fprintf (f, "\tpush\t{");

	  num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);

	  for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
	       regno <= LAST_ARG_REGNUM;
	       regno++)
	    asm_fprintf (f, "%r%s", regno,
			 regno == LAST_ARG_REGNUM ? "" : ", ");

	  fprintf (f, "}\n");
	}
      else
	asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
		     SP_REGNUM, SP_REGNUM,
		     crtl->args.pretend_args_size);

      /* We don't need to record the stores for unwinding (would it
	 help the debugger any if we did?), but record the change in
	 the stack pointer.  */
      if (dwarf2out_do_frame ())
	{
	  char *l = dwarf2out_cfi_label (false);

	  cfa_offset = cfa_offset + crtl->args.pretend_args_size;
	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
	}
    }

  /* Get the registers we are going to push.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (TARGET_BACKTRACE)
    {
      unsigned offset;
      unsigned work_register;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer (before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code plus 12.
	 10    str   R7, [SP, #16]   Store it.
	 12    mov   R7, FP          Get hold of the current frame pointer.
	 14    str   R7, [SP, #4]    Store it.
	 16    mov   R7, LR          Get hold of the current return address.
	 18    str   R7, [SP, #12]   Store it.
	 20    add   R7, SP, #16     Point at the start of the backtrace structure.
	 22    mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);

      if (ARM_EABI_UNWIND_TABLES)
	asm_fprintf (f, "\t.pad #16\n");

      asm_fprintf
	(f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
	 SP_REGNUM, SP_REGNUM);

      if (dwarf2out_do_frame ())
	{
	  char *l = dwarf2out_cfi_label (false);

	  cfa_offset = cfa_offset + 16;
	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
	}

      if (l_mask)
	{
	  thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}
      else
	offset = 0;

      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
		   offset + 16 + crtl->args.pretend_args_size);

      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		   offset + 4);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      if (l_mask)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset + 12);
	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
		       ARM_HARD_FRAME_POINTER_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset);
	}
      else
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
		       ARM_HARD_FRAME_POINTER_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset);
	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		       offset + 12);
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
		   offset + 8);
      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
		   offset + 12);
      asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      pushable_regs = l_mask & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);

		  high_regs_pushed --;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg --)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
			     1, &cfa_offset,
			     real_regs_mask | (1 << LR_REGNUM));
	      l_mask = 0;
	    }
	  else
	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
	}
    }
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg>  */
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{ tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      /* Sort the three transfer registers into ascending order.  */
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{ tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{ tmp = operands[5]; operands[5] = operands[6]; operands[6] = tmp; }
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{ tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }
  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }
  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }
  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
			    reg));
      len -= 2;
      offset += 2;
    }
  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
			    reg));
    }
}
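/* Illustrative sketch, not part of GCC: the chunk schedule used by
   thumb_expand_movmemqi, printed for a given length.  The 12- and 8-byte
   blocks advance the pointers through ldmia/stmia writeback, so OFFSET
   only starts accumulating at the word copy, just as in the function
   above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long len = 23, offset = 0;

  while (len >= 12) { printf ("12-byte block\n"); len -= 12; }
  if (len >= 8) { printf ("8-byte block\n"); len -= 8; }
  if (len >= 4) { printf ("word @%ld\n", offset); len -= 4; offset += 4; }
  if (len >= 2) { printf ("half @%ld\n", offset); len -= 2; offset += 2; }
  if (len)      printf ("byte @%ld\n", offset);
  /* len == 23: one 12-byte block, one 8-byte block, half @0, byte @2.  */
  return 0;
}
#endif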
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
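/* Example (editorial): a name encoded as "*foo" is emitted verbatim as
   "foo", while a plain "foo" goes through %U and may gain a leading
   underscore on configurations that prefix user labels.  */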
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_select[0].string)
	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
      else if (arm_select[1].string)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
      else
	asm_fprintf (asm_out_file, "\t.cpu %s\n",
		     all_cores[arm_default_cpu].name);

      if (TARGET_SOFT_FLOAT)
	{
	  if (TARGET_VFP)
	    fpu_name = "softvfp";
	  else
	    fpu_name = "softfpa";
	}
      else
	{
	  int set_float_abi_attributes = 0;
	  switch (arm_fpu_arch)
	    {
	    case FPUTYPE_FPA:
	      fpu_name = "fpa";
	      break;
	    case FPUTYPE_FPA_EMU2:
	      fpu_name = "fpe2";
	      break;
	    case FPUTYPE_FPA_EMU3:
	      fpu_name = "fpe3";
	      break;
	    case FPUTYPE_MAVERICK:
	      fpu_name = "maverick";
	      break;
	    case FPUTYPE_VFP:
	      fpu_name = "vfp";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_VFP3D16:
	      fpu_name = "vfpv3-d16";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_VFP3:
	      fpu_name = "vfpv3";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_NEON:
	      fpu_name = "neon";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_NEON_FP16:
	      fpu_name = "neon-fp16";
	      set_float_abi_attributes = 1;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  if (set_float_abi_attributes)
	    {
	      if (TARGET_HARD_FLOAT)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
	      if (TARGET_HARD_FLOAT_ABI)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      /* Tag_ABI_FP_rounding.  */
      if (flag_rounding_math)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
	{
	  /* Tag_ABI_FP_denormal.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
	  /* Tag_ABI_FP_exceptions.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
	}
      /* Tag_ABI_FP_user_exceptions.  */
      if (flag_signaling_nans)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
		   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");

      /* Tag_ABI_enum_size.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
		   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      /* Tag_ABI_FP_16bit_format.  */
      if (arm_fp16_format)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
		     (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook ();
    }

  default_file_start ();
}
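/* Illustrative output (editorial; exact values depend on the options):
   for an EABI target with -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp
   -O2 the file would begin roughly as

	.cpu cortex-a8
	.eabi_attribute 27, 3
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2  */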
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		     mi_op, this_regno, this_regno,
		     mi_delta);
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
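/* Worked example for the non-Thumb1 loop above (editorial; assumes the
   this pointer is in r0): delta = 0x12345 is applied in 8-bit chunks,
   each of which is a valid ARM rotated immediate:

	add	r0, r0, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000  */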
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
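/* Example (editorial): a V4HImode constant with elements {1, 2, 3, 4} is
   printed highest-numbered element first, giving "0x0004000300020001",
   i.e. a single hexadecimal image of the whole 64-bit vector.  */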
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
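/* Example (editorial): HFmode 1.0 has the IEEE 754 binary16 image 0x3c00,
   so on a little-endian target this emits a 2-byte integer 0x3c00 followed
   by two bytes of zero padding, letting ldr fetch the constant as a word.  */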
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
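/* Worked example (editorial): for "int f (int a, ...)" only r0 carries a
   named argument, so nregs == 1 and *pretend_size becomes (4 - 1) * 4 == 12;
   the prologue then pushes r1-r3 so that the anonymous arguments sit
   contiguously with any arguments already on the stack.  */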
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}
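/* Example (editorial): if the producer sets r0 and the consumer is
   "str r0, [r3, #8]", the consumer needs r0 only as store data, not for
   the address r3 + 8, so this returns nonzero and the scheduler may pair
   the instructions on cores that accept store data late.  */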
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
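/* Example (editorial): for the pair

	mul	r1, r2, r3
	mla	r4, r5, r6, r1

   the mul result r1 feeds only the accumulator operand, so this returns
   nonzero; for "mla r4, r1, r6, r1" it returns zero because r1 is also
   a multiplicand.  */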
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (addr, delta);
	}
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  return false;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
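/* Example (editorial): because SImode shift counts are truncated modulo
   256 by the hardware, a shift by a value known to be 257 may be emitted
   as a shift by 1; DImode shifts get a mask of 0 and must not be
   truncated this way.  */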
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  /* FIXME: VFPv3 register numbering.  */
  if (IS_VFP_REGNUM (regno))
    return 64 + regno - FIRST_VFP_REGNUM;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
	abort ();

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   INTVAL (XEXP (e1, 1)));
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
	{
	  /* Stack pointer save before alignment.  */
	  reg = REGNO (e0);
	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
		       reg + 0x90, reg);
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
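/* Illustrative output (editorial): a prologue consisting of

	push	{r4, r5, lr}
	sub	sp, sp, #16

   is annotated for the EABI unwinder as

	.save	{r4, r5, lr}
	.pad	#16  */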
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
	 put anything on the stack, so hopefully it won't matter.
	 CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:  fputs ("(tlsgd)", fp);    break;
    case TLS_LDM32: fputs ("(tlsldm)", fp);   break;
    case TLS_LDO32: fputs ("(tlsldo)", fp);   break;
    case TLS_IE32:  fputs ("(gottpoff)", fp); break;
    case TLS_LE32:  fputs ("(tpoff)", fp);    break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx *operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
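/* Example (editorial, approximate): with unified syntax a flag-setting
   left shift prints as "lsls\tr0, r1, #2", while divided ARM syntax
   expresses the same operation as "movs\tr0, r1, lsl #2" via %S3.  */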
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));
  addr_diff_vec_flags flags;

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  flags = ADDR_DIFF_VEC_FLAGS (diff_vec);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
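/* Example (editorial): a vector of four __builtin_neon_si elements has
   V4SImode, so the table maps it to "17__simd128_int32_t"; a function
   "void f (int32x4_t)" therefore mangles as "_Z1f17__simd128_int32_t".  */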
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"