1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
57 /* Forward definitions of types. */
58 typedef struct minipool_node Mnode;
59 typedef struct minipool_fixup Mfix;
61 const struct attribute_spec arm_attribute_table[];
63 void (*arm_lang_output_object_attributes_hook)(void);
65 /* Forward function declarations. */
66 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int);
71 static unsigned bit_count (unsigned long);
72 static int arm_address_register_rtx_p (rtx, int);
73 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
74 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
75 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
76 inline static int thumb1_index_register_rtx_p (rtx, int);
77 static int thumb_far_jump_used_p (void);
78 static bool thumb_force_lr_save (void);
79 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
80 static rtx emit_sfm (int, int);
81 static unsigned arm_size_return_regs (void);
82 static bool arm_assemble_integer (rtx, unsigned int, int);
83 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
84 static arm_cc get_arm_condition_code (rtx);
85 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
86 static rtx is_jump_table (rtx);
87 static const char *output_multi_immediate (rtx *, const char *, const char *,
89 static const char *shift_op (rtx, HOST_WIDE_INT *);
90 static struct machine_function *arm_init_machine_status (void);
91 static void thumb_exit (FILE *, int);
92 static rtx is_jump_table (rtx);
93 static HOST_WIDE_INT get_jump_table_size (rtx);
94 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_forward_ref (Mfix *);
96 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
97 static Mnode *add_minipool_backward_ref (Mfix *);
98 static void assign_minipool_offsets (Mfix *);
99 static void arm_print_value (FILE *, rtx);
100 static void dump_minipool (rtx);
101 static int arm_barrier_cost (rtx);
102 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
103 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
104 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
106 static void arm_reorg (void);
107 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
108 static unsigned long arm_compute_save_reg0_reg12_mask (void);
109 static unsigned long arm_compute_save_reg_mask (void);
110 static unsigned long arm_isr_value (tree);
111 static unsigned long arm_compute_func_type (void);
112 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
113 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
114 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
115 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
117 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
118 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
119 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
120 static int arm_comp_type_attributes (const_tree, const_tree);
121 static void arm_set_default_type_attributes (tree);
122 static int arm_adjust_cost (rtx, rtx, rtx, int);
123 static int count_insns_for_constant (HOST_WIDE_INT, int);
124 static int arm_get_strip_length (int);
125 static bool arm_function_ok_for_sibcall (tree, tree);
126 static void arm_internal_label (FILE *, const char *, unsigned long);
127 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
129 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
130 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
131 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
132 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
133 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
134 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
135 static bool arm_rtx_costs (rtx, int, int, int *, bool);
136 static int arm_address_cost (rtx, bool);
137 static bool arm_memory_load_p (rtx);
138 static bool arm_cirrus_insn_p (rtx);
139 static void cirrus_reorg (rtx);
140 static void arm_init_builtins (void);
141 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
142 static void arm_init_iwmmxt_builtins (void);
143 static rtx safe_vector_operand (rtx, enum machine_mode);
144 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
145 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
146 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
147 static void emit_constant_insn (rtx cond, rtx pattern);
148 static rtx emit_set_insn (rtx, rtx);
149 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
152 #ifdef OBJECT_FORMAT_ELF
153 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
154 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
157 static void arm_encode_section_info (tree, rtx, int);
160 static void arm_file_end (void);
161 static void arm_file_start (void);
163 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
165 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
166 enum machine_mode, const_tree, bool);
167 static bool arm_promote_prototypes (const_tree);
168 static bool arm_default_short_enums (void);
169 static bool arm_align_anon_bitfield (void);
170 static bool arm_return_in_msb (const_tree);
171 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 #ifdef TARGET_UNWIND_INFO
174 static void arm_unwind_emit (FILE *, rtx);
175 static bool arm_output_ttype (rtx);
177 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
179 static tree arm_cxx_guard_type (void);
180 static bool arm_cxx_guard_mask_bit (void);
181 static tree arm_get_cookie_size (tree);
182 static bool arm_cookie_has_size (void);
183 static bool arm_cxx_cdtor_returns_this (void);
184 static bool arm_cxx_key_method_may_be_inline (void);
185 static void arm_cxx_determine_class_data_visibility (tree);
186 static bool arm_cxx_class_data_always_comdat (void);
187 static bool arm_cxx_use_aeabi_atexit (void);
188 static void arm_init_libfuncs (void);
189 static tree arm_build_builtin_va_list (void);
190 static void arm_expand_builtin_va_start (tree, rtx);
191 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
192 static bool arm_handle_option (size_t, const char *, int);
193 static void arm_target_help (void);
194 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
195 static bool arm_cannot_copy_insn_p (rtx);
196 static bool arm_tls_symbol_p (rtx x);
197 static int arm_issue_rate (void);
198 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
199 static bool arm_allocate_stack_slots_for_args (void);
202 /* Initialize the GCC target structure. */
203 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
204 #undef TARGET_MERGE_DECL_ATTRIBUTES
205 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
208 #undef TARGET_ATTRIBUTE_TABLE
209 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
211 #undef TARGET_ASM_FILE_START
212 #define TARGET_ASM_FILE_START arm_file_start
213 #undef TARGET_ASM_FILE_END
214 #define TARGET_ASM_FILE_END arm_file_end
216 #undef TARGET_ASM_ALIGNED_SI_OP
217 #define TARGET_ASM_ALIGNED_SI_OP NULL
218 #undef TARGET_ASM_INTEGER
219 #define TARGET_ASM_INTEGER arm_assemble_integer
221 #undef TARGET_ASM_FUNCTION_PROLOGUE
222 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
224 #undef TARGET_ASM_FUNCTION_EPILOGUE
225 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
227 #undef TARGET_DEFAULT_TARGET_FLAGS
228 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
229 #undef TARGET_HANDLE_OPTION
230 #define TARGET_HANDLE_OPTION arm_handle_option
232 #define TARGET_HELP arm_target_help
234 #undef TARGET_COMP_TYPE_ATTRIBUTES
235 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
237 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
238 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
240 #undef TARGET_SCHED_ADJUST_COST
241 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
243 #undef TARGET_ENCODE_SECTION_INFO
245 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
247 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
250 #undef TARGET_STRIP_NAME_ENCODING
251 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
253 #undef TARGET_ASM_INTERNAL_LABEL
254 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
256 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
257 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
259 #undef TARGET_ASM_OUTPUT_MI_THUNK
260 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
261 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
262 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
264 #undef TARGET_RTX_COSTS
265 #define TARGET_RTX_COSTS arm_rtx_costs
266 #undef TARGET_ADDRESS_COST
267 #define TARGET_ADDRESS_COST arm_address_cost
269 #undef TARGET_SHIFT_TRUNCATION_MASK
270 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
271 #undef TARGET_VECTOR_MODE_SUPPORTED_P
272 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
274 #undef TARGET_MACHINE_DEPENDENT_REORG
275 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
277 #undef TARGET_INIT_BUILTINS
278 #define TARGET_INIT_BUILTINS arm_init_builtins
279 #undef TARGET_EXPAND_BUILTIN
280 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
282 #undef TARGET_INIT_LIBFUNCS
283 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
285 #undef TARGET_PROMOTE_FUNCTION_ARGS
286 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
287 #undef TARGET_PROMOTE_FUNCTION_RETURN
288 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
289 #undef TARGET_PROMOTE_PROTOTYPES
290 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
291 #undef TARGET_PASS_BY_REFERENCE
292 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
293 #undef TARGET_ARG_PARTIAL_BYTES
294 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
296 #undef TARGET_SETUP_INCOMING_VARARGS
297 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
299 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
300 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
302 #undef TARGET_DEFAULT_SHORT_ENUMS
303 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
305 #undef TARGET_ALIGN_ANON_BITFIELD
306 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
308 #undef TARGET_NARROW_VOLATILE_BITFIELD
309 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
311 #undef TARGET_CXX_GUARD_TYPE
312 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
314 #undef TARGET_CXX_GUARD_MASK_BIT
315 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
317 #undef TARGET_CXX_GET_COOKIE_SIZE
318 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
320 #undef TARGET_CXX_COOKIE_HAS_SIZE
321 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
323 #undef TARGET_CXX_CDTOR_RETURNS_THIS
324 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
326 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
327 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
329 #undef TARGET_CXX_USE_AEABI_ATEXIT
330 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
332 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
333 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
334 arm_cxx_determine_class_data_visibility
336 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
337 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
339 #undef TARGET_RETURN_IN_MSB
340 #define TARGET_RETURN_IN_MSB arm_return_in_msb
342 #undef TARGET_RETURN_IN_MEMORY
343 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
345 #undef TARGET_MUST_PASS_IN_STACK
346 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
348 #ifdef TARGET_UNWIND_INFO
349 #undef TARGET_UNWIND_EMIT
350 #define TARGET_UNWIND_EMIT arm_unwind_emit
352 /* EABI unwinding tables use a different format for the typeinfo tables. */
353 #undef TARGET_ASM_TTYPE
354 #define TARGET_ASM_TTYPE arm_output_ttype
356 #undef TARGET_ARM_EABI_UNWINDER
357 #define TARGET_ARM_EABI_UNWINDER true
358 #endif /* TARGET_UNWIND_INFO */
360 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
361 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
363 #undef TARGET_CANNOT_COPY_INSN_P
364 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
367 #undef TARGET_HAVE_TLS
368 #define TARGET_HAVE_TLS true
371 #undef TARGET_CANNOT_FORCE_CONST_MEM
372 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
374 #undef TARGET_MAX_ANCHOR_OFFSET
375 #define TARGET_MAX_ANCHOR_OFFSET 4095
377 /* The minimum is set such that the total size of the block
378 for a particular anchor is -4088 + 1 + 4095 bytes, which is
379 divisible by eight, ensuring natural spacing of anchors. */
380 #undef TARGET_MIN_ANCHOR_OFFSET
381 #define TARGET_MIN_ANCHOR_OFFSET -4088
383 #undef TARGET_SCHED_ISSUE_RATE
384 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
386 #undef TARGET_MANGLE_TYPE
387 #define TARGET_MANGLE_TYPE arm_mangle_type
389 #undef TARGET_BUILD_BUILTIN_VA_LIST
390 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
391 #undef TARGET_EXPAND_BUILTIN_VA_START
392 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
393 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
394 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
397 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
398 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
401 struct gcc_target targetm = TARGET_INITIALIZER;
403 /* Obstack for minipool constant handling. */
404 static struct obstack minipool_obstack;
405 static char * minipool_startobj;
407 /* The maximum number of insns skipped which
408 will be conditionalised if possible. */
409 static int max_insns_skipped = 5;
411 extern FILE * asm_out_file;
413 /* True if we are currently building a constant table. */
414 int making_const_table;
416 /* Define the information needed to generate branch insns. This is
417 stored from the compare operation. */
418 rtx arm_compare_op0, arm_compare_op1;
420 /* The processor for which instructions should be scheduled. */
421 enum processor_type arm_tune = arm_none;
423 /* The default processor used if not overridden by commandline. */
424 static enum processor_type arm_default_cpu = arm_none;
426 /* Which floating point model to use. */
427 enum arm_fp_model arm_fp_model;
429 /* Which floating point hardware is available. */
430 enum fputype arm_fpu_arch;
432 /* Which floating point hardware to schedule for. */
433 enum fputype arm_fpu_tune;
435 /* Whether to use floating point hardware. */
436 enum float_abi_type arm_float_abi;
438 /* Which ABI to use. */
439 enum arm_abi_type arm_abi;
441 /* Which thread pointer model to use. */
442 enum arm_tp_type target_thread_pointer = TP_AUTO;
444 /* Used to parse -mstructure_size_boundary command line option. */
445 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
447 /* Used for Thumb call_via trampolines. */
448 rtx thumb_call_via_label[14];
449 static int thumb_call_reg_needed;
451 /* Bit values used to identify processor capabilities. */
452 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
453 #define FL_ARCH3M (1 << 1) /* Extended multiply */
454 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
455 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
456 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
457 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
458 #define FL_THUMB (1 << 6) /* Thumb aware */
459 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
460 #define FL_STRONG (1 << 8) /* StrongARM */
461 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
462 #define FL_XSCALE (1 << 10) /* XScale */
463 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
464 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
465 media instructions. */
466 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
467 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
468 Note: ARM6 & 7 derivatives only. */
469 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
470 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
471 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
473 #define FL_DIV (1 << 18) /* Hardware divide. */
474 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
475 #define FL_NEON (1 << 20) /* Neon instructions. */
477 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
479 #define FL_FOR_ARCH2 FL_NOTM
480 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
481 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
482 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
483 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
484 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
485 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
486 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
487 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
488 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
489 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
490 #define FL_FOR_ARCH6J FL_FOR_ARCH6
491 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
492 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
493 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
494 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
495 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
496 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
497 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
498 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
499 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
501 /* The bits in this mask specify which
502 instructions we are allowed to generate. */
503 static unsigned long insn_flags = 0;
505 /* The bits in this mask specify which instruction scheduling options should
507 static unsigned long tune_flags = 0;
509 /* The following are used in the arm.md file as equivalents to bits
510 in the above two flag variables. */
512 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
515 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
518 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
521 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
524 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
527 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
530 /* Nonzero if this chip supports the ARM 6K extensions. */
533 /* Nonzero if instructions not present in the 'M' profile can be used. */
534 int arm_arch_notm = 0;
536 /* Nonzero if this chip can benefit from load scheduling. */
537 int arm_ld_sched = 0;
539 /* Nonzero if this chip is a StrongARM. */
540 int arm_tune_strongarm = 0;
542 /* Nonzero if this chip is a Cirrus variant. */
543 int arm_arch_cirrus = 0;
545 /* Nonzero if this chip supports Intel Wireless MMX technology. */
546 int arm_arch_iwmmxt = 0;
548 /* Nonzero if this chip is an XScale. */
549 int arm_arch_xscale = 0;
551 /* Nonzero if tuning for XScale */
552 int arm_tune_xscale = 0;
554 /* Nonzero if we want to tune for stores that access the write-buffer.
555 This typically means an ARM6 or ARM7 with MMU or MPU. */
556 int arm_tune_wbuf = 0;
558 /* Nonzero if tuning for Cortex-A9. */
559 int arm_tune_cortex_a9 = 0;
561 /* Nonzero if generating Thumb instructions. */
564 /* Nonzero if we should define __THUMB_INTERWORK__ in the
566 XXX This is a bit of a hack, it's intended to help work around
567 problems in GLD which doesn't understand that armv5t code is
568 interworking clean. */
569 int arm_cpp_interwork = 0;
571 /* Nonzero if chip supports Thumb 2. */
574 /* Nonzero if chip supports integer division instruction. */
577 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
578 must report the mode of the memory reference from PRINT_OPERAND to
579 PRINT_OPERAND_ADDRESS. */
580 enum machine_mode output_memory_reference_mode;
582 /* The register number to be used for the PIC offset register. */
583 unsigned arm_pic_register = INVALID_REGNUM;
585 /* Set to 1 when a return insn is output, this means that the epilogue
587 int return_used_this_function;
589 /* Set to 1 after arm_reorg has started. Reset to start at the start of
590 the next function. */
591 static int after_arm_reorg = 0;
593 /* The maximum number of insns to be used when loading a constant. */
594 static int arm_constant_limit = 3;
596 /* For an explanation of these variables, see final_prescan_insn below. */
598 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
599 enum arm_cond_code arm_current_cc;
601 int arm_target_label;
602 /* The number of conditionally executed insns, including the current insn. */
603 int arm_condexec_count = 0;
604 /* A bitmask specifying the patterns for the IT block.
605 Zero means do not output an IT block before this insn. */
606 int arm_condexec_mask = 0;
607 /* The number of bits used in arm_condexec_mask. */
608 int arm_condexec_masklen = 0;
610 /* The condition codes of the ARM, and the inverse function. */
611 static const char * const arm_condition_codes[] =
613 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
614 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
617 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
618 #define streq(string1, string2) (strcmp (string1, string2) == 0)
620 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
621 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
622 | (1 << PIC_OFFSET_TABLE_REGNUM)))
624 /* Initialization code. */
628 const char *const name;
629 enum processor_type core;
631 const unsigned long flags;
632 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
635 /* Not all of these give usefully different compilation alternatives,
636 but there is no simple way of generalizing them. */
637 static const struct processors all_cores[] =
640 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
641 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
642 #include "arm-cores.def"
644 {NULL, arm_none, NULL, 0, NULL}
647 static const struct processors all_architectures[] =
649 /* ARM Architectures */
650 /* We don't specify rtx_costs here as it will be figured out
653 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
654 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
655 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
656 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
657 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
658 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
659 implementations that support it, so we will leave it out for now. */
660 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
661 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
662 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
663 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
664 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
665 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
666 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
667 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
668 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
669 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
670 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
671 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
672 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
673 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
674 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
675 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
676 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
677 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
678 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
679 {NULL, arm_none, NULL, 0 , NULL}
682 struct arm_cpu_select
686 const struct processors * processors;
689 /* This is a magic structure. The 'string' field is magically filled in
690 with a pointer to the value specified by the user on the command line
691 assuming that the user has specified such a value. */
693 static struct arm_cpu_select arm_select[] =
695 /* string name processors */
696 { NULL, "-mcpu=", all_cores },
697 { NULL, "-march=", all_architectures },
698 { NULL, "-mtune=", all_cores }
701 /* Defines representing the indexes into the above table. */
702 #define ARM_OPT_SET_CPU 0
703 #define ARM_OPT_SET_ARCH 1
704 #define ARM_OPT_SET_TUNE 2
706 /* The name of the preprocessor macro to define for this architecture. */
708 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
717 /* Available values for -mfpu=. */
719 static const struct fpu_desc all_fpus[] =
721 {"fpa", FPUTYPE_FPA},
722 {"fpe2", FPUTYPE_FPA_EMU2},
723 {"fpe3", FPUTYPE_FPA_EMU2},
724 {"maverick", FPUTYPE_MAVERICK},
725 {"vfp", FPUTYPE_VFP},
726 {"vfp3", FPUTYPE_VFP3},
727 {"vfpv3", FPUTYPE_VFP3},
728 {"vfpv3-d16", FPUTYPE_VFP3D16},
729 {"neon", FPUTYPE_NEON}
733 /* Floating point models used by the different hardware.
734 See fputype in arm.h. */
736 static const enum fputype fp_model_for_fpu[] =
738 /* No FP hardware. */
739 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
740 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
741 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
742 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
743 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
744 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
745 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
746 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
747 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
754 enum float_abi_type abi_type;
758 /* Available values for -mfloat-abi=. */
760 static const struct float_abi all_float_abis[] =
762 {"soft", ARM_FLOAT_ABI_SOFT},
763 {"softfp", ARM_FLOAT_ABI_SOFTFP},
764 {"hard", ARM_FLOAT_ABI_HARD}
771 enum arm_abi_type abi_type;
775 /* Available values for -mabi=. */
777 static const struct abi_name arm_all_abis[] =
779 {"apcs-gnu", ARM_ABI_APCS},
780 {"atpcs", ARM_ABI_ATPCS},
781 {"aapcs", ARM_ABI_AAPCS},
782 {"iwmmxt", ARM_ABI_IWMMXT},
783 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
786 /* Supported TLS relocations. */
796 /* Emit an insn that's a simple single-set. Both the operands must be known
799 emit_set_insn (rtx x, rtx y)
801 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
804 /* Return the number of bits set in VALUE. */
806 bit_count (unsigned long value)
808 unsigned long count = 0;
813 value &= value - 1; /* Clear the least-significant set bit. */
819 /* Set up library functions unique to ARM. */
822 arm_init_libfuncs (void)
824 /* There are no special library functions unless we are using the
829 /* The functions below are described in Section 4 of the "Run-Time
830 ABI for the ARM architecture", Version 1.0. */
832 /* Double-precision floating-point arithmetic. Table 2. */
833 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
834 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
835 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
836 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
837 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
839 /* Double-precision comparisons. Table 3. */
840 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
841 set_optab_libfunc (ne_optab, DFmode, NULL);
842 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
843 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
844 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
845 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
846 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
848 /* Single-precision floating-point arithmetic. Table 4. */
849 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
850 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
851 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
852 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
853 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
855 /* Single-precision comparisons. Table 5. */
856 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
857 set_optab_libfunc (ne_optab, SFmode, NULL);
858 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
859 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
860 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
861 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
862 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
864 /* Floating-point to integer conversions. Table 6. */
865 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
866 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
867 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
868 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
869 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
870 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
871 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
872 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
874 /* Conversions between floating types. Table 7. */
875 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
876 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
878 /* Integer to floating-point conversions. Table 8. */
879 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
880 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
881 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
882 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
883 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
884 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
885 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
886 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
888 /* Long long. Table 9. */
889 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
890 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
891 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
892 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
893 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
894 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
895 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
896 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
898 /* Integer (32/32->32) division. \S 4.3.1. */
899 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
900 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
902 /* The divmod functions are designed so that they can be used for
903 plain division, even though they return both the quotient and the
904 remainder. The quotient is returned in the usual location (i.e.,
905 r0 for SImode, {r0, r1} for DImode), just as would be expected
906 for an ordinary division routine. Because the AAPCS calling
907 conventions specify that all of { r0, r1, r2, r3 } are
908 callee-saved registers, there is no need to tell the compiler
909 explicitly that those registers are clobbered by these
911 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
912 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
914 /* For SImode division the ABI provides div-without-mod routines,
916 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
917 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
919 /* We don't have mod libcalls. Fortunately gcc knows how to use the
920 divmod libcalls instead. */
921 set_optab_libfunc (smod_optab, DImode, NULL);
922 set_optab_libfunc (umod_optab, DImode, NULL);
923 set_optab_libfunc (smod_optab, SImode, NULL);
924 set_optab_libfunc (umod_optab, SImode, NULL);
927 /* On AAPCS systems, this is the "struct __va_list". */
928 static GTY(()) tree va_list_type;
930 /* Return the type to use as __builtin_va_list. */
932 arm_build_builtin_va_list (void)
937 if (!TARGET_AAPCS_BASED)
938 return std_build_builtin_va_list ();
940 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
948 The C Library ABI further reinforces this definition in \S
951 We must follow this definition exactly. The structure tag
952 name is visible in C++ mangled names, and thus forms a part
953 of the ABI. The field name may be used by people who
954 #include <stdarg.h>. */
955 /* Create the type. */
956 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
957 /* Give it the required name. */
958 va_list_name = build_decl (TYPE_DECL,
959 get_identifier ("__va_list"),
961 DECL_ARTIFICIAL (va_list_name) = 1;
962 TYPE_NAME (va_list_type) = va_list_name;
963 /* Create the __ap field. */
964 ap_field = build_decl (FIELD_DECL,
965 get_identifier ("__ap"),
967 DECL_ARTIFICIAL (ap_field) = 1;
968 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
969 TYPE_FIELDS (va_list_type) = ap_field;
970 /* Compute its layout. */
971 layout_type (va_list_type);
976 /* Return an expression of type "void *" pointing to the next
977 available argument in a variable-argument list. VALIST is the
978 user-level va_list object, of type __builtin_va_list. */
980 arm_extract_valist_ptr (tree valist)
982 if (TREE_TYPE (valist) == error_mark_node)
983 return error_mark_node;
985 /* On an AAPCS target, the pointer is stored within "struct
987 if (TARGET_AAPCS_BASED)
989 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
990 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
991 valist, ap_field, NULL_TREE);
997 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
999 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1001 valist = arm_extract_valist_ptr (valist);
1002 std_expand_builtin_va_start (valist, nextarg);
1005 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1007 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1010 valist = arm_extract_valist_ptr (valist);
1011 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1014 /* Implement TARGET_HANDLE_OPTION. */
1017 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1022 arm_select[1].string = arg;
1026 arm_select[0].string = arg;
1029 case OPT_mhard_float:
1030 target_float_abi_name = "hard";
1033 case OPT_msoft_float:
1034 target_float_abi_name = "soft";
1038 arm_select[2].string = arg;
1047 arm_target_help (void)
1050 static int columns = 0;
1053 /* If we have not done so already, obtain the desired maximum width of
1054 the output. Note - this is a duplication of the code at the start of
1055 gcc/opts.c:print_specific_help() - the two copies should probably be
1056 replaced by a single function. */
1061 GET_ENVIRONMENT (p, "COLUMNS");
1064 int value = atoi (p);
1071 /* Use a reasonable default. */
1075 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1077 /* The - 2 is because we know that the last entry in the array is NULL. */
1078 i = ARRAY_SIZE (all_cores) - 2;
1080 printf (" %s", all_cores[i].name);
1081 remaining = columns - (strlen (all_cores[i].name) + 4);
1082 gcc_assert (remaining >= 0);
1086 int len = strlen (all_cores[i].name);
1088 if (remaining > len + 2)
1090 printf (", %s", all_cores[i].name);
1091 remaining -= len + 2;
1097 printf ("\n %s", all_cores[i].name);
1098 remaining = columns - (len + 4);
1102 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1104 i = ARRAY_SIZE (all_architectures) - 2;
1107 printf (" %s", all_architectures[i].name);
1108 remaining = columns - (strlen (all_architectures[i].name) + 4);
1109 gcc_assert (remaining >= 0);
1113 int len = strlen (all_architectures[i].name);
1115 if (remaining > len + 2)
1117 printf (", %s", all_architectures[i].name);
1118 remaining -= len + 2;
1124 printf ("\n %s", all_architectures[i].name);
1125 remaining = columns - (len + 4);
1132 /* Fix up any incompatible options that the user has specified.
1133 This has now turned into a maze. */
1135 arm_override_options (void)
1138 enum processor_type target_arch_cpu = arm_none;
1139 enum processor_type selected_cpu = arm_none;
1141 /* Set up the flags based on the cpu/architecture selected by the user. */
1142 for (i = ARRAY_SIZE (arm_select); i--;)
1144 struct arm_cpu_select * ptr = arm_select + i;
1146 if (ptr->string != NULL && ptr->string[0] != '\0')
1148 const struct processors * sel;
1150 for (sel = ptr->processors; sel->name != NULL; sel++)
1151 if (streq (ptr->string, sel->name))
1153 /* Set the architecture define. */
1154 if (i != ARM_OPT_SET_TUNE)
1155 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1157 /* Determine the processor core for which we should
1158 tune code-generation. */
1159 if (/* -mcpu= is a sensible default. */
1160 i == ARM_OPT_SET_CPU
1161 /* -mtune= overrides -mcpu= and -march=. */
1162 || i == ARM_OPT_SET_TUNE)
1163 arm_tune = (enum processor_type) (sel - ptr->processors);
1165 /* Remember the CPU associated with this architecture.
1166 If no other option is used to set the CPU type,
1167 we'll use this to guess the most suitable tuning
1169 if (i == ARM_OPT_SET_ARCH)
1170 target_arch_cpu = sel->core;
1172 if (i == ARM_OPT_SET_CPU)
1173 selected_cpu = (enum processor_type) (sel - ptr->processors);
1175 if (i != ARM_OPT_SET_TUNE)
1177 /* If we have been given an architecture and a processor
1178 make sure that they are compatible. We only generate
1179 a warning though, and we prefer the CPU over the
1181 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1182 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1185 insn_flags = sel->flags;
1191 if (sel->name == NULL)
1192 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1196 /* Guess the tuning options from the architecture if necessary. */
1197 if (arm_tune == arm_none)
1198 arm_tune = target_arch_cpu;
1200 /* If the user did not specify a processor, choose one for them. */
1201 if (insn_flags == 0)
1203 const struct processors * sel;
1204 unsigned int sought;
1206 selected_cpu = TARGET_CPU_DEFAULT;
1207 if (selected_cpu == arm_none)
1209 #ifdef SUBTARGET_CPU_DEFAULT
1210 /* Use the subtarget default CPU if none was specified by
1212 selected_cpu = SUBTARGET_CPU_DEFAULT;
1214 /* Default to ARM6. */
1215 if (selected_cpu == arm_none)
1216 selected_cpu = arm6;
1218 sel = &all_cores[selected_cpu];
1220 insn_flags = sel->flags;
1222 /* Now check to see if the user has specified some command line
1223 switch that require certain abilities from the cpu. */
1226 if (TARGET_INTERWORK || TARGET_THUMB)
1228 sought |= (FL_THUMB | FL_MODE32);
1230 /* There are no ARM processors that support both APCS-26 and
1231 interworking. Therefore we force FL_MODE26 to be removed
1232 from insn_flags here (if it was set), so that the search
1233 below will always be able to find a compatible processor. */
1234 insn_flags &= ~FL_MODE26;
1237 if (sought != 0 && ((sought & insn_flags) != sought))
1239 /* Try to locate a CPU type that supports all of the abilities
1240 of the default CPU, plus the extra abilities requested by
1242 for (sel = all_cores; sel->name != NULL; sel++)
1243 if ((sel->flags & sought) == (sought | insn_flags))
1246 if (sel->name == NULL)
1248 unsigned current_bit_count = 0;
1249 const struct processors * best_fit = NULL;
1251 /* Ideally we would like to issue an error message here
1252 saying that it was not possible to find a CPU compatible
1253 with the default CPU, but which also supports the command
1254 line options specified by the programmer, and so they
1255 ought to use the -mcpu=<name> command line option to
1256 override the default CPU type.
1258 If we cannot find a cpu that has both the
1259 characteristics of the default cpu and the given
1260 command line options we scan the array again looking
1261 for a best match. */
1262 for (sel = all_cores; sel->name != NULL; sel++)
1263 if ((sel->flags & sought) == sought)
1267 count = bit_count (sel->flags & insn_flags);
1269 if (count >= current_bit_count)
1272 current_bit_count = count;
1276 gcc_assert (best_fit);
1280 insn_flags = sel->flags;
1282 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1283 arm_default_cpu = (enum processor_type) (sel - all_cores);
1284 if (arm_tune == arm_none)
1285 arm_tune = arm_default_cpu;
1288 /* The processor for which we should tune should now have been
1290 gcc_assert (arm_tune != arm_none);
1292 tune_flags = all_cores[(int)arm_tune].flags;
1294 if (target_abi_name)
1296 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1298 if (streq (arm_all_abis[i].name, target_abi_name))
1300 arm_abi = arm_all_abis[i].abi_type;
1304 if (i == ARRAY_SIZE (arm_all_abis))
1305 error ("invalid ABI option: -mabi=%s", target_abi_name);
1308 arm_abi = ARM_DEFAULT_ABI;
1310 /* Make sure that the processor choice does not conflict with any of the
1311 other command line choices. */
1312 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1313 error ("target CPU does not support ARM mode");
1315 /* BPABI targets use linker tricks to allow interworking on cores
1316 without thumb support. */
1317 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1319 warning (0, "target CPU does not support interworking" );
1320 target_flags &= ~MASK_INTERWORK;
1323 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1325 warning (0, "target CPU does not support THUMB instructions");
1326 target_flags &= ~MASK_THUMB;
1329 if (TARGET_APCS_FRAME && TARGET_THUMB)
1331 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1332 target_flags &= ~MASK_APCS_FRAME;
1335 /* Callee super interworking implies thumb interworking. Adding
1336 this to the flags here simplifies the logic elsewhere. */
1337 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1338 target_flags |= MASK_INTERWORK;
1340 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1341 from here where no function is being compiled currently. */
1342 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1343 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1345 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1346 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1348 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1349 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1351 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1353 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1354 target_flags |= MASK_APCS_FRAME;
1357 if (TARGET_POKE_FUNCTION_NAME)
1358 target_flags |= MASK_APCS_FRAME;
1360 if (TARGET_APCS_REENT && flag_pic)
1361 error ("-fpic and -mapcs-reent are incompatible");
1363 if (TARGET_APCS_REENT)
1364 warning (0, "APCS reentrant code not supported. Ignored");
1366 /* If this target is normally configured to use APCS frames, warn if they
1367 are turned off and debugging is turned on. */
1369 && write_symbols != NO_DEBUG
1370 && !TARGET_APCS_FRAME
1371 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1372 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1374 if (TARGET_APCS_FLOAT)
1375 warning (0, "passing floating point arguments in fp regs not yet supported");
1377 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1378 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1379 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1380 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1381 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1382 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1383 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1384 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1385 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1386 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1387 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1388 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1390 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1391 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1392 thumb_code = (TARGET_ARM == 0);
1393 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1394 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1395 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1396 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1397 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1399 /* If we are not using the default (ARM mode) section anchor offset
1400 ranges, then set the correct ranges now. */
1403 /* Thumb-1 LDR instructions cannot have negative offsets.
1404 Permissible positive offset ranges are 5-bit (for byte loads),
1405 6-bit (for halfword loads), or 7-bit (for word loads).
1406 Empirical results suggest a 7-bit anchor range gives the best
1407 overall code size. */
1408 targetm.min_anchor_offset = 0;
1409 targetm.max_anchor_offset = 127;
1411 else if (TARGET_THUMB2)
1413 /* The minimum is set such that the total size of the block
1414 for a particular anchor is 248 + 1 + 4095 bytes, which is
1415 divisible by eight, ensuring natural spacing of anchors. */
1416 targetm.min_anchor_offset = -248;
1417 targetm.max_anchor_offset = 4095;
1420 /* V5 code we generate is completely interworking capable, so we turn off
1421 TARGET_INTERWORK here to avoid many tests later on. */
1423 /* XXX However, we must pass the right pre-processor defines to CPP
1424 or GLD can get confused. This is a hack. */
1425 if (TARGET_INTERWORK)
1426 arm_cpp_interwork = 1;
1429 target_flags &= ~MASK_INTERWORK;
1431 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1432 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1434 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1435 error ("iwmmxt abi requires an iwmmxt capable cpu");
1437 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1438 if (target_fpu_name == NULL && target_fpe_name != NULL)
1440 if (streq (target_fpe_name, "2"))
1441 target_fpu_name = "fpe2";
1442 else if (streq (target_fpe_name, "3"))
1443 target_fpu_name = "fpe3";
1445 error ("invalid floating point emulation option: -mfpe=%s",
1448 if (target_fpu_name != NULL)
1450 /* The user specified a FPU. */
1451 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1453 if (streq (all_fpus[i].name, target_fpu_name))
1455 arm_fpu_arch = all_fpus[i].fpu;
1456 arm_fpu_tune = arm_fpu_arch;
1457 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1461 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1462 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1466 #ifdef FPUTYPE_DEFAULT
1467 /* Use the default if it is specified for this platform. */
1468 arm_fpu_arch = FPUTYPE_DEFAULT;
1469 arm_fpu_tune = FPUTYPE_DEFAULT;
1471 /* Pick one based on CPU type. */
1472 /* ??? Some targets assume FPA is the default.
1473 if ((insn_flags & FL_VFP) != 0)
1474 arm_fpu_arch = FPUTYPE_VFP;
1477 if (arm_arch_cirrus)
1478 arm_fpu_arch = FPUTYPE_MAVERICK;
1480 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1482 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1483 arm_fpu_tune = FPUTYPE_FPA;
1485 arm_fpu_tune = arm_fpu_arch;
1486 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1487 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1490 if (target_float_abi_name != NULL)
1492 /* The user specified a FP ABI. */
1493 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1495 if (streq (all_float_abis[i].name, target_float_abi_name))
1497 arm_float_abi = all_float_abis[i].abi_type;
1501 if (i == ARRAY_SIZE (all_float_abis))
1502 error ("invalid floating point abi: -mfloat-abi=%s",
1503 target_float_abi_name);
1506 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1508 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1509 sorry ("-mfloat-abi=hard and VFP");
1511 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1512 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1513 will ever exist. GCC makes no attempt to support this combination. */
1514 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1515 sorry ("iWMMXt and hardware floating point");
1517 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1518 if (TARGET_THUMB2 && TARGET_IWMMXT)
1519 sorry ("Thumb-2 iWMMXt");
1521 /* If soft-float is specified then don't use FPU. */
1522 if (TARGET_SOFT_FLOAT)
1523 arm_fpu_arch = FPUTYPE_NONE;
1525 /* For arm2/3 there is no need to do any scheduling if there is only
1526 a floating point emulator, or we are doing software floating-point. */
1527 if ((TARGET_SOFT_FLOAT
1528 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1529 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1530 && (tune_flags & FL_MODE32) == 0)
1531 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1533 if (target_thread_switch)
1535 if (strcmp (target_thread_switch, "soft") == 0)
1536 target_thread_pointer = TP_SOFT;
1537 else if (strcmp (target_thread_switch, "auto") == 0)
1538 target_thread_pointer = TP_AUTO;
1539 else if (strcmp (target_thread_switch, "cp15") == 0)
1540 target_thread_pointer = TP_CP15;
1542 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1545 /* Use the cp15 method if it is available. */
1546 if (target_thread_pointer == TP_AUTO)
1548 if (arm_arch6k && !TARGET_THUMB)
1549 target_thread_pointer = TP_CP15;
1551 target_thread_pointer = TP_SOFT;
1554 if (TARGET_HARD_TP && TARGET_THUMB1)
1555 error ("can not use -mtp=cp15 with 16-bit Thumb");
1557 /* Override the default structure alignment for AAPCS ABI. */
1558 if (TARGET_AAPCS_BASED)
1559 arm_structure_size_boundary = 8;
1561 if (structure_size_string != NULL)
1563 int size = strtol (structure_size_string, NULL, 0);
1565 if (size == 8 || size == 32
1566 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1567 arm_structure_size_boundary = size;
1569 warning (0, "structure size boundary can only be set to %s",
1570 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1573 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1575 error ("RTP PIC is incompatible with Thumb");
1579 /* If stack checking is disabled, we can use r10 as the PIC register,
1580 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1581 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1583 if (TARGET_VXWORKS_RTP)
1584 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1585 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1588 if (flag_pic && TARGET_VXWORKS_RTP)
1589 arm_pic_register = 9;
1591 if (arm_pic_register_string != NULL)
1593 int pic_register = decode_reg_name (arm_pic_register_string);
1596 warning (0, "-mpic-register= is useless without -fpic");
1598 /* Prevent the user from choosing an obviously stupid PIC register. */
1599 else if (pic_register < 0 || call_used_regs[pic_register]
1600 || pic_register == HARD_FRAME_POINTER_REGNUM
1601 || pic_register == STACK_POINTER_REGNUM
1602 || pic_register >= PC_REGNUM
1603 || (TARGET_VXWORKS_RTP
1604 && (unsigned int) pic_register != arm_pic_register))
1605 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1607 arm_pic_register = pic_register;
1610 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1611 if (fix_cm3_ldrd == 2)
1613 if (selected_cpu == cortexm3)
1619 /* ??? We might want scheduling for thumb2. */
1620 if (TARGET_THUMB && flag_schedule_insns)
1622 /* Don't warn since it's on by default in -O2. */
1623 flag_schedule_insns = 0;
1628 arm_constant_limit = 1;
1630 /* If optimizing for size, bump the number of instructions that we
1631 are prepared to conditionally execute (even on a StrongARM). */
1632 max_insns_skipped = 6;
1636 /* For processors with load scheduling, it never costs more than
1637 2 cycles to load a constant, and the load scheduler may well
1638 reduce that to 1. */
1640 arm_constant_limit = 1;
1642 /* On XScale the longer latency of a load makes it more difficult
1643 to achieve a good schedule, so it's faster to synthesize
1644 constants that can be done in two insns. */
1645 if (arm_tune_xscale)
1646 arm_constant_limit = 2;
1648 /* StrongARM has early execution of branches, so a sequence
1649 that is worth skipping is shorter. */
1650 if (arm_tune_strongarm)
1651 max_insns_skipped = 3;
1654 /* Register global variables with the garbage collector. */
1655 arm_add_gc_roots ();
1659 arm_add_gc_roots (void)
1661 gcc_obstack_init(&minipool_obstack);
1662 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1665 /* A table of known ARM exception types.
1666 For use with the interrupt function attribute. */
1670 const char *const arg;
1671 const unsigned long return_value;
1675 static const isr_attribute_arg isr_attribute_args [] =
1677 { "IRQ", ARM_FT_ISR },
1678 { "irq", ARM_FT_ISR },
1679 { "FIQ", ARM_FT_FIQ },
1680 { "fiq", ARM_FT_FIQ },
1681 { "ABORT", ARM_FT_ISR },
1682 { "abort", ARM_FT_ISR },
1683 { "ABORT", ARM_FT_ISR },
1684 { "abort", ARM_FT_ISR },
1685 { "UNDEF", ARM_FT_EXCEPTION },
1686 { "undef", ARM_FT_EXCEPTION },
1687 { "SWI", ARM_FT_EXCEPTION },
1688 { "swi", ARM_FT_EXCEPTION },
1689 { NULL, ARM_FT_NORMAL }
1692 /* Returns the (interrupt) function type of the current
1693 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1695 static unsigned long
1696 arm_isr_value (tree argument)
1698 const isr_attribute_arg * ptr;
1702 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1704 /* No argument - default to IRQ. */
1705 if (argument == NULL_TREE)
1708 /* Get the value of the argument. */
1709 if (TREE_VALUE (argument) == NULL_TREE
1710 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1711 return ARM_FT_UNKNOWN;
1713 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1715 /* Check it against the list of known arguments. */
1716 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1717 if (streq (arg, ptr->arg))
1718 return ptr->return_value;
1720 /* An unrecognized interrupt type. */
1721 return ARM_FT_UNKNOWN;
1724 /* Computes the type of the current function. */
1726 static unsigned long
1727 arm_compute_func_type (void)
1729 unsigned long type = ARM_FT_UNKNOWN;
1733 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1735 /* Decide if the current function is volatile. Such functions
1736 never return, and many memory cycles can be saved by not storing
1737 register values that will never be needed again. This optimization
1738 was added to speed up context switching in a kernel application. */
1740 && (TREE_NOTHROW (current_function_decl)
1741 || !(flag_unwind_tables
1742 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1743 && TREE_THIS_VOLATILE (current_function_decl))
1744 type |= ARM_FT_VOLATILE;
1746 if (cfun->static_chain_decl != NULL)
1747 type |= ARM_FT_NESTED;
1749 attr = DECL_ATTRIBUTES (current_function_decl);
1751 a = lookup_attribute ("naked", attr);
1753 type |= ARM_FT_NAKED;
1755 a = lookup_attribute ("isr", attr);
1757 a = lookup_attribute ("interrupt", attr);
1760 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1762 type |= arm_isr_value (TREE_VALUE (a));
1767 /* Returns the type of the current function. */
1770 arm_current_func_type (void)
1772 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1773 cfun->machine->func_type = arm_compute_func_type ();
1775 return cfun->machine->func_type;
1779 arm_allocate_stack_slots_for_args (void)
1781 /* Naked functions should not allocate stack slots for arguments. */
1782 return !IS_NAKED (arm_current_func_type ());
1786 /* Return 1 if it is possible to return using a single instruction.
1787 If SIBLING is non-null, this is a test for a return before a sibling
1788 call. SIBLING is the call insn, so we can examine its register usage. */
1791 use_return_insn (int iscond, rtx sibling)
1794 unsigned int func_type;
1795 unsigned long saved_int_regs;
1796 unsigned HOST_WIDE_INT stack_adjust;
1797 arm_stack_offsets *offsets;
1799 /* Never use a return instruction before reload has run. */
1800 if (!reload_completed)
1803 func_type = arm_current_func_type ();
1805 /* Naked, volatile and stack alignment functions need special
1807 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1810 /* So do interrupt functions that use the frame pointer and Thumb
1811 interrupt functions. */
1812 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1815 offsets = arm_get_frame_offsets ();
1816 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1818 /* As do variadic functions. */
1819 if (crtl->args.pretend_args_size
1820 || cfun->machine->uses_anonymous_args
1821 /* Or if the function calls __builtin_eh_return () */
1822 || crtl->calls_eh_return
1823 /* Or if the function calls alloca */
1824 || cfun->calls_alloca
1825 /* Or if there is a stack adjustment. However, if the stack pointer
1826 is saved on the stack, we can use a pre-incrementing stack load. */
1827 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1828 && stack_adjust == 4)))
1831 saved_int_regs = offsets->saved_regs_mask;
1833 /* Unfortunately, the insn
1835 ldmib sp, {..., sp, ...}
1837 triggers a bug on most SA-110 based devices, such that the stack
1838 pointer won't be correctly restored if the instruction takes a
1839 page fault. We work around this problem by popping r3 along with
1840 the other registers, since that is never slower than executing
1841 another instruction.
1843 We test for !arm_arch5 here, because code for any architecture
1844 less than this could potentially be run on one of the buggy
1846 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1848 /* Validate that r3 is a call-clobbered register (always true in
1849 the default abi) ... */
1850 if (!call_used_regs[3])
1853 /* ... that it isn't being used for a return value ... */
1854 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1857 /* ... or for a tail-call argument ... */
1860 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1862 if (find_regno_fusage (sibling, USE, 3))
1866 /* ... and that there are no call-saved registers in r0-r2
1867 (always true in the default ABI). */
1868 if (saved_int_regs & 0x7)
1872 /* Can't be done if interworking with Thumb, and any registers have been
1874 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1877 /* On StrongARM, conditional returns are expensive if they aren't
1878 taken and multiple registers have been stacked. */
1879 if (iscond && arm_tune_strongarm)
1881 /* Conditional return when just the LR is stored is a simple
1882 conditional-load instruction, that's not expensive. */
1883 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1887 && arm_pic_register != INVALID_REGNUM
1888 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1892 /* If there are saved registers but the LR isn't saved, then we need
1893 two instructions for the return. */
1894 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1897 /* Can't be done if any of the FPA regs are pushed,
1898 since this also requires an insn. */
1899 if (TARGET_HARD_FLOAT && TARGET_FPA)
1900 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1901 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1904 /* Likewise VFP regs. */
1905 if (TARGET_HARD_FLOAT && TARGET_VFP)
1906 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1907 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1910 if (TARGET_REALLY_IWMMXT)
1911 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1912 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1918 /* Return TRUE if int I is a valid immediate ARM constant. */
1921 const_ok_for_arm (HOST_WIDE_INT i)
1925 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1926 be all zero, or all one. */
1927 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1928 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1929 != ((~(unsigned HOST_WIDE_INT) 0)
1930 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1933 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1935 /* Fast return for 0 and small values. We must do this for zero, since
1936 the code below can't handle that one case. */
1937 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1940 /* Get the number of trailing zeros. */
1941 lowbit = ffs((int) i) - 1;
1943 /* Only even shifts are allowed in ARM mode so round down to the
1944 nearest even number. */
1948 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1953 /* Allow rotated constants in ARM mode. */
1955 && ((i & ~0xc000003f) == 0
1956 || (i & ~0xf000000f) == 0
1957 || (i & ~0xfc000003) == 0))
1964 /* Allow repeated pattern. */
1967 if (i == v || i == (v | (v << 8)))
1974 /* Return true if I is a valid constant for the operation CODE. */
1976 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1978 if (const_ok_for_arm (i))
2002 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2004 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2010 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2017 /* Emit a sequence of insns to handle a large constant.
2018 CODE is the code of the operation required, it can be any of SET, PLUS,
2019 IOR, AND, XOR, MINUS;
2020 MODE is the mode in which the operation is being performed;
2021 VAL is the integer to operate on;
2022 SOURCE is the other operand (a register, or a null-pointer for SET);
2023 SUBTARGETS means it is safe to create scratch registers if that will
2024 either produce a simpler sequence, or we will want to cse the values.
2025 Return value is the number of insns emitted. */
2027 /* ??? Tweak this for thumb2. */
2029 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2030 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2034 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2035 cond = COND_EXEC_TEST (PATTERN (insn));
2039 if (subtargets || code == SET
2040 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2041 && REGNO (target) != REGNO (source)))
2043 /* After arm_reorg has been called, we can't fix up expensive
2044 constants by pushing them into memory so we must synthesize
2045 them in-line, regardless of the cost. This is only likely to
2046 be more costly on chips that have load delay slots and we are
2047 compiling without running the scheduler (so no splitting
2048 occurred before the final instruction emission).
2050 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2052 if (!after_arm_reorg
2054 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2056 > arm_constant_limit + (code != SET)))
2060 /* Currently SET is the only monadic value for CODE, all
2061 the rest are diadic. */
2062 if (TARGET_USE_MOVT)
2063 arm_emit_movpair (target, GEN_INT (val));
2065 emit_set_insn (target, GEN_INT (val));
2071 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2073 if (TARGET_USE_MOVT)
2074 arm_emit_movpair (temp, GEN_INT (val));
2076 emit_set_insn (temp, GEN_INT (val));
2078 /* For MINUS, the value is subtracted from, since we never
2079 have subtraction of a constant. */
2081 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2083 emit_set_insn (target,
2084 gen_rtx_fmt_ee (code, mode, source, temp));
2090 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2094 /* Return the number of ARM instructions required to synthesize the given
2097 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2099 HOST_WIDE_INT temp1;
2107 if (remainder & (3 << (i - 2)))
2112 temp1 = remainder & ((0x0ff << end)
2113 | ((i < end) ? (0xff >> (32 - end)) : 0));
2114 remainder &= ~temp1;
2119 } while (remainder);
2123 /* Emit an instruction with the indicated PATTERN. If COND is
2124 non-NULL, conditionalize the execution of the instruction on COND
2128 emit_constant_insn (rtx cond, rtx pattern)
2131 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2132 emit_insn (pattern);
2135 /* As above, but extra parameter GENERATE which, if clear, suppresses
2137 /* ??? This needs more work for thumb2. */
2140 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2141 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2146 int can_negate_initial = 0;
2149 int num_bits_set = 0;
2150 int set_sign_bit_copies = 0;
2151 int clear_sign_bit_copies = 0;
2152 int clear_zero_bit_copies = 0;
2153 int set_zero_bit_copies = 0;
2155 unsigned HOST_WIDE_INT temp1, temp2;
2156 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2158 /* Find out which operations are safe for a given CODE. Also do a quick
2159 check for degenerate cases; these can occur when DImode operations
2171 can_negate_initial = 1;
2175 if (remainder == 0xffffffff)
2178 emit_constant_insn (cond,
2179 gen_rtx_SET (VOIDmode, target,
2180 GEN_INT (ARM_SIGN_EXTEND (val))));
2185 if (reload_completed && rtx_equal_p (target, source))
2188 emit_constant_insn (cond,
2189 gen_rtx_SET (VOIDmode, target, source));
2198 emit_constant_insn (cond,
2199 gen_rtx_SET (VOIDmode, target, const0_rtx));
2202 if (remainder == 0xffffffff)
2204 if (reload_completed && rtx_equal_p (target, source))
2207 emit_constant_insn (cond,
2208 gen_rtx_SET (VOIDmode, target, source));
2217 if (reload_completed && rtx_equal_p (target, source))
2220 emit_constant_insn (cond,
2221 gen_rtx_SET (VOIDmode, target, source));
2225 /* We don't know how to handle other cases yet. */
2226 gcc_assert (remainder == 0xffffffff);
2229 emit_constant_insn (cond,
2230 gen_rtx_SET (VOIDmode, target,
2231 gen_rtx_NOT (mode, source)));
2235 /* We treat MINUS as (val - source), since (source - val) is always
2236 passed as (source + (-val)). */
2240 emit_constant_insn (cond,
2241 gen_rtx_SET (VOIDmode, target,
2242 gen_rtx_NEG (mode, source)));
2245 if (const_ok_for_arm (val))
2248 emit_constant_insn (cond,
2249 gen_rtx_SET (VOIDmode, target,
2250 gen_rtx_MINUS (mode, GEN_INT (val),
2262 /* If we can do it in one insn get out quickly. */
2263 if (const_ok_for_arm (val)
2264 || (can_negate_initial && const_ok_for_arm (-val))
2265 || (can_invert && const_ok_for_arm (~val)))
2268 emit_constant_insn (cond,
2269 gen_rtx_SET (VOIDmode, target,
2271 ? gen_rtx_fmt_ee (code, mode, source,
2277 /* Calculate a few attributes that may be useful for specific
2279 for (i = 31; i >= 0; i--)
2281 if ((remainder & (1 << i)) == 0)
2282 clear_sign_bit_copies++;
2287 for (i = 31; i >= 0; i--)
2289 if ((remainder & (1 << i)) != 0)
2290 set_sign_bit_copies++;
2295 for (i = 0; i <= 31; i++)
2297 if ((remainder & (1 << i)) == 0)
2298 clear_zero_bit_copies++;
2303 for (i = 0; i <= 31; i++)
2305 if ((remainder & (1 << i)) != 0)
2306 set_zero_bit_copies++;
2314 /* See if we can use movw. */
2315 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2318 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2323 /* See if we can do this by sign_extending a constant that is known
2324 to be negative. This is a good, way of doing it, since the shift
2325 may well merge into a subsequent insn. */
2326 if (set_sign_bit_copies > 1)
2328 if (const_ok_for_arm
2329 (temp1 = ARM_SIGN_EXTEND (remainder
2330 << (set_sign_bit_copies - 1))))
2334 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2335 emit_constant_insn (cond,
2336 gen_rtx_SET (VOIDmode, new_src,
2338 emit_constant_insn (cond,
2339 gen_ashrsi3 (target, new_src,
2340 GEN_INT (set_sign_bit_copies - 1)));
2344 /* For an inverted constant, we will need to set the low bits,
2345 these will be shifted out of harm's way. */
2346 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2347 if (const_ok_for_arm (~temp1))
2351 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2352 emit_constant_insn (cond,
2353 gen_rtx_SET (VOIDmode, new_src,
2355 emit_constant_insn (cond,
2356 gen_ashrsi3 (target, new_src,
2357 GEN_INT (set_sign_bit_copies - 1)));
2363 /* See if we can calculate the value as the difference between two
2364 valid immediates. */
2365 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2367 int topshift = clear_sign_bit_copies & ~1;
2369 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2370 & (0xff000000 >> topshift));
2372 /* If temp1 is zero, then that means the 9 most significant
2373 bits of remainder were 1 and we've caused it to overflow.
2374 When topshift is 0 we don't need to do anything since we
2375 can borrow from 'bit 32'. */
2376 if (temp1 == 0 && topshift != 0)
2377 temp1 = 0x80000000 >> (topshift - 1);
2379 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2381 if (const_ok_for_arm (temp2))
2385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2386 emit_constant_insn (cond,
2387 gen_rtx_SET (VOIDmode, new_src,
2389 emit_constant_insn (cond,
2390 gen_addsi3 (target, new_src,
2398 /* See if we can generate this by setting the bottom (or the top)
2399 16 bits, and then shifting these into the other half of the
2400 word. We only look for the simplest cases, to do more would cost
2401 too much. Be careful, however, not to generate this when the
2402 alternative would take fewer insns. */
2403 if (val & 0xffff0000)
2405 temp1 = remainder & 0xffff0000;
2406 temp2 = remainder & 0x0000ffff;
2408 /* Overlaps outside this range are best done using other methods. */
2409 for (i = 9; i < 24; i++)
2411 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2412 && !const_ok_for_arm (temp2))
2414 rtx new_src = (subtargets
2415 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2417 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2418 source, subtargets, generate);
2426 gen_rtx_ASHIFT (mode, source,
2433 /* Don't duplicate cases already considered. */
2434 for (i = 17; i < 24; i++)
2436 if (((temp1 | (temp1 >> i)) == remainder)
2437 && !const_ok_for_arm (temp1))
2439 rtx new_src = (subtargets
2440 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2442 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2443 source, subtargets, generate);
2448 gen_rtx_SET (VOIDmode, target,
2451 gen_rtx_LSHIFTRT (mode, source,
2462 /* If we have IOR or XOR, and the constant can be loaded in a
2463 single instruction, and we can find a temporary to put it in,
2464 then this can be done in two instructions instead of 3-4. */
2466 /* TARGET can't be NULL if SUBTARGETS is 0 */
2467 || (reload_completed && !reg_mentioned_p (target, source)))
2469 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2473 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2475 emit_constant_insn (cond,
2476 gen_rtx_SET (VOIDmode, sub,
2478 emit_constant_insn (cond,
2479 gen_rtx_SET (VOIDmode, target,
2480 gen_rtx_fmt_ee (code, mode,
2490 if (set_sign_bit_copies > 8
2491 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2495 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2496 rtx shift = GEN_INT (set_sign_bit_copies);
2500 gen_rtx_SET (VOIDmode, sub,
2502 gen_rtx_ASHIFT (mode,
2507 gen_rtx_SET (VOIDmode, target,
2509 gen_rtx_LSHIFTRT (mode, sub,
2515 if (set_zero_bit_copies > 8
2516 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2520 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2521 rtx shift = GEN_INT (set_zero_bit_copies);
2525 gen_rtx_SET (VOIDmode, sub,
2527 gen_rtx_LSHIFTRT (mode,
2532 gen_rtx_SET (VOIDmode, target,
2534 gen_rtx_ASHIFT (mode, sub,
2540 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2544 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2545 emit_constant_insn (cond,
2546 gen_rtx_SET (VOIDmode, sub,
2547 gen_rtx_NOT (mode, source)));
2550 sub = gen_reg_rtx (mode);
2551 emit_constant_insn (cond,
2552 gen_rtx_SET (VOIDmode, sub,
2553 gen_rtx_AND (mode, source,
2555 emit_constant_insn (cond,
2556 gen_rtx_SET (VOIDmode, target,
2557 gen_rtx_NOT (mode, sub)));
2564 /* See if two shifts will do 2 or more insn's worth of work. */
2565 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2567 HOST_WIDE_INT shift_mask = ((0xffffffff
2568 << (32 - clear_sign_bit_copies))
2571 if ((remainder | shift_mask) != 0xffffffff)
2575 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2576 insns = arm_gen_constant (AND, mode, cond,
2577 remainder | shift_mask,
2578 new_src, source, subtargets, 1);
2583 rtx targ = subtargets ? NULL_RTX : target;
2584 insns = arm_gen_constant (AND, mode, cond,
2585 remainder | shift_mask,
2586 targ, source, subtargets, 0);
2592 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2593 rtx shift = GEN_INT (clear_sign_bit_copies);
2595 emit_insn (gen_ashlsi3 (new_src, source, shift));
2596 emit_insn (gen_lshrsi3 (target, new_src, shift));
2602 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2604 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2606 if ((remainder | shift_mask) != 0xffffffff)
2610 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2612 insns = arm_gen_constant (AND, mode, cond,
2613 remainder | shift_mask,
2614 new_src, source, subtargets, 1);
2619 rtx targ = subtargets ? NULL_RTX : target;
2621 insns = arm_gen_constant (AND, mode, cond,
2622 remainder | shift_mask,
2623 targ, source, subtargets, 0);
2629 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2630 rtx shift = GEN_INT (clear_zero_bit_copies);
2632 emit_insn (gen_lshrsi3 (new_src, source, shift));
2633 emit_insn (gen_ashlsi3 (target, new_src, shift));
2645 for (i = 0; i < 32; i++)
2646 if (remainder & (1 << i))
2649 if (code == AND || (can_invert && num_bits_set > 16))
2650 remainder = (~remainder) & 0xffffffff;
2651 else if (code == PLUS && num_bits_set > 16)
2652 remainder = (-remainder) & 0xffffffff;
2659 /* Now try and find a way of doing the job in either two or three
2661 We start by looking for the largest block of zeros that are aligned on
2662 a 2-bit boundary, we then fill up the temps, wrapping around to the
2663 top of the word when we drop off the bottom.
2664 In the worst case this code should produce no more than four insns.
2665 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2666 best place to start. */
2668 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2674 int best_consecutive_zeros = 0;
2676 for (i = 0; i < 32; i += 2)
2678 int consecutive_zeros = 0;
2680 if (!(remainder & (3 << i)))
2682 while ((i < 32) && !(remainder & (3 << i)))
2684 consecutive_zeros += 2;
2687 if (consecutive_zeros > best_consecutive_zeros)
2689 best_consecutive_zeros = consecutive_zeros;
2690 best_start = i - consecutive_zeros;
2696 /* So long as it won't require any more insns to do so, it's
2697 desirable to emit a small constant (in bits 0...9) in the last
2698 insn. This way there is more chance that it can be combined with
2699 a later addressing insn to form a pre-indexed load or store
2700 operation. Consider:
2702 *((volatile int *)0xe0000100) = 1;
2703 *((volatile int *)0xe0000110) = 2;
2705 We want this to wind up as:
2709 str rB, [rA, #0x100]
2711 str rB, [rA, #0x110]
2713 rather than having to synthesize both large constants from scratch.
2715 Therefore, we calculate how many insns would be required to emit
2716 the constant starting from `best_start', and also starting from
2717 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2718 yield a shorter sequence, we may as well use zero. */
2720 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2721 && (count_insns_for_constant (remainder, 0) <=
2722 count_insns_for_constant (remainder, best_start)))
2726 /* Now start emitting the insns. */
2734 if (remainder & (3 << (i - 2)))
2739 temp1 = remainder & ((0x0ff << end)
2740 | ((i < end) ? (0xff >> (32 - end)) : 0));
2741 remainder &= ~temp1;
2745 rtx new_src, temp1_rtx;
2747 if (code == SET || code == MINUS)
2749 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2750 if (can_invert && code != MINUS)
2755 if (remainder && subtargets)
2756 new_src = gen_reg_rtx (mode);
2761 else if (can_negate)
2765 temp1 = trunc_int_for_mode (temp1, mode);
2766 temp1_rtx = GEN_INT (temp1);
2770 else if (code == MINUS)
2771 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2773 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2775 emit_constant_insn (cond,
2776 gen_rtx_SET (VOIDmode, new_src,
2786 else if (code == MINUS)
2795 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2808 /* Canonicalize a comparison so that we are more likely to recognize it.
2809 This can be done for a few constant compares, where we can make the
2810 immediate value easier to load. */
2813 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2816 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2817 unsigned HOST_WIDE_INT maxval;
2818 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2829 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2831 *op1 = GEN_INT (i + 1);
2832 return code == GT ? GE : LT;
2839 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2841 *op1 = GEN_INT (i - 1);
2842 return code == GE ? GT : LE;
2848 if (i != ~((unsigned HOST_WIDE_INT) 0)
2849 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2851 *op1 = GEN_INT (i + 1);
2852 return code == GTU ? GEU : LTU;
2859 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2861 *op1 = GEN_INT (i - 1);
2862 return code == GEU ? GTU : LEU;
2874 /* Define how to find the value returned by a function. */
2877 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2879 enum machine_mode mode;
2880 int unsignedp ATTRIBUTE_UNUSED;
2881 rtx r ATTRIBUTE_UNUSED;
2883 mode = TYPE_MODE (type);
2884 /* Promote integer types. */
2885 if (INTEGRAL_TYPE_P (type))
2886 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2888 /* Promotes small structs returned in a register to full-word size
2889 for big-endian AAPCS. */
2890 if (arm_return_in_msb (type))
2892 HOST_WIDE_INT size = int_size_in_bytes (type);
2893 if (size % UNITS_PER_WORD != 0)
2895 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2896 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2900 return LIBCALL_VALUE(mode);
2903 /* Determine the amount of memory needed to store the possible return
2904 registers of an untyped call. */
2906 arm_apply_result_size (void)
2912 if (TARGET_HARD_FLOAT_ABI)
2916 if (TARGET_MAVERICK)
2919 if (TARGET_IWMMXT_ABI)
2926 /* Decide whether a type should be returned in memory (true)
2927 or in a register (false). This is called as the target hook
2928 TARGET_RETURN_IN_MEMORY. */
2930 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2934 size = int_size_in_bytes (type);
2936 /* Vector values should be returned using ARM registers, not memory (unless
2937 they're over 16 bytes, which will break since we only have four
2938 call-clobbered registers to play with). */
2939 if (TREE_CODE (type) == VECTOR_TYPE)
2940 return (size < 0 || size > (4 * UNITS_PER_WORD));
2942 if (!AGGREGATE_TYPE_P (type) &&
2943 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2944 /* All simple types are returned in registers.
2945 For AAPCS, complex types are treated the same as aggregates. */
2948 if (arm_abi != ARM_ABI_APCS)
2950 /* ATPCS and later return aggregate types in memory only if they are
2951 larger than a word (or are variable size). */
2952 return (size < 0 || size > UNITS_PER_WORD);
2955 /* For the arm-wince targets we choose to be compatible with Microsoft's
2956 ARM and Thumb compilers, which always return aggregates in memory. */
2958 /* All structures/unions bigger than one word are returned in memory.
2959 Also catch the case where int_size_in_bytes returns -1. In this case
2960 the aggregate is either huge or of variable size, and in either case
2961 we will want to return it via memory and not in a register. */
2962 if (size < 0 || size > UNITS_PER_WORD)
2965 if (TREE_CODE (type) == RECORD_TYPE)
2969 /* For a struct the APCS says that we only return in a register
2970 if the type is 'integer like' and every addressable element
2971 has an offset of zero. For practical purposes this means
2972 that the structure can have at most one non bit-field element
2973 and that this element must be the first one in the structure. */
2975 /* Find the first field, ignoring non FIELD_DECL things which will
2976 have been created by C++. */
2977 for (field = TYPE_FIELDS (type);
2978 field && TREE_CODE (field) != FIELD_DECL;
2979 field = TREE_CHAIN (field))
2983 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2985 /* Check that the first field is valid for returning in a register. */
2987 /* ... Floats are not allowed */
2988 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2991 /* ... Aggregates that are not themselves valid for returning in
2992 a register are not allowed. */
2993 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2996 /* Now check the remaining fields, if any. Only bitfields are allowed,
2997 since they are not addressable. */
2998 for (field = TREE_CHAIN (field);
3000 field = TREE_CHAIN (field))
3002 if (TREE_CODE (field) != FIELD_DECL)
3005 if (!DECL_BIT_FIELD_TYPE (field))
3012 if (TREE_CODE (type) == UNION_TYPE)
3016 /* Unions can be returned in registers if every element is
3017 integral, or can be returned in an integer register. */
3018 for (field = TYPE_FIELDS (type);
3020 field = TREE_CHAIN (field))
3022 if (TREE_CODE (field) != FIELD_DECL)
3025 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3028 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3034 #endif /* not ARM_WINCE */
3036 /* Return all other types in memory. */
3040 /* Indicate whether or not words of a double are in big-endian order. */
3043 arm_float_words_big_endian (void)
3045 if (TARGET_MAVERICK)
3048 /* For FPA, float words are always big-endian. For VFP, floats words
3049 follow the memory system mode. */
3057 return (TARGET_BIG_END ? 1 : 0);
3062 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3063 for a call to a function whose data type is FNTYPE.
3064 For a library call, FNTYPE is NULL. */
3066 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3067 rtx libname ATTRIBUTE_UNUSED,
3068 tree fndecl ATTRIBUTE_UNUSED)
3070 /* On the ARM, the offset starts at 0. */
3072 pcum->iwmmxt_nregs = 0;
3073 pcum->can_split = true;
3075 /* Varargs vectors are treated the same as long long.
3076 named_count avoids having to change the way arm handles 'named' */
3077 pcum->named_count = 0;
3080 if (TARGET_REALLY_IWMMXT && fntype)
3084 for (fn_arg = TYPE_ARG_TYPES (fntype);
3086 fn_arg = TREE_CHAIN (fn_arg))
3087 pcum->named_count += 1;
3089 if (! pcum->named_count)
3090 pcum->named_count = INT_MAX;
3095 /* Return true if mode/type need doubleword alignment. */
3097 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3099 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3100 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
3104 /* Determine where to put an argument to a function.
3105 Value is zero to push the argument on the stack,
3106 or a hard register in which to store the argument.
3108 MODE is the argument's machine mode.
3109 TYPE is the data type of the argument (as a tree).
3110 This is null for libcalls where that information may
3112 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3113 the preceding args and about the function being called.
3114 NAMED is nonzero if this argument is a named parameter
3115 (otherwise it is an extra parameter matching an ellipsis). */
3118 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3119 tree type, int named)
3123 /* Varargs vectors are treated the same as long long.
3124 named_count avoids having to change the way arm handles 'named' */
3125 if (TARGET_IWMMXT_ABI
3126 && arm_vector_mode_supported_p (mode)
3127 && pcum->named_count > pcum->nargs + 1)
3129 if (pcum->iwmmxt_nregs <= 9)
3130 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3133 pcum->can_split = false;
3138 /* Put doubleword aligned quantities in even register pairs. */
3140 && ARM_DOUBLEWORD_ALIGN
3141 && arm_needs_doubleword_align (mode, type))
3144 if (mode == VOIDmode)
3145 /* Pick an arbitrary value for operand 2 of the call insn. */
3148 /* Only allow splitting an arg between regs and memory if all preceding
3149 args were allocated to regs. For args passed by reference we only count
3150 the reference pointer. */
3151 if (pcum->can_split)
3154 nregs = ARM_NUM_REGS2 (mode, type);
3156 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3159 return gen_rtx_REG (mode, pcum->nregs);
3163 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3164 tree type, bool named ATTRIBUTE_UNUSED)
3166 int nregs = pcum->nregs;
3168 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3171 if (NUM_ARG_REGS > nregs
3172 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3174 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3179 /* Variable sized types are passed by reference. This is a GCC
3180 extension to the ARM ABI. */
3183 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3184 enum machine_mode mode ATTRIBUTE_UNUSED,
3185 const_tree type, bool named ATTRIBUTE_UNUSED)
3187 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3190 /* Encode the current state of the #pragma [no_]long_calls. */
3193 OFF, /* No #pragma [no_]long_calls is in effect. */
3194 LONG, /* #pragma long_calls is in effect. */
3195 SHORT /* #pragma no_long_calls is in effect. */
3198 static arm_pragma_enum arm_pragma_long_calls = OFF;
3201 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3203 arm_pragma_long_calls = LONG;
3207 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3209 arm_pragma_long_calls = SHORT;
3213 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3215 arm_pragma_long_calls = OFF;
3218 /* Table of machine attributes. */
3219 const struct attribute_spec arm_attribute_table[] =
3221 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3222 /* Function calls made to this symbol must be done indirectly, because
3223 it may lie outside of the 26 bit addressing range of a normal function
3225 { "long_call", 0, 0, false, true, true, NULL },
3226 /* Whereas these functions are always known to reside within the 26 bit
3227 addressing range. */
3228 { "short_call", 0, 0, false, true, true, NULL },
3229 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3230 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3231 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3232 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3234 /* ARM/PE has three new attributes:
3236 dllexport - for exporting a function/variable that will live in a dll
3237 dllimport - for importing a function/variable from a dll
3239 Microsoft allows multiple declspecs in one __declspec, separating
3240 them with spaces. We do NOT support this. Instead, use __declspec
3243 { "dllimport", 0, 0, true, false, false, NULL },
3244 { "dllexport", 0, 0, true, false, false, NULL },
3245 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3246 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3247 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3248 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3249 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3251 { NULL, 0, 0, false, false, false, NULL }
3254 /* Handle an attribute requiring a FUNCTION_DECL;
3255 arguments as in struct attribute_spec.handler. */
3257 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3258 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3260 if (TREE_CODE (*node) != FUNCTION_DECL)
3262 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3263 IDENTIFIER_POINTER (name));
3264 *no_add_attrs = true;
3270 /* Handle an "interrupt" or "isr" attribute;
3271 arguments as in struct attribute_spec.handler. */
3273 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3278 if (TREE_CODE (*node) != FUNCTION_DECL)
3280 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3281 IDENTIFIER_POINTER (name));
3282 *no_add_attrs = true;
3284 /* FIXME: the argument if any is checked for type attributes;
3285 should it be checked for decl ones? */
3289 if (TREE_CODE (*node) == FUNCTION_TYPE
3290 || TREE_CODE (*node) == METHOD_TYPE)
3292 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3294 warning (OPT_Wattributes, "%qs attribute ignored",
3295 IDENTIFIER_POINTER (name));
3296 *no_add_attrs = true;
3299 else if (TREE_CODE (*node) == POINTER_TYPE
3300 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3301 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3302 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3304 *node = build_variant_type_copy (*node);
3305 TREE_TYPE (*node) = build_type_attribute_variant
3307 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3308 *no_add_attrs = true;
3312 /* Possibly pass this attribute on from the type to a decl. */
3313 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3314 | (int) ATTR_FLAG_FUNCTION_NEXT
3315 | (int) ATTR_FLAG_ARRAY_NEXT))
3317 *no_add_attrs = true;
3318 return tree_cons (name, args, NULL_TREE);
3322 warning (OPT_Wattributes, "%qs attribute ignored",
3323 IDENTIFIER_POINTER (name));
3331 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3332 /* Handle the "notshared" attribute. This attribute is another way of
3333 requesting hidden visibility. ARM's compiler supports
3334 "__declspec(notshared)"; we support the same thing via an
3338 arm_handle_notshared_attribute (tree *node,
3339 tree name ATTRIBUTE_UNUSED,
3340 tree args ATTRIBUTE_UNUSED,
3341 int flags ATTRIBUTE_UNUSED,
3344 tree decl = TYPE_NAME (*node);
3348 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3349 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3350 *no_add_attrs = false;
3356 /* Return 0 if the attributes for two types are incompatible, 1 if they
3357 are compatible, and 2 if they are nearly compatible (which causes a
3358 warning to be generated). */
3360 arm_comp_type_attributes (const_tree type1, const_tree type2)
3364 /* Check for mismatch of non-default calling convention. */
3365 if (TREE_CODE (type1) != FUNCTION_TYPE)
3368 /* Check for mismatched call attributes. */
3369 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3370 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3371 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3372 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3374 /* Only bother to check if an attribute is defined. */
3375 if (l1 | l2 | s1 | s2)
3377 /* If one type has an attribute, the other must have the same attribute. */
3378 if ((l1 != l2) || (s1 != s2))
3381 /* Disallow mixed attributes. */
3382 if ((l1 & s2) || (l2 & s1))
3386 /* Check for mismatched ISR attribute. */
3387 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3389 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3390 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3392 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3399 /* Assigns default attributes to newly defined type. This is used to
3400 set short_call/long_call attributes for function types of
3401 functions defined inside corresponding #pragma scopes. */
3403 arm_set_default_type_attributes (tree type)
3405 /* Add __attribute__ ((long_call)) to all functions, when
3406 inside #pragma long_calls or __attribute__ ((short_call)),
3407 when inside #pragma no_long_calls. */
3408 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3410 tree type_attr_list, attr_name;
3411 type_attr_list = TYPE_ATTRIBUTES (type);
3413 if (arm_pragma_long_calls == LONG)
3414 attr_name = get_identifier ("long_call");
3415 else if (arm_pragma_long_calls == SHORT)
3416 attr_name = get_identifier ("short_call");
3420 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3421 TYPE_ATTRIBUTES (type) = type_attr_list;
3425 /* Return true if DECL is known to be linked into section SECTION. */
3428 arm_function_in_section_p (tree decl, section *section)
3430 /* We can only be certain about functions defined in the same
3431 compilation unit. */
3432 if (!TREE_STATIC (decl))
3435 /* Make sure that SYMBOL always binds to the definition in this
3436 compilation unit. */
3437 if (!targetm.binds_local_p (decl))
3440 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3441 if (!DECL_SECTION_NAME (decl))
3443 /* Make sure that we will not create a unique section for DECL. */
3444 if (flag_function_sections || DECL_ONE_ONLY (decl))
3448 return function_section (decl) == section;
3451 /* Return nonzero if a 32-bit "long_call" should be generated for
3452 a call from the current function to DECL. We generate a long_call
3455 a. has an __attribute__((long call))
3456 or b. is within the scope of a #pragma long_calls
3457 or c. the -mlong-calls command line switch has been specified
3459 However we do not generate a long call if the function:
3461 d. has an __attribute__ ((short_call))
3462 or e. is inside the scope of a #pragma no_long_calls
3463 or f. is defined in the same section as the current function. */
3466 arm_is_long_call_p (tree decl)
3471 return TARGET_LONG_CALLS;
3473 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3474 if (lookup_attribute ("short_call", attrs))
3477 /* For "f", be conservative, and only cater for cases in which the
3478 whole of the current function is placed in the same section. */
3479 if (!flag_reorder_blocks_and_partition
3480 && TREE_CODE (decl) == FUNCTION_DECL
3481 && arm_function_in_section_p (decl, current_function_section ()))
3484 if (lookup_attribute ("long_call", attrs))
3487 return TARGET_LONG_CALLS;
3490 /* Return nonzero if it is ok to make a tail-call to DECL. */
3492 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3494 unsigned long func_type;
3496 if (cfun->machine->sibcall_blocked)
3499 /* Never tailcall something for which we have no decl, or if we
3500 are in Thumb mode. */
3501 if (decl == NULL || TARGET_THUMB)
3504 /* The PIC register is live on entry to VxWorks PLT entries, so we
3505 must make the call before restoring the PIC register. */
3506 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3509 /* Cannot tail-call to long calls, since these are out of range of
3510 a branch instruction. */
3511 if (arm_is_long_call_p (decl))
3514 /* If we are interworking and the function is not declared static
3515 then we can't tail-call it unless we know that it exists in this
3516 compilation unit (since it might be a Thumb routine). */
3517 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3520 func_type = arm_current_func_type ();
3521 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3522 if (IS_INTERRUPT (func_type))
3525 /* Never tailcall if function may be called with a misaligned SP. */
3526 if (IS_STACKALIGN (func_type))
3529 /* Everything else is ok. */
3534 /* Addressing mode support functions. */
3536 /* Return nonzero if X is a legitimate immediate operand when compiling
3537 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3539 legitimate_pic_operand_p (rtx x)
3541 if (GET_CODE (x) == SYMBOL_REF
3542 || (GET_CODE (x) == CONST
3543 && GET_CODE (XEXP (x, 0)) == PLUS
3544 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3550 /* Record that the current function needs a PIC register. Initialize
3551 cfun->machine->pic_reg if we have not already done so. */
3554 require_pic_register (void)
3556 /* A lot of the logic here is made obscure by the fact that this
3557 routine gets called as part of the rtx cost estimation process.
3558 We don't want those calls to affect any assumptions about the real
3559 function; and further, we can't call entry_of_function() until we
3560 start the real expansion process. */
3561 if (!crtl->uses_pic_offset_table)
3563 gcc_assert (can_create_pseudo_p ());
3564 if (arm_pic_register != INVALID_REGNUM)
3566 if (!cfun->machine->pic_reg)
3567 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3569 /* Play games to avoid marking the function as needing pic
3570 if we are being called as part of the cost-estimation
3572 if (current_ir_type () != IR_GIMPLE)
3573 crtl->uses_pic_offset_table = 1;
3579 if (!cfun->machine->pic_reg)
3580 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3582 /* Play games to avoid marking the function as needing pic
3583 if we are being called as part of the cost-estimation
3585 if (current_ir_type () != IR_GIMPLE)
3587 crtl->uses_pic_offset_table = 1;
3590 arm_load_pic_register (0UL);
3594 emit_insn_after (seq, entry_of_function ());
3601 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3603 if (GET_CODE (orig) == SYMBOL_REF
3604 || GET_CODE (orig) == LABEL_REF)
3606 rtx pic_ref, address;
3610 /* If this function doesn't have a pic register, create one now. */
3611 require_pic_register ();
3615 gcc_assert (can_create_pseudo_p ());
3616 reg = gen_reg_rtx (Pmode);
3622 address = gen_reg_rtx (Pmode);
3627 emit_insn (gen_pic_load_addr_arm (address, orig));
3628 else if (TARGET_THUMB2)
3629 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3630 else /* TARGET_THUMB1 */
3631 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3633 /* VxWorks does not impose a fixed gap between segments; the run-time
3634 gap can be different from the object-file gap. We therefore can't
3635 use GOTOFF unless we are absolutely sure that the symbol is in the
3636 same segment as the GOT. Unfortunately, the flexibility of linker
3637 scripts means that we can't be sure of that in general, so assume
3638 that GOTOFF is never valid on VxWorks. */
3639 if ((GET_CODE (orig) == LABEL_REF
3640 || (GET_CODE (orig) == SYMBOL_REF &&
3641 SYMBOL_REF_LOCAL_P (orig)))
3643 && !TARGET_VXWORKS_RTP)
3644 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3647 pic_ref = gen_const_mem (Pmode,
3648 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3652 insn = emit_move_insn (reg, pic_ref);
3654 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3656 set_unique_reg_note (insn, REG_EQUAL, orig);
3660 else if (GET_CODE (orig) == CONST)
3664 if (GET_CODE (XEXP (orig, 0)) == PLUS
3665 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3668 /* Handle the case where we have: const (UNSPEC_TLS). */
3669 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3670 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3673 /* Handle the case where we have:
3674 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
3676 if (GET_CODE (XEXP (orig, 0)) == PLUS
3677 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3678 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3680 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3686 gcc_assert (can_create_pseudo_p ());
3687 reg = gen_reg_rtx (Pmode);
3690 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3692 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3693 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3694 base == reg ? 0 : reg);
3696 if (GET_CODE (offset) == CONST_INT)
3698 /* The base register doesn't really matter, we only want to
3699 test the index for the appropriate mode. */
3700 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3702 gcc_assert (can_create_pseudo_p ());
3703 offset = force_reg (Pmode, offset);
3706 if (GET_CODE (offset) == CONST_INT)
3707 return plus_constant (base, INTVAL (offset));
3710 if (GET_MODE_SIZE (mode) > 4
3711 && (GET_MODE_CLASS (mode) == MODE_INT
3712 || TARGET_SOFT_FLOAT))
3714 emit_insn (gen_addsi3 (reg, base, offset));
3718 return gen_rtx_PLUS (Pmode, base, offset);
3725 /* Find a spare register to use during the prolog of a function. */
3728 thumb_find_work_register (unsigned long pushed_regs_mask)
3732 /* Check the argument registers first as these are call-used. The
3733 register allocation order means that sometimes r3 might be used
3734 but earlier argument registers might not, so check them all. */
3735 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3736 if (!df_regs_ever_live_p (reg))
3739 /* Before going on to check the call-saved registers we can try a couple
3740 more ways of deducing that r3 is available. The first is when we are
3741 pushing anonymous arguments onto the stack and we have less than 4
3742 registers worth of fixed arguments(*). In this case r3 will be part of
3743 the variable argument list and so we can be sure that it will be
3744 pushed right at the start of the function. Hence it will be available
3745 for the rest of the prologue.
3746 (*): ie crtl->args.pretend_args_size is greater than 0. */
3747 if (cfun->machine->uses_anonymous_args
3748 && crtl->args.pretend_args_size > 0)
3749 return LAST_ARG_REGNUM;
3751 /* The other case is when we have fixed arguments but less than 4 registers
3752 worth. In this case r3 might be used in the body of the function, but
3753 it is not being used to convey an argument into the function. In theory
3754 we could just check crtl->args.size to see how many bytes are
3755 being passed in argument registers, but it seems that it is unreliable.
3756 Sometimes it will have the value 0 when in fact arguments are being
3757 passed. (See testcase execute/20021111-1.c for an example). So we also
3758 check the args_info.nregs field as well. The problem with this field is
3759 that it makes no allowances for arguments that are passed to the
3760 function but which are not used. Hence we could miss an opportunity
3761 when a function has an unused argument in r3. But it is better to be
3762 safe than to be sorry. */
3763 if (! cfun->machine->uses_anonymous_args
3764 && crtl->args.size >= 0
3765 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3766 && crtl->args.info.nregs < 4)
3767 return LAST_ARG_REGNUM;
3769 /* Otherwise look for a call-saved register that is going to be pushed. */
3770 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3771 if (pushed_regs_mask & (1 << reg))
3776 /* Thumb-2 can use high regs. */
3777 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3778 if (pushed_regs_mask & (1 << reg))
3781 /* Something went wrong - thumb_compute_save_reg_mask()
3782 should have arranged for a suitable register to be pushed. */
3786 static GTY(()) int pic_labelno;
3788 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3792 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3794 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3796 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3799 gcc_assert (flag_pic);
3801 pic_reg = cfun->machine->pic_reg;
3802 if (TARGET_VXWORKS_RTP)
3804 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3805 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3806 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3808 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3810 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3811 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3815 /* We use an UNSPEC rather than a LABEL_REF because this label
3816 never appears in the code stream. */
3818 labelno = GEN_INT (pic_labelno++);
3819 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3820 l1 = gen_rtx_CONST (VOIDmode, l1);
3822 /* On the ARM the PC register contains 'dot + 8' at the time of the
3823 addition, on the Thumb it is 'dot + 4'. */
3824 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3825 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3827 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3831 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3832 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3834 else if (TARGET_THUMB2)
3836 /* Thumb-2 only allows very limited access to the PC. Calculate the
3837 address in a temporary register. */
3838 if (arm_pic_register != INVALID_REGNUM)
3840 pic_tmp = gen_rtx_REG (SImode,
3841 thumb_find_work_register (saved_regs));
3845 gcc_assert (can_create_pseudo_p ());
3846 pic_tmp = gen_reg_rtx (Pmode);
3849 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3850 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3851 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3853 else /* TARGET_THUMB1 */
3855 if (arm_pic_register != INVALID_REGNUM
3856 && REGNO (pic_reg) > LAST_LO_REGNUM)
3858 /* We will have pushed the pic register, so we should always be
3859 able to find a work register. */
3860 pic_tmp = gen_rtx_REG (SImode,
3861 thumb_find_work_register (saved_regs));
3862 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3863 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3866 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3867 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3871 /* Need to emit this whether or not we obey regdecls,
3872 since setjmp/longjmp can cause life info to screw up. */
3877 /* Return nonzero if X is valid as an ARM state addressing register. */
3879 arm_address_register_rtx_p (rtx x, int strict_p)
3883 if (GET_CODE (x) != REG)
3889 return ARM_REGNO_OK_FOR_BASE_P (regno);
3891 return (regno <= LAST_ARM_REGNUM
3892 || regno >= FIRST_PSEUDO_REGISTER
3893 || regno == FRAME_POINTER_REGNUM
3894 || regno == ARG_POINTER_REGNUM);
3897 /* Return TRUE if this rtx is the difference of a symbol and a label,
3898 and will reduce to a PC-relative relocation in the object file.
3899 Expressions like this can be left alone when generating PIC, rather
3900 than forced through the GOT. */
3902 pcrel_constant_p (rtx x)
3904 if (GET_CODE (x) == MINUS)
3905 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3910 /* Return nonzero if X is a valid ARM state address operand. */
3912 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3916 enum rtx_code code = GET_CODE (x);
3918 if (arm_address_register_rtx_p (x, strict_p))
3921 use_ldrd = (TARGET_LDRD
3923 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3925 if (code == POST_INC || code == PRE_DEC
3926 || ((code == PRE_INC || code == POST_DEC)
3927 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3928 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3930 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3931 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3932 && GET_CODE (XEXP (x, 1)) == PLUS
3933 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3935 rtx addend = XEXP (XEXP (x, 1), 1);
3937 /* Don't allow ldrd post increment by register because it's hard
3938 to fixup invalid register choices. */
3940 && GET_CODE (x) == POST_MODIFY
3941 && GET_CODE (addend) == REG)
3944 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3945 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3948 /* After reload constants split into minipools will have addresses
3949 from a LABEL_REF. */
3950 else if (reload_completed
3951 && (code == LABEL_REF
3953 && GET_CODE (XEXP (x, 0)) == PLUS
3954 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3955 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3958 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3961 else if (code == PLUS)
3963 rtx xop0 = XEXP (x, 0);
3964 rtx xop1 = XEXP (x, 1);
3966 return ((arm_address_register_rtx_p (xop0, strict_p)
3967 && GET_CODE(xop1) == CONST_INT
3968 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3969 || (arm_address_register_rtx_p (xop1, strict_p)
3970 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3974 /* Reload currently can't handle MINUS, so disable this for now */
3975 else if (GET_CODE (x) == MINUS)
3977 rtx xop0 = XEXP (x, 0);
3978 rtx xop1 = XEXP (x, 1);
3980 return (arm_address_register_rtx_p (xop0, strict_p)
3981 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3985 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3986 && code == SYMBOL_REF
3987 && CONSTANT_POOL_ADDRESS_P (x)
3989 && symbol_mentioned_p (get_pool_constant (x))
3990 && ! pcrel_constant_p (get_pool_constant (x))))
3996 /* Return nonzero if X is a valid Thumb-2 address operand. */
3998 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4001 enum rtx_code code = GET_CODE (x);
4003 if (arm_address_register_rtx_p (x, strict_p))
4006 use_ldrd = (TARGET_LDRD
4008 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4010 if (code == POST_INC || code == PRE_DEC
4011 || ((code == PRE_INC || code == POST_DEC)
4012 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4013 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4015 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4016 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4017 && GET_CODE (XEXP (x, 1)) == PLUS
4018 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4020 /* Thumb-2 only has autoincrement by constant. */
4021 rtx addend = XEXP (XEXP (x, 1), 1);
4022 HOST_WIDE_INT offset;
4024 if (GET_CODE (addend) != CONST_INT)
4027 offset = INTVAL(addend);
4028 if (GET_MODE_SIZE (mode) <= 4)
4029 return (offset > -256 && offset < 256);
4031 return (use_ldrd && offset > -1024 && offset < 1024
4032 && (offset & 3) == 0);
4035 /* After reload constants split into minipools will have addresses
4036 from a LABEL_REF. */
4037 else if (reload_completed
4038 && (code == LABEL_REF
4040 && GET_CODE (XEXP (x, 0)) == PLUS
4041 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4042 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4045 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4048 else if (code == PLUS)
4050 rtx xop0 = XEXP (x, 0);
4051 rtx xop1 = XEXP (x, 1);
4053 return ((arm_address_register_rtx_p (xop0, strict_p)
4054 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4055 || (arm_address_register_rtx_p (xop1, strict_p)
4056 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4059 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4060 && code == SYMBOL_REF
4061 && CONSTANT_POOL_ADDRESS_P (x)
4063 && symbol_mentioned_p (get_pool_constant (x))
4064 && ! pcrel_constant_p (get_pool_constant (x))))
4070 /* Return nonzero if INDEX is valid for an address index operand in
4073 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4076 HOST_WIDE_INT range;
4077 enum rtx_code code = GET_CODE (index);
4079 /* Standard coprocessor addressing modes. */
4080 if (TARGET_HARD_FLOAT
4081 && (TARGET_FPA || TARGET_MAVERICK)
4082 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4083 || (TARGET_MAVERICK && mode == DImode)))
4084 return (code == CONST_INT && INTVAL (index) < 1024
4085 && INTVAL (index) > -1024
4086 && (INTVAL (index) & 3) == 0);
4089 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4090 return (code == CONST_INT
4091 && INTVAL (index) < 1016
4092 && INTVAL (index) > -1024
4093 && (INTVAL (index) & 3) == 0);
4095 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4096 return (code == CONST_INT
4097 && INTVAL (index) < 1024
4098 && INTVAL (index) > -1024
4099 && (INTVAL (index) & 3) == 0);
4101 if (arm_address_register_rtx_p (index, strict_p)
4102 && (GET_MODE_SIZE (mode) <= 4))
4105 if (mode == DImode || mode == DFmode)
4107 if (code == CONST_INT)
4109 HOST_WIDE_INT val = INTVAL (index);
4112 return val > -256 && val < 256;
4114 return val > -4096 && val < 4092;
4117 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4120 if (GET_MODE_SIZE (mode) <= 4
4123 || (mode == QImode && outer == SIGN_EXTEND))))
4127 rtx xiop0 = XEXP (index, 0);
4128 rtx xiop1 = XEXP (index, 1);
4130 return ((arm_address_register_rtx_p (xiop0, strict_p)
4131 && power_of_two_operand (xiop1, SImode))
4132 || (arm_address_register_rtx_p (xiop1, strict_p)
4133 && power_of_two_operand (xiop0, SImode)));
4135 else if (code == LSHIFTRT || code == ASHIFTRT
4136 || code == ASHIFT || code == ROTATERT)
4138 rtx op = XEXP (index, 1);
4140 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4141 && GET_CODE (op) == CONST_INT
4143 && INTVAL (op) <= 31);
4147 /* For ARM v4 we may be doing a sign-extend operation during the
4151 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
4157 range = (mode == HImode) ? 4095 : 4096;
4159 return (code == CONST_INT
4160 && INTVAL (index) < range
4161 && INTVAL (index) > -range);
4164 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4165 index operand. i.e. 1, 2, 4 or 8. */
4167 thumb2_index_mul_operand (rtx op)
4171 if (GET_CODE(op) != CONST_INT)
4175 return (val == 1 || val == 2 || val == 4 || val == 8);
4178 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4180 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4182 enum rtx_code code = GET_CODE (index);
4184 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4185 /* Standard coprocessor addressing modes. */
4186 if (TARGET_HARD_FLOAT
4187 && (TARGET_FPA || TARGET_MAVERICK)
4188 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4189 || (TARGET_MAVERICK && mode == DImode)))
4190 return (code == CONST_INT && INTVAL (index) < 1024
4191 && INTVAL (index) > -1024
4192 && (INTVAL (index) & 3) == 0);
4194 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4196 /* For DImode assume values will usually live in core regs
4197 and only allow LDRD addressing modes. */
4198 if (!TARGET_LDRD || mode != DImode)
4199 return (code == CONST_INT
4200 && INTVAL (index) < 1024
4201 && INTVAL (index) > -1024
4202 && (INTVAL (index) & 3) == 0);
4206 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4207 return (code == CONST_INT
4208 && INTVAL (index) < 1016
4209 && INTVAL (index) > -1024
4210 && (INTVAL (index) & 3) == 0);
4212 if (arm_address_register_rtx_p (index, strict_p)
4213 && (GET_MODE_SIZE (mode) <= 4))
4216 if (mode == DImode || mode == DFmode)
4218 HOST_WIDE_INT val = INTVAL (index);
4219 /* ??? Can we assume ldrd for thumb2? */
4220 /* Thumb-2 ldrd only has reg+const addressing modes. */
4221 if (code != CONST_INT)
4224 /* ldrd supports offsets of +-1020.
4225 However the ldr fallback does not. */
4226 return val > -256 && val < 256 && (val & 3) == 0;
4231 rtx xiop0 = XEXP (index, 0);
4232 rtx xiop1 = XEXP (index, 1);
4234 return ((arm_address_register_rtx_p (xiop0, strict_p)
4235 && thumb2_index_mul_operand (xiop1))
4236 || (arm_address_register_rtx_p (xiop1, strict_p)
4237 && thumb2_index_mul_operand (xiop0)));
4239 else if (code == ASHIFT)
4241 rtx op = XEXP (index, 1);
4243 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4244 && GET_CODE (op) == CONST_INT
4246 && INTVAL (op) <= 3);
4249 return (code == CONST_INT
4250 && INTVAL (index) < 4096
4251 && INTVAL (index) > -256);
4254 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4256 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4260 if (GET_CODE (x) != REG)
4266 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4268 return (regno <= LAST_LO_REGNUM
4269 || regno > LAST_VIRTUAL_REGISTER
4270 || regno == FRAME_POINTER_REGNUM
4271 || (GET_MODE_SIZE (mode) >= 4
4272 && (regno == STACK_POINTER_REGNUM
4273 || regno >= FIRST_PSEUDO_REGISTER
4274 || x == hard_frame_pointer_rtx
4275 || x == arg_pointer_rtx)));
4278 /* Return nonzero if x is a legitimate index register. This is the case
4279 for any base register that can access a QImode object. */
4281 thumb1_index_register_rtx_p (rtx x, int strict_p)
4283 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4286 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4288 The AP may be eliminated to either the SP or the FP, so we use the
4289 least common denominator, e.g. SImode, and offsets from 0 to 64.
4291 ??? Verify whether the above is the right approach.
4293 ??? Also, the FP may be eliminated to the SP, so perhaps that
4294 needs special handling also.
4296 ??? Look at how the mips16 port solves this problem. It probably uses
4297 better ways to solve some of these problems.
4299 Although it is not incorrect, we don't accept QImode and HImode
4300 addresses based on the frame pointer or arg pointer until the
4301 reload pass starts. This is so that eliminating such addresses
4302 into stack based ones won't produce impossible code. */
4304 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4306 /* ??? Not clear if this is right. Experiment. */
4307 if (GET_MODE_SIZE (mode) < 4
4308 && !(reload_in_progress || reload_completed)
4309 && (reg_mentioned_p (frame_pointer_rtx, x)
4310 || reg_mentioned_p (arg_pointer_rtx, x)
4311 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4312 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4313 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4314 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4317 /* Accept any base register. SP only in SImode or larger. */
4318 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4321 /* This is PC relative data before arm_reorg runs. */
4322 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4323 && GET_CODE (x) == SYMBOL_REF
4324 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4327 /* This is PC relative data after arm_reorg runs. */
4328 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4329 && (GET_CODE (x) == LABEL_REF
4330 || (GET_CODE (x) == CONST
4331 && GET_CODE (XEXP (x, 0)) == PLUS
4332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4333 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4336 /* Post-inc indexing only supported for SImode and larger. */
4337 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4338 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4341 else if (GET_CODE (x) == PLUS)
4343 /* REG+REG address can be any two index registers. */
4344 /* We disallow FRAME+REG addressing since we know that FRAME
4345 will be replaced with STACK, and SP relative addressing only
4346 permits SP+OFFSET. */
4347 if (GET_MODE_SIZE (mode) <= 4
4348 && XEXP (x, 0) != frame_pointer_rtx
4349 && XEXP (x, 1) != frame_pointer_rtx
4350 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4351 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4354 /* REG+const has 5-7 bit offset for non-SP registers. */
4355 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4356 || XEXP (x, 0) == arg_pointer_rtx)
4357 && GET_CODE (XEXP (x, 1)) == CONST_INT
4358 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4361 /* REG+const has 10-bit offset for SP, but only SImode and
4362 larger is supported. */
4363 /* ??? Should probably check for DI/DFmode overflow here
4364 just like GO_IF_LEGITIMATE_OFFSET does. */
4365 else if (GET_CODE (XEXP (x, 0)) == REG
4366 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4367 && GET_MODE_SIZE (mode) >= 4
4368 && GET_CODE (XEXP (x, 1)) == CONST_INT
4369 && INTVAL (XEXP (x, 1)) >= 0
4370 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4371 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4374 else if (GET_CODE (XEXP (x, 0)) == REG
4375 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4376 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4377 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4378 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4379 && GET_MODE_SIZE (mode) >= 4
4380 && GET_CODE (XEXP (x, 1)) == CONST_INT
4381 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4385 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4386 && GET_MODE_SIZE (mode) == 4
4387 && GET_CODE (x) == SYMBOL_REF
4388 && CONSTANT_POOL_ADDRESS_P (x)
4390 && symbol_mentioned_p (get_pool_constant (x))
4391 && ! pcrel_constant_p (get_pool_constant (x))))
4397 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4398 instruction of mode MODE. */
4400 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4402 switch (GET_MODE_SIZE (mode))
4405 return val >= 0 && val < 32;
4408 return val >= 0 && val < 64 && (val & 1) == 0;
4412 && (val + GET_MODE_SIZE (mode)) <= 128
4417 /* Build the SYMBOL_REF for __tls_get_addr. */
4419 static GTY(()) rtx tls_get_addr_libfunc;
4422 get_tls_get_addr (void)
4424 if (!tls_get_addr_libfunc)
4425 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4426 return tls_get_addr_libfunc;
4430 arm_load_tp (rtx target)
4433 target = gen_reg_rtx (SImode);
4437 /* Can return in any reg. */
4438 emit_insn (gen_load_tp_hard (target));
4442 /* Always returned in r0. Immediately copy the result into a pseudo,
4443 otherwise other uses of r0 (e.g. setting up function arguments) may
4444 clobber the value. */
4448 emit_insn (gen_load_tp_soft ());
4450 tmp = gen_rtx_REG (SImode, 0);
4451 emit_move_insn (target, tmp);
4457 load_tls_operand (rtx x, rtx reg)
4461 if (reg == NULL_RTX)
4462 reg = gen_reg_rtx (SImode);
4464 tmp = gen_rtx_CONST (SImode, x);
4466 emit_move_insn (reg, tmp);
4472 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4474 rtx insns, label, labelno, sum;
4478 labelno = GEN_INT (pic_labelno++);
4479 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4480 label = gen_rtx_CONST (VOIDmode, label);
4482 sum = gen_rtx_UNSPEC (Pmode,
4483 gen_rtvec (4, x, GEN_INT (reloc), label,
4484 GEN_INT (TARGET_ARM ? 8 : 4)),
4486 reg = load_tls_operand (sum, reg);
4489 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4490 else if (TARGET_THUMB2)
4493 /* Thumb-2 only allows very limited access to the PC. Calculate
4494 the address in a temporary register. */
4495 tmp = gen_reg_rtx (SImode);
4496 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4497 emit_insn (gen_addsi3(reg, reg, tmp));
4499 else /* TARGET_THUMB1 */
4500 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4502 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4503 Pmode, 1, reg, Pmode);
4505 insns = get_insns ();
4512 legitimize_tls_address (rtx x, rtx reg)
4514 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4515 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4519 case TLS_MODEL_GLOBAL_DYNAMIC:
4520 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4521 dest = gen_reg_rtx (Pmode);
4522 emit_libcall_block (insns, dest, ret, x);
4525 case TLS_MODEL_LOCAL_DYNAMIC:
4526 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4528 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4529 share the LDM result with other LD model accesses. */
4530 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4532 dest = gen_reg_rtx (Pmode);
4533 emit_libcall_block (insns, dest, ret, eqv);
4535 /* Load the addend. */
4536 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4538 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4539 return gen_rtx_PLUS (Pmode, dest, addend);
4541 case TLS_MODEL_INITIAL_EXEC:
4542 labelno = GEN_INT (pic_labelno++);
4543 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4544 label = gen_rtx_CONST (VOIDmode, label);
4545 sum = gen_rtx_UNSPEC (Pmode,
4546 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4547 GEN_INT (TARGET_ARM ? 8 : 4)),
4549 reg = load_tls_operand (sum, reg);
4552 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4553 else if (TARGET_THUMB2)
4556 /* Thumb-2 only allows very limited access to the PC. Calculate
4557 the address in a temporary register. */
4558 tmp = gen_reg_rtx (SImode);
4559 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4560 emit_insn (gen_addsi3(reg, reg, tmp));
4561 emit_move_insn (reg, gen_const_mem (SImode, reg));
4565 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4566 emit_move_insn (reg, gen_const_mem (SImode, reg));
4569 tp = arm_load_tp (NULL_RTX);
4571 return gen_rtx_PLUS (Pmode, tp, reg);
4573 case TLS_MODEL_LOCAL_EXEC:
4574 tp = arm_load_tp (NULL_RTX);
4576 reg = gen_rtx_UNSPEC (Pmode,
4577 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4579 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4581 return gen_rtx_PLUS (Pmode, tp, reg);
4588 /* Try machine-dependent ways of modifying an illegitimate address
4589 to be legitimate. If we find one, return the new, valid address. */
4591 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4593 if (arm_tls_symbol_p (x))
4594 return legitimize_tls_address (x, NULL_RTX);
4596 if (GET_CODE (x) == PLUS)
4598 rtx xop0 = XEXP (x, 0);
4599 rtx xop1 = XEXP (x, 1);
4601 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4602 xop0 = force_reg (SImode, xop0);
4604 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4605 xop1 = force_reg (SImode, xop1);
4607 if (ARM_BASE_REGISTER_RTX_P (xop0)
4608 && GET_CODE (xop1) == CONST_INT)
4610 HOST_WIDE_INT n, low_n;
4614 /* VFP addressing modes actually allow greater offsets, but for
4615 now we just stick with the lowest common denominator. */
4617 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4629 low_n = ((mode) == TImode ? 0
4630 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4634 base_reg = gen_reg_rtx (SImode);
4635 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4636 emit_move_insn (base_reg, val);
4637 x = plus_constant (base_reg, low_n);
4639 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4640 x = gen_rtx_PLUS (SImode, xop0, xop1);
4643 /* XXX We don't allow MINUS any more -- see comment in
4644 arm_legitimate_address_p (). */
4645 else if (GET_CODE (x) == MINUS)
4647 rtx xop0 = XEXP (x, 0);
4648 rtx xop1 = XEXP (x, 1);
4650 if (CONSTANT_P (xop0))
4651 xop0 = force_reg (SImode, xop0);
4653 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4654 xop1 = force_reg (SImode, xop1);
4656 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4657 x = gen_rtx_MINUS (SImode, xop0, xop1);
4660 /* Make sure to take full advantage of the pre-indexed addressing mode
4661 with absolute addresses which often allows for the base register to
4662 be factorized for multiple adjacent memory references, and it might
4663 even allows for the mini pool to be avoided entirely. */
4664 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4667 HOST_WIDE_INT mask, base, index;
4670 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4671 use a 8-bit index. So let's use a 12-bit index for SImode only and
4672 hope that arm_gen_constant will enable ldrb to use more bits. */
4673 bits = (mode == SImode) ? 12 : 8;
4674 mask = (1 << bits) - 1;
4675 base = INTVAL (x) & ~mask;
4676 index = INTVAL (x) & mask;
4677 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4679 /* It'll most probably be more efficient to generate the base
4680 with more bits set and use a negative index instead. */
4684 base_reg = force_reg (SImode, GEN_INT (base));
4685 x = plus_constant (base_reg, index);
4690 /* We need to find and carefully transform any SYMBOL and LABEL
4691 references; so go back to the original address expression. */
4692 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4694 if (new_x != orig_x)
4702 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4703 to be legitimate. If we find one, return the new, valid address. */
4705 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4707 if (arm_tls_symbol_p (x))
4708 return legitimize_tls_address (x, NULL_RTX);
4710 if (GET_CODE (x) == PLUS
4711 && GET_CODE (XEXP (x, 1)) == CONST_INT
4712 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4713 || INTVAL (XEXP (x, 1)) < 0))
4715 rtx xop0 = XEXP (x, 0);
4716 rtx xop1 = XEXP (x, 1);
4717 HOST_WIDE_INT offset = INTVAL (xop1);
4719 /* Try and fold the offset into a biasing of the base register and
4720 then offsetting that. Don't do this when optimizing for space
4721 since it can cause too many CSEs. */
4722 if (optimize_size && offset >= 0
4723 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4725 HOST_WIDE_INT delta;
4728 delta = offset - (256 - GET_MODE_SIZE (mode));
4729 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4730 delta = 31 * GET_MODE_SIZE (mode);
4732 delta = offset & (~31 * GET_MODE_SIZE (mode));
4734 xop0 = force_operand (plus_constant (xop0, offset - delta),
4736 x = plus_constant (xop0, delta);
4738 else if (offset < 0 && offset > -256)
4739 /* Small negative offsets are best done with a subtract before the
4740 dereference, forcing these into a register normally takes two
4742 x = force_operand (x, NULL_RTX);
4745 /* For the remaining cases, force the constant into a register. */
4746 xop1 = force_reg (SImode, xop1);
4747 x = gen_rtx_PLUS (SImode, xop0, xop1);
4750 else if (GET_CODE (x) == PLUS
4751 && s_register_operand (XEXP (x, 1), SImode)
4752 && !s_register_operand (XEXP (x, 0), SImode))
4754 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4756 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4761 /* We need to find and carefully transform any SYMBOL and LABEL
4762 references; so go back to the original address expression. */
4763 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4765 if (new_x != orig_x)
4773 thumb_legitimize_reload_address (rtx *x_p,
4774 enum machine_mode mode,
4775 int opnum, int type,
4776 int ind_levels ATTRIBUTE_UNUSED)
4780 if (GET_CODE (x) == PLUS
4781 && GET_MODE_SIZE (mode) < 4
4782 && REG_P (XEXP (x, 0))
4783 && XEXP (x, 0) == stack_pointer_rtx
4784 && GET_CODE (XEXP (x, 1)) == CONST_INT
4785 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4790 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4791 Pmode, VOIDmode, 0, 0, opnum, type);
4795 /* If both registers are hi-regs, then it's better to reload the
4796 entire expression rather than each register individually. That
4797 only requires one reload register rather than two. */
4798 if (GET_CODE (x) == PLUS
4799 && REG_P (XEXP (x, 0))
4800 && REG_P (XEXP (x, 1))
4801 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4802 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4807 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4808 Pmode, VOIDmode, 0, 0, opnum, type);
4815 /* Test for various thread-local symbols. */
4817 /* Return TRUE if X is a thread-local symbol. */
4820 arm_tls_symbol_p (rtx x)
4822 if (! TARGET_HAVE_TLS)
4825 if (GET_CODE (x) != SYMBOL_REF)
4828 return SYMBOL_REF_TLS_MODEL (x) != 0;
4831 /* Helper for arm_tls_referenced_p. */
4834 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4836 if (GET_CODE (*x) == SYMBOL_REF)
4837 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4839 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4840 TLS offsets, not real symbol references. */
4841 if (GET_CODE (*x) == UNSPEC
4842 && XINT (*x, 1) == UNSPEC_TLS)
4848 /* Return TRUE if X contains any TLS symbol references. */
4851 arm_tls_referenced_p (rtx x)
4853 if (! TARGET_HAVE_TLS)
4856 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4859 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4862 arm_cannot_force_const_mem (rtx x)
4866 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4868 split_const (x, &base, &offset);
4869 if (GET_CODE (base) == SYMBOL_REF
4870 && !offset_within_block_p (base, INTVAL (offset)))
4873 return arm_tls_referenced_p (x);
4876 #define REG_OR_SUBREG_REG(X) \
4877 (GET_CODE (X) == REG \
4878 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4880 #define REG_OR_SUBREG_RTX(X) \
4881 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4883 #ifndef COSTS_N_INSNS
4884 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4887 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4889 enum machine_mode mode = GET_MODE (x);
4902 return COSTS_N_INSNS (1);
4905 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4908 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4915 return COSTS_N_INSNS (2) + cycles;
4917 return COSTS_N_INSNS (1) + 16;
4920 return (COSTS_N_INSNS (1)
4921 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4922 + GET_CODE (SET_DEST (x)) == MEM));
4927 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4929 if (thumb_shiftable_const (INTVAL (x)))
4930 return COSTS_N_INSNS (2);
4931 return COSTS_N_INSNS (3);
4933 else if ((outer == PLUS || outer == COMPARE)
4934 && INTVAL (x) < 256 && INTVAL (x) > -256)
4936 else if (outer == AND
4937 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4938 return COSTS_N_INSNS (1);
4939 else if (outer == ASHIFT || outer == ASHIFTRT
4940 || outer == LSHIFTRT)
4942 return COSTS_N_INSNS (2);
4948 return COSTS_N_INSNS (3);
4966 /* XXX another guess. */
4967 /* Memory costs quite a lot for the first word, but subsequent words
4968 load at the equivalent of a single insn each. */
4969 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4970 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4975 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4980 /* XXX still guessing. */
4981 switch (GET_MODE (XEXP (x, 0)))
4984 return (1 + (mode == DImode ? 4 : 0)
4985 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4988 return (4 + (mode == DImode ? 4 : 0)
4989 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4992 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5004 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5006 enum machine_mode mode = GET_MODE (x);
5007 enum rtx_code subcode;
5009 enum rtx_code code = GET_CODE (x);
5016 /* Memory costs quite a lot for the first word, but subsequent words
5017 load at the equivalent of a single insn each. */
5018 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5025 if (TARGET_HARD_FLOAT && mode == SFmode)
5026 *total = COSTS_N_INSNS (2);
5027 else if (TARGET_HARD_FLOAT && mode == DFmode)
5028 *total = COSTS_N_INSNS (4);
5030 *total = COSTS_N_INSNS (20);
5034 if (GET_CODE (XEXP (x, 1)) == REG)
5035 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5036 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5037 *total = rtx_cost (XEXP (x, 1), code, speed);
5043 *total += COSTS_N_INSNS (4);
5048 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5049 *total += rtx_cost (XEXP (x, 0), code, speed);
5052 *total += COSTS_N_INSNS (3);
5056 *total += COSTS_N_INSNS (1);
5057 /* Increase the cost of complex shifts because they aren't any faster,
5058 and reduce dual issue opportunities. */
5059 if (arm_tune_cortex_a9
5060 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5068 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5070 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5071 *total = COSTS_N_INSNS (1);
5073 *total = COSTS_N_INSNS (20);
5076 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5077 /* Thumb2 does not have RSB, so all arguments must be
5078 registers (subtracting a constant is canonicalized as
5079 addition of the negated constant). */
5085 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5086 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5087 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5089 *total += rtx_cost (XEXP (x, 1), code, speed);
5093 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5094 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5096 *total += rtx_cost (XEXP (x, 0), code, speed);
5103 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5105 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5107 *total = COSTS_N_INSNS (1);
5108 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5109 && arm_const_double_rtx (XEXP (x, 0)))
5111 *total += rtx_cost (XEXP (x, 1), code, speed);
5115 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5116 && arm_const_double_rtx (XEXP (x, 1)))
5118 *total += rtx_cost (XEXP (x, 0), code, speed);
5124 *total = COSTS_N_INSNS (20);
5128 *total = COSTS_N_INSNS (1);
5129 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5130 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5132 *total += rtx_cost (XEXP (x, 1), code, speed);
5136 subcode = GET_CODE (XEXP (x, 1));
5137 if (subcode == ASHIFT || subcode == ASHIFTRT
5138 || subcode == LSHIFTRT
5139 || subcode == ROTATE || subcode == ROTATERT)
5141 *total += rtx_cost (XEXP (x, 0), code, speed);
5142 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5147 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5148 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
5149 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))
5151 *total += rtx_cost (XEXP (x, 0), code, speed);
5152 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5156 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5157 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5159 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5160 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5161 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5162 *total += COSTS_N_INSNS (1);
5170 if (code == PLUS && arm_arch6 && mode == SImode
5171 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5172 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5174 *total = COSTS_N_INSNS (1);
5175 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5177 *total += rtx_cost (XEXP (x, 1), code, speed);
5181 /* MLA: All arguments must be registers. We filter out
5182 multiplication by a power of two, so that we fall down into
5184 if (GET_CODE (XEXP (x, 0)) == MULT
5185 && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5186 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
5187 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
5189 /* The cost comes from the cost of the multiply. */
5193 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5195 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5197 *total = COSTS_N_INSNS (1);
5198 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5199 && arm_const_double_rtx (XEXP (x, 1)))
5201 *total += rtx_cost (XEXP (x, 0), code, speed);
5208 *total = COSTS_N_INSNS (20);
5212 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5213 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5215 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5216 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5217 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5218 *total += COSTS_N_INSNS (1);
5224 case AND: case XOR: case IOR:
5227 /* Normally the frame registers will be spilt into reg+const during
5228 reload, so it is a bad idea to combine them with other instructions,
5229 since then they might not be moved outside of loops. As a compromise
5230 we allow integration with ops that have a constant as their second
5232 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5233 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5234 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5235 || (REG_OR_SUBREG_REG (XEXP (x, 0))
5236 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
5241 *total += COSTS_N_INSNS (2);
5242 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5243 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5245 *total += rtx_cost (XEXP (x, 0), code, speed);
5252 *total += COSTS_N_INSNS (1);
5253 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5254 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5256 *total += rtx_cost (XEXP (x, 0), code, speed);
5259 subcode = GET_CODE (XEXP (x, 0));
5260 if (subcode == ASHIFT || subcode == ASHIFTRT
5261 || subcode == LSHIFTRT
5262 || subcode == ROTATE || subcode == ROTATERT)
5264 *total += rtx_cost (XEXP (x, 1), code, speed);
5265 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5270 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5271 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
5272 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
5274 *total += rtx_cost (XEXP (x, 1), code, speed);
5275 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5279 if (subcode == UMIN || subcode == UMAX
5280 || subcode == SMIN || subcode == SMAX)
5282 *total = COSTS_N_INSNS (3);
5289 /* This should have been handled by the CPU specific routines. */
5293 if (arm_arch3m && mode == SImode
5294 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5296 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5297 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5298 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5299 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5301 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5304 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5308 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5310 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5312 *total = COSTS_N_INSNS (1);
5315 *total = COSTS_N_INSNS (2);
5321 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5322 if (mode == SImode && code == NOT)
5324 subcode = GET_CODE (XEXP (x, 0));
5325 if (subcode == ASHIFT || subcode == ASHIFTRT
5326 || subcode == LSHIFTRT
5327 || subcode == ROTATE || subcode == ROTATERT
5329 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5330 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
5331 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
5333 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5334 /* Register shifts cost an extra cycle. */
5335 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5336 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5345 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5347 *total = COSTS_N_INSNS (4);
5351 operand = XEXP (x, 0);
5353 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5354 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5355 && GET_CODE (XEXP (operand, 0)) == REG
5356 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5357 *total += COSTS_N_INSNS (1);
5358 *total += (rtx_cost (XEXP (x, 1), code, speed)
5359 + rtx_cost (XEXP (x, 2), code, speed));
5363 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5365 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5371 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5372 && mode == SImode && XEXP (x, 1) == const0_rtx)
5374 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5380 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5381 && mode == SImode && XEXP (x, 1) == const0_rtx)
5383 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5403 /* SCC insns. In the case where the comparison has already been
5404 performed, then they cost 2 instructions. Otherwise they need
5405 an additional comparison before them. */
5406 *total = COSTS_N_INSNS (2);
5407 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5414 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5420 *total += COSTS_N_INSNS (1);
5421 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5422 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5424 *total += rtx_cost (XEXP (x, 0), code, speed);
5428 subcode = GET_CODE (XEXP (x, 0));
5429 if (subcode == ASHIFT || subcode == ASHIFTRT
5430 || subcode == LSHIFTRT
5431 || subcode == ROTATE || subcode == ROTATERT)
5433 *total += rtx_cost (XEXP (x, 1), code, speed);
5434 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5439 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5440 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
5441 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
5443 *total += rtx_cost (XEXP (x, 1), code, speed);
5444 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5454 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5455 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5456 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5457 *total += rtx_cost (XEXP (x, 1), code, speed);
5461 if (GET_MODE_CLASS (mode == MODE_FLOAT))
5463 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5465 *total = COSTS_N_INSNS (1);
5468 *total = COSTS_N_INSNS (20);
5471 *total = COSTS_N_INSNS (1);
5473 *total += COSTS_N_INSNS (3);
5477 if (GET_MODE_CLASS (mode) == MODE_INT)
5481 *total += COSTS_N_INSNS (1);
5483 if (GET_MODE (XEXP (x, 0)) != SImode)
5487 if (GET_CODE (XEXP (x, 0)) != MEM)
5488 *total += COSTS_N_INSNS (1);
5490 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5491 *total += COSTS_N_INSNS (2);
5500 if (GET_MODE_CLASS (mode) == MODE_INT)
5503 *total += COSTS_N_INSNS (1);
5505 if (GET_MODE (XEXP (x, 0)) != SImode)
5509 if (GET_CODE (XEXP (x, 0)) != MEM)
5510 *total += COSTS_N_INSNS (1);
5512 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5513 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5520 switch (GET_MODE (XEXP (x, 0)))
5527 *total = COSTS_N_INSNS (1);
5537 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5541 if (const_ok_for_arm (INTVAL (x))
5542 || const_ok_for_arm (~INTVAL (x)))
5543 *total = COSTS_N_INSNS (1);
5545 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5546 INTVAL (x), NULL_RTX,
5553 *total = COSTS_N_INSNS (3);
5557 *total = COSTS_N_INSNS (1);
5561 *total = COSTS_N_INSNS (1);
5562 *total += rtx_cost (XEXP (x, 0), code, speed);
5566 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5567 *total = COSTS_N_INSNS (1);
5569 *total = COSTS_N_INSNS (4);
5573 *total = COSTS_N_INSNS (4);
5578 /* RTX costs when optimizing for size. */
5580 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5583 enum machine_mode mode = GET_MODE (x);
5586 /* XXX TBD. For now, use the standard costs. */
5587 *total = thumb1_rtx_costs (x, code, outer_code);
5591 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5595 /* A memory access costs 1 insn if the mode is small, or the address is
5596 a single register, otherwise it costs one insn per word. */
5597 if (REG_P (XEXP (x, 0)))
5598 *total = COSTS_N_INSNS (1);
5600 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5607 /* Needs a libcall, so it costs about this. */
5608 *total = COSTS_N_INSNS (2);
5612 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5614 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5622 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5624 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5627 else if (mode == SImode)
5629 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5630 /* Slightly disparage register shifts, but not by much. */
5631 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5632 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5636 /* Needs a libcall. */
5637 *total = COSTS_N_INSNS (2);
5641 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5643 *total = COSTS_N_INSNS (1);
5649 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5650 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5652 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5653 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5654 || subcode1 == ROTATE || subcode1 == ROTATERT
5655 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5656 || subcode1 == ASHIFTRT)
5658 /* It's just the cost of the two operands. */
5663 *total = COSTS_N_INSNS (1);
5667 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5671 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5673 *total = COSTS_N_INSNS (1);
5678 case AND: case XOR: case IOR:
5681 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5683 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5684 || subcode == LSHIFTRT || subcode == ASHIFTRT
5685 || (code == AND && subcode == NOT))
5687 /* It's just the cost of the two operands. */
5693 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5697 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5703 *total = COSTS_N_INSNS (1);
5709 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5718 if (cc_register (XEXP (x, 0), VOIDmode))
5721 *total = COSTS_N_INSNS (1);
5725 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5726 *total = COSTS_N_INSNS (1);
5728 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5733 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5735 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5736 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5739 *total += COSTS_N_INSNS (1);
5744 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5746 switch (GET_MODE (XEXP (x, 0)))
5749 *total += COSTS_N_INSNS (1);
5753 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5759 *total += COSTS_N_INSNS (2);
5764 *total += COSTS_N_INSNS (1);
5769 if (const_ok_for_arm (INTVAL (x)))
5770 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5771 else if (const_ok_for_arm (~INTVAL (x)))
5772 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5773 else if (const_ok_for_arm (-INTVAL (x)))
5775 if (outer_code == COMPARE || outer_code == PLUS
5776 || outer_code == MINUS)
5779 *total = COSTS_N_INSNS (1);
5782 *total = COSTS_N_INSNS (2);
5788 *total = COSTS_N_INSNS (2);
5792 *total = COSTS_N_INSNS (4);
5797 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5798 cost of these slightly. */
5799 *total = COSTS_N_INSNS (1) + 1;
5803 if (mode != VOIDmode)
5804 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5806 *total = COSTS_N_INSNS (4); /* How knows? */
5811 /* RTX costs when optimizing for size. */
5813 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
5817 return arm_size_rtx_costs (x, code, outer_code, total);
5819 return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total,
5823 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5824 supported on any "slowmul" cores, so it can be ignored. */
5827 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5828 int *total, bool speed)
5830 enum machine_mode mode = GET_MODE (x);
5834 *total = thumb1_rtx_costs (x, code, outer_code);
5841 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5844 *total = COSTS_N_INSNS (20);
5848 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5850 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5851 & (unsigned HOST_WIDE_INT) 0xffffffff);
5852 int cost, const_ok = const_ok_for_arm (i);
5853 int j, booth_unit_size;
5855 /* Tune as appropriate. */
5856 cost = const_ok ? 4 : 8;
5857 booth_unit_size = 2;
5858 for (j = 0; i && j < 32; j += booth_unit_size)
5860 i >>= booth_unit_size;
5864 *total = COSTS_N_INSNS (cost);
5865 *total += rtx_cost (XEXP (x, 0), code, speed);
5869 *total = COSTS_N_INSNS (20);
5873 return arm_rtx_costs_1 (x, outer_code, total, speed);;
5878 /* RTX cost for cores with a fast multiply unit (M variants). */
5881 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5882 int *total, bool speed)
5884 enum machine_mode mode = GET_MODE (x);
5888 *total = thumb1_rtx_costs (x, code, outer_code);
5892 /* ??? should thumb2 use different costs? */
5896 /* There is no point basing this on the tuning, since it is always the
5897 fast variant if it exists at all. */
5899 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5900 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5901 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5903 *total = COSTS_N_INSNS(2);
5910 *total = COSTS_N_INSNS (5);
5914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5916 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5917 & (unsigned HOST_WIDE_INT) 0xffffffff);
5918 int cost, const_ok = const_ok_for_arm (i);
5919 int j, booth_unit_size;
5921 /* Tune as appropriate. */
5922 cost = const_ok ? 4 : 8;
5923 booth_unit_size = 8;
5924 for (j = 0; i && j < 32; j += booth_unit_size)
5926 i >>= booth_unit_size;
5930 *total = COSTS_N_INSNS(cost);
5936 *total = COSTS_N_INSNS (4);
5940 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5942 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5944 *total = COSTS_N_INSNS (1);
5949 /* Requires a lib call */
5950 *total = COSTS_N_INSNS (20);
5954 return arm_rtx_costs_1 (x, outer_code, total, speed);
5959 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5960 so it can be ignored. */
5963 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
5965 enum machine_mode mode = GET_MODE (x);
5969 *total = thumb1_rtx_costs (x, code, outer_code);
5976 if (GET_CODE (XEXP (x, 0)) != MULT)
5977 return arm_rtx_costs_1 (x, outer_code, total, speed);
5979 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5980 will stall until the multiplication is complete. */
5981 *total = COSTS_N_INSNS (3);
5985 /* There is no point basing this on the tuning, since it is always the
5986 fast variant if it exists at all. */
5988 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5989 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5990 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5992 *total = COSTS_N_INSNS (2);
5999 *total = COSTS_N_INSNS (5);
6003 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6005 /* If operand 1 is a constant we can more accurately
6006 calculate the cost of the multiply. The multiplier can
6007 retire 15 bits on the first cycle and a further 12 on the
6008 second. We do, of course, have to load the constant into
6009 a register first. */
6010 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6011 /* There's a general overhead of one cycle. */
6013 unsigned HOST_WIDE_INT masked_const;
6018 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6020 masked_const = i & 0xffff8000;
6021 if (masked_const != 0)
6024 masked_const = i & 0xf8000000;
6025 if (masked_const != 0)
6028 *total = COSTS_N_INSNS (cost);
6034 *total = COSTS_N_INSNS (3);
6038 /* Requires a lib call */
6039 *total = COSTS_N_INSNS (20);
6043 return arm_rtx_costs_1 (x, outer_code, total, speed);
6048 /* RTX costs for 9e (and later) cores. */
6051 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6052 int *total, bool speed)
6054 enum machine_mode mode = GET_MODE (x);
6061 *total = COSTS_N_INSNS (3);
6065 *total = thumb1_rtx_costs (x, code, outer_code);
6073 /* There is no point basing this on the tuning, since it is always the
6074 fast variant if it exists at all. */
6076 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6077 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6078 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6080 *total = COSTS_N_INSNS (2);
6087 *total = COSTS_N_INSNS (5);
6093 *total = COSTS_N_INSNS (2);
6097 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6099 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6101 *total = COSTS_N_INSNS (1);
6106 *total = COSTS_N_INSNS (20);
6110 return arm_rtx_costs_1 (x, outer_code, total, speed);
6113 /* All address computations that can be done are free, but rtx cost returns
6114 the same for practically all of them. So we weight the different types
6115 of address here in the order (most pref first):
6116 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
6118 arm_arm_address_cost (rtx x)
6120 enum rtx_code c = GET_CODE (x);
6122 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6124 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6127 if (c == PLUS || c == MINUS)
6129 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6132 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6142 arm_thumb_address_cost (rtx x)
6144 enum rtx_code c = GET_CODE (x);
6149 && GET_CODE (XEXP (x, 0)) == REG
6150 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6157 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6159 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6163 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6167 /* Some true dependencies can have a higher cost depending
6168 on precisely how certain input operands are used. */
6170 && REG_NOTE_KIND (link) == 0
6171 && recog_memoized (insn) >= 0
6172 && recog_memoized (dep) >= 0)
6174 int shift_opnum = get_attr_shift (insn);
6175 enum attr_type attr_type = get_attr_type (dep);
6177 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6178 operand for INSN. If we have a shifted input operand and the
6179 instruction we depend on is another ALU instruction, then we may
6180 have to account for an additional stall. */
6181 if (shift_opnum != 0
6182 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6184 rtx shifted_operand;
6187 /* Get the shifted operand. */
6188 extract_insn (insn);
6189 shifted_operand = recog_data.operand[shift_opnum];
6191 /* Iterate over all the operands in DEP. If we write an operand
6192 that overlaps with SHIFTED_OPERAND, then we have increase the
6193 cost of this dependency. */
6195 preprocess_constraints ();
6196 for (opno = 0; opno < recog_data.n_operands; opno++)
6198 /* We can ignore strict inputs. */
6199 if (recog_data.operand_type[opno] == OP_IN)
6202 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6209 /* XXX This is not strictly true for the FPA. */
6210 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6211 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6214 /* Call insns don't incur a stall, even if they follow a load. */
6215 if (REG_NOTE_KIND (link) == 0
6216 && GET_CODE (insn) == CALL_INSN)
6219 if ((i_pat = single_set (insn)) != NULL
6220 && GET_CODE (SET_SRC (i_pat)) == MEM
6221 && (d_pat = single_set (dep)) != NULL
6222 && GET_CODE (SET_DEST (d_pat)) == MEM)
6224 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6225 /* This is a load after a store, there is no conflict if the load reads
6226 from a cached area. Assume that loads from the stack, and from the
6227 constant pool are cached, and that others will miss. This is a
6230 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6231 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6232 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6233 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6240 static int fp_consts_inited = 0;
6242 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6243 static const char * const strings_fp[8] =
6246 "4", "5", "0.5", "10"
6249 static REAL_VALUE_TYPE values_fp[8];
6252 init_fp_table (void)
6258 fp_consts_inited = 1;
6260 fp_consts_inited = 8;
6262 for (i = 0; i < fp_consts_inited; i++)
6264 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6269 /* Return TRUE if rtx X is a valid immediate FP constant. */
6271 arm_const_double_rtx (rtx x)
6276 if (!fp_consts_inited)
6279 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6280 if (REAL_VALUE_MINUS_ZERO (r))
6283 for (i = 0; i < fp_consts_inited; i++)
6284 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6290 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6292 neg_const_double_rtx_ok_for_fpa (rtx x)
6297 if (!fp_consts_inited)
6300 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6301 r = REAL_VALUE_NEGATE (r);
6302 if (REAL_VALUE_MINUS_ZERO (r))
6305 for (i = 0; i < 8; i++)
6306 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6313 /* VFPv3 has a fairly wide range of representable immediates, formed from
6314 "quarter-precision" floating-point values. These can be evaluated using this
6315 formula (with ^ for exponentiation):
6319 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6320 16 <= n <= 31 and 0 <= r <= 7.
6322 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6324 - A (most-significant) is the sign bit.
6325 - BCD are the exponent (encoded as r XOR 3).
6326 - EFGH are the mantissa (encoded as n - 16).
6329 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6330 fconst[sd] instruction, or -1 if X isn't suitable. */
6332 vfp3_const_double_index (rtx x)
6334 REAL_VALUE_TYPE r, m;
6336 unsigned HOST_WIDE_INT mantissa, mant_hi;
6337 unsigned HOST_WIDE_INT mask;
6338 HOST_WIDE_INT m1, m2;
6339 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6341 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6344 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6346 /* We can't represent these things, so detect them first. */
6347 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6350 /* Extract sign, exponent and mantissa. */
6351 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6352 r = REAL_VALUE_ABS (r);
6353 exponent = REAL_EXP (&r);
6354 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6355 highest (sign) bit, with a fixed binary point at bit point_pos.
6356 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6357 bits for the mantissa, this may fail (low bits would be lost). */
6358 real_ldexp (&m, &r, point_pos - exponent);
6359 REAL_VALUE_TO_INT (&m1, &m2, m);
6363 /* If there are bits set in the low part of the mantissa, we can't
6364 represent this value. */
6368 /* Now make it so that mantissa contains the most-significant bits, and move
6369 the point_pos to indicate that the least-significant bits have been
6371 point_pos -= HOST_BITS_PER_WIDE_INT;
6374 /* We can permit four significant bits of mantissa only, plus a high bit
6375 which is always 1. */
6376 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6377 if ((mantissa & mask) != 0)
6380 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6381 mantissa >>= point_pos - 5;
6383 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6384 floating-point immediate zero with Neon using an integer-zero load, but
6385 that case is handled elsewhere.) */
6389 gcc_assert (mantissa >= 16 && mantissa <= 31);
6391 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6392 normalized significands are in the range [1, 2). (Our mantissa is shifted
6393 left 4 places at this point relative to normalized IEEE754 values). GCC
6394 internally uses [0.5, 1) (see real.c), so the exponent returned from
6395 REAL_EXP must be altered. */
6396 exponent = 5 - exponent;
6398 if (exponent < 0 || exponent > 7)
6401 /* Sign, mantissa and exponent are now in the correct form to plug into the
6402 formula described in the comment above. */
6403 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6406 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6408 vfp3_const_double_rtx (rtx x)
6413 return vfp3_const_double_index (x) != -1;
6416 /* Recognize immediates which can be used in various Neon instructions. Legal
6417 immediates are described by the following table (for VMVN variants, the
6418 bitwise inverse of the constant shown is recognized. In either case, VMOV
6419 is output and the correct instruction to use for a given constant is chosen
6420 by the assembler). The constant shown is replicated across all elements of
6421 the destination vector.
6423 insn elems variant constant (binary)
6424 ---- ----- ------- -----------------
6425 vmov i32 0 00000000 00000000 00000000 abcdefgh
6426 vmov i32 1 00000000 00000000 abcdefgh 00000000
6427 vmov i32 2 00000000 abcdefgh 00000000 00000000
6428 vmov i32 3 abcdefgh 00000000 00000000 00000000
6429 vmov i16 4 00000000 abcdefgh
6430 vmov i16 5 abcdefgh 00000000
6431 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6432 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6433 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6434 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6435 vmvn i16 10 00000000 abcdefgh
6436 vmvn i16 11 abcdefgh 00000000
6437 vmov i32 12 00000000 00000000 abcdefgh 11111111
6438 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6439 vmov i32 14 00000000 abcdefgh 11111111 11111111
6440 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6442 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6443 eeeeeeee ffffffff gggggggg hhhhhhhh
6444 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6446 For case 18, B = !b. Representable values are exactly those accepted by
6447 vfp3_const_double_index, but are output as floating-point numbers rather
6450 Variants 0-5 (inclusive) may also be used as immediates for the second
6451 operand of VORR/VBIC instructions.
6453 The INVERSE argument causes the bitwise inverse of the given operand to be
6454 recognized instead (used for recognizing legal immediates for the VAND/VORN
6455 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6456 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6457 output, rather than the real insns vbic/vorr).
6459 INVERSE makes no difference to the recognition of float vectors.
6461 The return value is the variant of immediate as shown in the above table, or
6462 -1 if the given value doesn't match any of the listed patterns.
6465 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6466 rtx *modconst, int *elementwidth)
6468 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6470 for (i = 0; i < idx; i += (STRIDE)) \
6475 immtype = (CLASS); \
6476 elsize = (ELSIZE); \
6480 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6481 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6482 unsigned char bytes[16];
6483 int immtype = -1, matches;
6484 unsigned int invmask = inverse ? 0xff : 0;
6486 /* Vectors of float constants. */
6487 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6489 rtx el0 = CONST_VECTOR_ELT (op, 0);
6492 if (!vfp3_const_double_rtx (el0))
6495 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6497 for (i = 1; i < n_elts; i++)
6499 rtx elt = CONST_VECTOR_ELT (op, i);
6502 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6504 if (!REAL_VALUES_EQUAL (r0, re))
6509 *modconst = CONST_VECTOR_ELT (op, 0);
6517 /* Splat vector constant out into a byte vector. */
6518 for (i = 0; i < n_elts; i++)
6520 rtx el = CONST_VECTOR_ELT (op, i);
6521 unsigned HOST_WIDE_INT elpart;
6522 unsigned int part, parts;
6524 if (GET_CODE (el) == CONST_INT)
6526 elpart = INTVAL (el);
6529 else if (GET_CODE (el) == CONST_DOUBLE)
6531 elpart = CONST_DOUBLE_LOW (el);
6537 for (part = 0; part < parts; part++)
6540 for (byte = 0; byte < innersize; byte++)
6542 bytes[idx++] = (elpart & 0xff) ^ invmask;
6543 elpart >>= BITS_PER_UNIT;
6545 if (GET_CODE (el) == CONST_DOUBLE)
6546 elpart = CONST_DOUBLE_HIGH (el);
6551 gcc_assert (idx == GET_MODE_SIZE (mode));
6555 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6556 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6558 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6559 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6561 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6562 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6564 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6565 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6567 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6569 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6571 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6572 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6574 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6575 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6577 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6578 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6580 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6581 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6583 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6585 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6587 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6588 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6590 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6591 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6593 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6594 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6596 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6597 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6599 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6601 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6602 && bytes[i] == bytes[(i + 8) % idx]);
6610 *elementwidth = elsize;
6614 unsigned HOST_WIDE_INT imm = 0;
6616 /* Un-invert bytes of recognized vector, if necessary. */
6618 for (i = 0; i < idx; i++)
6619 bytes[i] ^= invmask;
6623 /* FIXME: Broken on 32-bit H_W_I hosts. */
6624 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6626 for (i = 0; i < 8; i++)
6627 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6628 << (i * BITS_PER_UNIT);
6630 *modconst = GEN_INT (imm);
6634 unsigned HOST_WIDE_INT imm = 0;
6636 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6637 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6639 *modconst = GEN_INT (imm);
6647 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6648 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6649 float elements), and a modified constant (whatever should be output for a
6650 VMOV) in *MODCONST. */
6653 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6654 rtx *modconst, int *elementwidth)
6658 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6664 *modconst = tmpconst;
6667 *elementwidth = tmpwidth;
6672 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6673 the immediate is valid, write a constant suitable for using as an operand
6674 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6675 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6678 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6679 rtx *modconst, int *elementwidth)
6683 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6685 if (retval < 0 || retval > 5)
6689 *modconst = tmpconst;
6692 *elementwidth = tmpwidth;
6697 /* Return a string suitable for output of Neon immediate logic operation
6701 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6702 int inverse, int quad)
6704 int width, is_valid;
6705 static char templ[40];
6707 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6709 gcc_assert (is_valid != 0);
6712 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6714 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6719 /* Output a sequence of pairwise operations to implement a reduction.
6720 NOTE: We do "too much work" here, because pairwise operations work on two
6721 registers-worth of operands in one go. Unfortunately we can't exploit those
6722 extra calculations to do the full operation in fewer steps, I don't think.
6723 Although all vector elements of the result but the first are ignored, we
6724 actually calculate the same result in each of the elements. An alternative
6725 such as initially loading a vector with zero to use as each of the second
6726 operands would use up an additional register and take an extra instruction,
6727 for no particular gain. */
6730 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6731 rtx (*reduc) (rtx, rtx, rtx))
6733 enum machine_mode inner = GET_MODE_INNER (mode);
6734 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6737 for (i = parts / 2; i >= 1; i /= 2)
6739 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6740 emit_insn (reduc (dest, tmpsum, tmpsum));
6745 /* Initialize a vector with non-constant elements. FIXME: We can do better
6746 than the current implementation (building a vector on the stack and then
6747 loading it) in many cases. See rs6000.c. */
6750 neon_expand_vector_init (rtx target, rtx vals)
6752 enum machine_mode mode = GET_MODE (target);
6753 enum machine_mode inner = GET_MODE_INNER (mode);
6754 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6757 gcc_assert (VECTOR_MODE_P (mode));
6759 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6760 for (i = 0; i < n_elts; i++)
6761 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6762 XVECEXP (vals, 0, i));
6764 emit_move_insn (target, mem);
6767 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6768 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6769 reported source locations are bogus. */
6772 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6777 gcc_assert (GET_CODE (operand) == CONST_INT);
6779 lane = INTVAL (operand);
6781 if (lane < low || lane >= high)
6785 /* Bounds-check lanes. */
6788 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6790 bounds_check (operand, low, high, "lane out of range");
6793 /* Bounds-check constants. */
6796 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6798 bounds_check (operand, low, high, "constant out of range");
6802 neon_element_bits (enum machine_mode mode)
6805 return GET_MODE_BITSIZE (mode);
6807 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6811 /* Predicates for `match_operand' and `match_operator'. */
6813 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6815 cirrus_memory_offset (rtx op)
6817 /* Reject eliminable registers. */
6818 if (! (reload_in_progress || reload_completed)
6819 && ( reg_mentioned_p (frame_pointer_rtx, op)
6820 || reg_mentioned_p (arg_pointer_rtx, op)
6821 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6822 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6823 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6824 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6827 if (GET_CODE (op) == MEM)
6833 /* Match: (mem (reg)). */
6834 if (GET_CODE (ind) == REG)
6840 if (GET_CODE (ind) == PLUS
6841 && GET_CODE (XEXP (ind, 0)) == REG
6842 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6843 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6850 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6851 WB is true if full writeback address modes are allowed and is false
6852 if limited writeback address modes (POST_INC and PRE_DEC) are
6856 arm_coproc_mem_operand (rtx op, bool wb)
6860 /* Reject eliminable registers. */
6861 if (! (reload_in_progress || reload_completed)
6862 && ( reg_mentioned_p (frame_pointer_rtx, op)
6863 || reg_mentioned_p (arg_pointer_rtx, op)
6864 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6865 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6866 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6867 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6870 /* Constants are converted into offsets from labels. */
6871 if (GET_CODE (op) != MEM)
6876 if (reload_completed
6877 && (GET_CODE (ind) == LABEL_REF
6878 || (GET_CODE (ind) == CONST
6879 && GET_CODE (XEXP (ind, 0)) == PLUS
6880 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6881 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6884 /* Match: (mem (reg)). */
6885 if (GET_CODE (ind) == REG)
6886 return arm_address_register_rtx_p (ind, 0);
6888 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
6889 acceptable in any case (subject to verification by
6890 arm_address_register_rtx_p). We need WB to be true to accept
6891 PRE_INC and POST_DEC. */
6892 if (GET_CODE (ind) == POST_INC
6893 || GET_CODE (ind) == PRE_DEC
6895 && (GET_CODE (ind) == PRE_INC
6896 || GET_CODE (ind) == POST_DEC)))
6897 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6900 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6901 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6902 && GET_CODE (XEXP (ind, 1)) == PLUS
6903 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6904 ind = XEXP (ind, 1);
6909 if (GET_CODE (ind) == PLUS
6910 && GET_CODE (XEXP (ind, 0)) == REG
6911 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6912 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6913 && INTVAL (XEXP (ind, 1)) > -1024
6914 && INTVAL (XEXP (ind, 1)) < 1024
6915 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6921 /* Return TRUE if OP is a memory operand which we can load or store a vector
6922 to/from. If CORE is true, we're moving from ARM registers not Neon
6925 neon_vector_mem_operand (rtx op, bool core)
6929 /* Reject eliminable registers. */
6930 if (! (reload_in_progress || reload_completed)
6931 && ( reg_mentioned_p (frame_pointer_rtx, op)
6932 || reg_mentioned_p (arg_pointer_rtx, op)
6933 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6934 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6935 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6936 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6939 /* Constants are converted into offsets from labels. */
6940 if (GET_CODE (op) != MEM)
6945 if (reload_completed
6946 && (GET_CODE (ind) == LABEL_REF
6947 || (GET_CODE (ind) == CONST
6948 && GET_CODE (XEXP (ind, 0)) == PLUS
6949 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6950 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6953 /* Match: (mem (reg)). */
6954 if (GET_CODE (ind) == REG)
6955 return arm_address_register_rtx_p (ind, 0);
6957 /* Allow post-increment with Neon registers. */
6958 if (!core && GET_CODE (ind) == POST_INC)
6959 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6962 /* FIXME: We can support this too if we use VLD1/VST1. */
6964 && GET_CODE (ind) == POST_MODIFY
6965 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6966 && GET_CODE (XEXP (ind, 1)) == PLUS
6967 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6968 ind = XEXP (ind, 1);
6975 && GET_CODE (ind) == PLUS
6976 && GET_CODE (XEXP (ind, 0)) == REG
6977 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6978 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6979 && INTVAL (XEXP (ind, 1)) > -1024
6980 && INTVAL (XEXP (ind, 1)) < 1016
6981 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6987 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6990 neon_struct_mem_operand (rtx op)
6994 /* Reject eliminable registers. */
6995 if (! (reload_in_progress || reload_completed)
6996 && ( reg_mentioned_p (frame_pointer_rtx, op)
6997 || reg_mentioned_p (arg_pointer_rtx, op)
6998 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6999 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7000 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7001 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7004 /* Constants are converted into offsets from labels. */
7005 if (GET_CODE (op) != MEM)
7010 if (reload_completed
7011 && (GET_CODE (ind) == LABEL_REF
7012 || (GET_CODE (ind) == CONST
7013 && GET_CODE (XEXP (ind, 0)) == PLUS
7014 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7015 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7018 /* Match: (mem (reg)). */
7019 if (GET_CODE (ind) == REG)
7020 return arm_address_register_rtx_p (ind, 0);
7025 /* Return true if X is a register that will be eliminated later on. */
7027 arm_eliminable_register (rtx x)
7029 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7030 || REGNO (x) == ARG_POINTER_REGNUM
7031 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7032 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7035 /* Return GENERAL_REGS if a scratch register required to reload x to/from
7036 coprocessor registers. Otherwise return NO_REGS. */
7039 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7042 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7043 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7044 && neon_vector_mem_operand (x, FALSE))
7047 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7050 return GENERAL_REGS;
7053 /* Values which must be returned in the most-significant end of the return
7057 arm_return_in_msb (const_tree valtype)
7059 return (TARGET_AAPCS_BASED
7061 && (AGGREGATE_TYPE_P (valtype)
7062 || TREE_CODE (valtype) == COMPLEX_TYPE));
7065 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7066 Use by the Cirrus Maverick code which has to workaround
7067 a hardware bug triggered by such instructions. */
7069 arm_memory_load_p (rtx insn)
7071 rtx body, lhs, rhs;;
7073 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7076 body = PATTERN (insn);
7078 if (GET_CODE (body) != SET)
7081 lhs = XEXP (body, 0);
7082 rhs = XEXP (body, 1);
7084 lhs = REG_OR_SUBREG_RTX (lhs);
7086 /* If the destination is not a general purpose
7087 register we do not have to worry. */
7088 if (GET_CODE (lhs) != REG
7089 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7092 /* As well as loads from memory we also have to react
7093 to loads of invalid constants which will be turned
7094 into loads from the minipool. */
7095 return (GET_CODE (rhs) == MEM
7096 || GET_CODE (rhs) == SYMBOL_REF
7097 || note_invalid_constants (insn, -1, false));
7100 /* Return TRUE if INSN is a Cirrus instruction. */
7102 arm_cirrus_insn_p (rtx insn)
7104 enum attr_cirrus attr;
7106 /* get_attr cannot accept USE or CLOBBER. */
7108 || GET_CODE (insn) != INSN
7109 || GET_CODE (PATTERN (insn)) == USE
7110 || GET_CODE (PATTERN (insn)) == CLOBBER)
7113 attr = get_attr_cirrus (insn);
7115 return attr != CIRRUS_NOT;
7118 /* Cirrus reorg for invalid instruction combinations. */
7120 cirrus_reorg (rtx first)
7122 enum attr_cirrus attr;
7123 rtx body = PATTERN (first);
7127 /* Any branch must be followed by 2 non Cirrus instructions. */
7128 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7131 t = next_nonnote_insn (first);
7133 if (arm_cirrus_insn_p (t))
7136 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7140 emit_insn_after (gen_nop (), first);
7145 /* (float (blah)) is in parallel with a clobber. */
7146 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7147 body = XVECEXP (body, 0, 0);
7149 if (GET_CODE (body) == SET)
7151 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7153 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7154 be followed by a non Cirrus insn. */
7155 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7157 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7158 emit_insn_after (gen_nop (), first);
7162 else if (arm_memory_load_p (first))
7164 unsigned int arm_regno;
7166 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7167 ldr/cfmv64hr combination where the Rd field is the same
7168 in both instructions must be split with a non Cirrus
7175 /* Get Arm register number for ldr insn. */
7176 if (GET_CODE (lhs) == REG)
7177 arm_regno = REGNO (lhs);
7180 gcc_assert (GET_CODE (rhs) == REG);
7181 arm_regno = REGNO (rhs);
7185 first = next_nonnote_insn (first);
7187 if (! arm_cirrus_insn_p (first))
7190 body = PATTERN (first);
7192 /* (float (blah)) is in parallel with a clobber. */
7193 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7194 body = XVECEXP (body, 0, 0);
7196 if (GET_CODE (body) == FLOAT)
7197 body = XEXP (body, 0);
7199 if (get_attr_cirrus (first) == CIRRUS_MOVE
7200 && GET_CODE (XEXP (body, 1)) == REG
7201 && arm_regno == REGNO (XEXP (body, 1)))
7202 emit_insn_after (gen_nop (), first);
7208 /* get_attr cannot accept USE or CLOBBER. */
7210 || GET_CODE (first) != INSN
7211 || GET_CODE (PATTERN (first)) == USE
7212 || GET_CODE (PATTERN (first)) == CLOBBER)
7215 attr = get_attr_cirrus (first);
7217 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7218 must be followed by a non-coprocessor instruction. */
7219 if (attr == CIRRUS_COMPARE)
7223 t = next_nonnote_insn (first);
7225 if (arm_cirrus_insn_p (t))
7228 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7232 emit_insn_after (gen_nop (), first);
7238 /* Return TRUE if X references a SYMBOL_REF. */
7240 symbol_mentioned_p (rtx x)
7245 if (GET_CODE (x) == SYMBOL_REF)
7248 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7249 are constant offsets, not symbols. */
7250 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7253 fmt = GET_RTX_FORMAT (GET_CODE (x));
7255 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7261 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7262 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7265 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7272 /* Return TRUE if X references a LABEL_REF. */
7274 label_mentioned_p (rtx x)
7279 if (GET_CODE (x) == LABEL_REF)
7282 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7283 instruction, but they are constant offsets, not symbols. */
7284 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7287 fmt = GET_RTX_FORMAT (GET_CODE (x));
7288 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7294 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7295 if (label_mentioned_p (XVECEXP (x, i, j)))
7298 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7306 tls_mentioned_p (rtx x)
7308 switch (GET_CODE (x))
7311 return tls_mentioned_p (XEXP (x, 0));
7314 if (XINT (x, 1) == UNSPEC_TLS)
7322 /* Must not copy a SET whose source operand is PC-relative. */
7325 arm_cannot_copy_insn_p (rtx insn)
7327 rtx pat = PATTERN (insn);
7329 if (GET_CODE (pat) == SET)
7331 rtx rhs = SET_SRC (pat);
7333 if (GET_CODE (rhs) == UNSPEC
7334 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7337 if (GET_CODE (rhs) == MEM
7338 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7339 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7349 enum rtx_code code = GET_CODE (x);
7366 /* Return 1 if memory locations are adjacent. */
7368 adjacent_mem_locations (rtx a, rtx b)
7370 /* We don't guarantee to preserve the order of these memory refs. */
7371 if (volatile_refs_p (a) || volatile_refs_p (b))
7374 if ((GET_CODE (XEXP (a, 0)) == REG
7375 || (GET_CODE (XEXP (a, 0)) == PLUS
7376 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7377 && (GET_CODE (XEXP (b, 0)) == REG
7378 || (GET_CODE (XEXP (b, 0)) == PLUS
7379 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7381 HOST_WIDE_INT val0 = 0, val1 = 0;
7385 if (GET_CODE (XEXP (a, 0)) == PLUS)
7387 reg0 = XEXP (XEXP (a, 0), 0);
7388 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7393 if (GET_CODE (XEXP (b, 0)) == PLUS)
7395 reg1 = XEXP (XEXP (b, 0), 0);
7396 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7401 /* Don't accept any offset that will require multiple
7402 instructions to handle, since this would cause the
7403 arith_adjacentmem pattern to output an overlong sequence. */
7404 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
7407 /* Don't allow an eliminable register: register elimination can make
7408 the offset too large. */
7409 if (arm_eliminable_register (reg0))
7412 val_diff = val1 - val0;
7416 /* If the target has load delay slots, then there's no benefit
7417 to using an ldm instruction unless the offset is zero and
7418 we are optimizing for size. */
7419 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7420 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7421 && (val_diff == 4 || val_diff == -4));
7424 return ((REGNO (reg0) == REGNO (reg1))
7425 && (val_diff == 4 || val_diff == -4));
7432 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7433 HOST_WIDE_INT *load_offset)
7435 int unsorted_regs[4];
7436 HOST_WIDE_INT unsorted_offsets[4];
7441 /* Can only handle 2, 3, or 4 insns at present,
7442 though could be easily extended if required. */
7443 gcc_assert (nops >= 2 && nops <= 4);
7445 memset (order, 0, 4 * sizeof (int));
7447 /* Loop over the operands and check that the memory references are
7448 suitable (i.e. immediate offsets from the same base register). At
7449 the same time, extract the target register, and the memory
7451 for (i = 0; i < nops; i++)
7456 /* Convert a subreg of a mem into the mem itself. */
7457 if (GET_CODE (operands[nops + i]) == SUBREG)
7458 operands[nops + i] = alter_subreg (operands + (nops + i));
7460 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7462 /* Don't reorder volatile memory references; it doesn't seem worth
7463 looking for the case where the order is ok anyway. */
7464 if (MEM_VOLATILE_P (operands[nops + i]))
7467 offset = const0_rtx;
7469 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7470 || (GET_CODE (reg) == SUBREG
7471 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7472 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7473 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7475 || (GET_CODE (reg) == SUBREG
7476 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7477 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7482 base_reg = REGNO (reg);
7483 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7484 ? REGNO (operands[i])
7485 : REGNO (SUBREG_REG (operands[i])));
7490 if (base_reg != (int) REGNO (reg))
7491 /* Not addressed from the same base register. */
7494 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7495 ? REGNO (operands[i])
7496 : REGNO (SUBREG_REG (operands[i])));
7497 if (unsorted_regs[i] < unsorted_regs[order[0]])
7501 /* If it isn't an integer register, or if it overwrites the
7502 base register but isn't the last insn in the list, then
7503 we can't do this. */
7504 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7505 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7508 unsorted_offsets[i] = INTVAL (offset);
7511 /* Not a suitable memory address. */
7515 /* All the useful information has now been extracted from the
7516 operands into unsorted_regs and unsorted_offsets; additionally,
7517 order[0] has been set to the lowest numbered register in the
7518 list. Sort the registers into order, and check that the memory
7519 offsets are ascending and adjacent. */
7521 for (i = 1; i < nops; i++)
7525 order[i] = order[i - 1];
7526 for (j = 0; j < nops; j++)
7527 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7528 && (order[i] == order[i - 1]
7529 || unsorted_regs[j] < unsorted_regs[order[i]]))
7532 /* Have we found a suitable register? if not, one must be used more
7534 if (order[i] == order[i - 1])
7537 /* Is the memory address adjacent and ascending? */
7538 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7546 for (i = 0; i < nops; i++)
7547 regs[i] = unsorted_regs[order[i]];
7549 *load_offset = unsorted_offsets[order[0]];
7552 if (unsorted_offsets[order[0]] == 0)
7553 return 1; /* ldmia */
7555 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7556 return 2; /* ldmib */
7558 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7559 return 3; /* ldmda */
7561 if (unsorted_offsets[order[nops - 1]] == -4)
7562 return 4; /* ldmdb */
7564 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7565 if the offset isn't small enough. The reason 2 ldrs are faster
7566 is because these ARMs are able to do more than one cache access
7567 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7568 whilst the ARM8 has a double bandwidth cache. This means that
7569 these cores can do both an instruction fetch and a data fetch in
7570 a single cycle, so the trick of calculating the address into a
7571 scratch register (one of the result regs) and then doing a load
7572 multiple actually becomes slower (and no smaller in code size).
7573 That is the transformation
7575 ldr rd1, [rbase + offset]
7576 ldr rd2, [rbase + offset + 4]
7580 add rd1, rbase, offset
7581 ldmia rd1, {rd1, rd2}
7583 produces worse code -- '3 cycles + any stalls on rd2' instead of
7584 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7585 access per cycle, the first sequence could never complete in less
7586 than 6 cycles, whereas the ldm sequence would only take 5 and
7587 would make better use of sequential accesses if not hitting the
7590 We cheat here and test 'arm_ld_sched' which we currently know to
7591 only be true for the ARM8, ARM9 and StrongARM. If this ever
7592 changes, then the test below needs to be reworked. */
7593 if (nops == 2 && arm_ld_sched)
7596 /* Can't do it without setting up the offset, only do this if it takes
7597 no more than one insn. */
7598 return (const_ok_for_arm (unsorted_offsets[order[0]])
7599 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7603 emit_ldm_seq (rtx *operands, int nops)
7607 HOST_WIDE_INT offset;
7611 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7614 strcpy (buf, "ldm%(ia%)\t");
7618 strcpy (buf, "ldm%(ib%)\t");
7622 strcpy (buf, "ldm%(da%)\t");
7626 strcpy (buf, "ldm%(db%)\t");
7631 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7632 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7635 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7636 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7638 output_asm_insn (buf, operands);
7640 strcpy (buf, "ldm%(ia%)\t");
7647 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7648 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7650 for (i = 1; i < nops; i++)
7651 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7652 reg_names[regs[i]]);
7654 strcat (buf, "}\t%@ phole ldm");
7656 output_asm_insn (buf, operands);
7661 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7662 HOST_WIDE_INT * load_offset)
7664 int unsorted_regs[4];
7665 HOST_WIDE_INT unsorted_offsets[4];
7670 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7671 extended if required. */
7672 gcc_assert (nops >= 2 && nops <= 4);
7674 memset (order, 0, 4 * sizeof (int));
7676 /* Loop over the operands and check that the memory references are
7677 suitable (i.e. immediate offsets from the same base register). At
7678 the same time, extract the target register, and the memory
7680 for (i = 0; i < nops; i++)
7685 /* Convert a subreg of a mem into the mem itself. */
7686 if (GET_CODE (operands[nops + i]) == SUBREG)
7687 operands[nops + i] = alter_subreg (operands + (nops + i));
7689 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7691 /* Don't reorder volatile memory references; it doesn't seem worth
7692 looking for the case where the order is ok anyway. */
7693 if (MEM_VOLATILE_P (operands[nops + i]))
7696 offset = const0_rtx;
7698 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7699 || (GET_CODE (reg) == SUBREG
7700 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7701 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7702 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7704 || (GET_CODE (reg) == SUBREG
7705 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7706 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7711 base_reg = REGNO (reg);
7712 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7713 ? REGNO (operands[i])
7714 : REGNO (SUBREG_REG (operands[i])));
7719 if (base_reg != (int) REGNO (reg))
7720 /* Not addressed from the same base register. */
7723 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7724 ? REGNO (operands[i])
7725 : REGNO (SUBREG_REG (operands[i])));
7726 if (unsorted_regs[i] < unsorted_regs[order[0]])
7730 /* If it isn't an integer register, then we can't do this. */
7731 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7734 unsorted_offsets[i] = INTVAL (offset);
7737 /* Not a suitable memory address. */
7741 /* All the useful information has now been extracted from the
7742 operands into unsorted_regs and unsorted_offsets; additionally,
7743 order[0] has been set to the lowest numbered register in the
7744 list. Sort the registers into order, and check that the memory
7745 offsets are ascending and adjacent. */
7747 for (i = 1; i < nops; i++)
7751 order[i] = order[i - 1];
7752 for (j = 0; j < nops; j++)
7753 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7754 && (order[i] == order[i - 1]
7755 || unsorted_regs[j] < unsorted_regs[order[i]]))
7758 /* Have we found a suitable register? if not, one must be used more
7760 if (order[i] == order[i - 1])
7763 /* Is the memory address adjacent and ascending? */
7764 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7772 for (i = 0; i < nops; i++)
7773 regs[i] = unsorted_regs[order[i]];
7775 *load_offset = unsorted_offsets[order[0]];
7778 if (unsorted_offsets[order[0]] == 0)
7779 return 1; /* stmia */
7781 if (unsorted_offsets[order[0]] == 4)
7782 return 2; /* stmib */
7784 if (unsorted_offsets[order[nops - 1]] == 0)
7785 return 3; /* stmda */
7787 if (unsorted_offsets[order[nops - 1]] == -4)
7788 return 4; /* stmdb */
7794 emit_stm_seq (rtx *operands, int nops)
7798 HOST_WIDE_INT offset;
7802 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7805 strcpy (buf, "stm%(ia%)\t");
7809 strcpy (buf, "stm%(ib%)\t");
7813 strcpy (buf, "stm%(da%)\t");
7817 strcpy (buf, "stm%(db%)\t");
7824 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7825 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7827 for (i = 1; i < nops; i++)
7828 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7829 reg_names[regs[i]]);
7831 strcat (buf, "}\t%@ phole stm");
7833 output_asm_insn (buf, operands);
7837 /* Routines for use in generating RTL. */
7840 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7841 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7843 HOST_WIDE_INT offset = *offsetp;
7846 int sign = up ? 1 : -1;
7849 /* XScale has load-store double instructions, but they have stricter
7850 alignment requirements than load-store multiple, so we cannot
7853 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7854 the pipeline until completion.
7862 An ldr instruction takes 1-3 cycles, but does not block the
7871 Best case ldr will always win. However, the more ldr instructions
7872 we issue, the less likely we are to be able to schedule them well.
7873 Using ldr instructions also increases code size.
7875 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7876 for counts of 3 or 4 regs. */
7877 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7883 for (i = 0; i < count; i++)
7885 addr = plus_constant (from, i * 4 * sign);
7886 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7887 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7893 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7903 result = gen_rtx_PARALLEL (VOIDmode,
7904 rtvec_alloc (count + (write_back ? 1 : 0)));
7907 XVECEXP (result, 0, 0)
7908 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7913 for (j = 0; i < count; i++, j++)
7915 addr = plus_constant (from, j * 4 * sign);
7916 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7917 XVECEXP (result, 0, i)
7918 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7929 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7930 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7932 HOST_WIDE_INT offset = *offsetp;
7935 int sign = up ? 1 : -1;
7938 /* See arm_gen_load_multiple for discussion of
7939 the pros/cons of ldm/stm usage for XScale. */
7940 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7946 for (i = 0; i < count; i++)
7948 addr = plus_constant (to, i * 4 * sign);
7949 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7950 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7956 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7966 result = gen_rtx_PARALLEL (VOIDmode,
7967 rtvec_alloc (count + (write_back ? 1 : 0)));
7970 XVECEXP (result, 0, 0)
7971 = gen_rtx_SET (VOIDmode, to,
7972 plus_constant (to, count * 4 * sign));
7977 for (j = 0; i < count; i++, j++)
7979 addr = plus_constant (to, j * 4 * sign);
7980 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7981 XVECEXP (result, 0, i)
7982 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7993 arm_gen_movmemqi (rtx *operands)
7995 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7996 HOST_WIDE_INT srcoffset, dstoffset;
7998 rtx src, dst, srcbase, dstbase;
7999 rtx part_bytes_reg = NULL;
8002 if (GET_CODE (operands[2]) != CONST_INT
8003 || GET_CODE (operands[3]) != CONST_INT
8004 || INTVAL (operands[2]) > 64
8005 || INTVAL (operands[3]) & 3)
8008 dstbase = operands[0];
8009 srcbase = operands[1];
8011 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8012 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8014 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8015 out_words_to_go = INTVAL (operands[2]) / 4;
8016 last_bytes = INTVAL (operands[2]) & 3;
8017 dstoffset = srcoffset = 0;
8019 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8020 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8022 for (i = 0; in_words_to_go >= 2; i+=4)
8024 if (in_words_to_go > 4)
8025 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8026 srcbase, &srcoffset));
8028 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8029 FALSE, srcbase, &srcoffset));
8031 if (out_words_to_go)
8033 if (out_words_to_go > 4)
8034 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8035 dstbase, &dstoffset));
8036 else if (out_words_to_go != 1)
8037 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8041 dstbase, &dstoffset));
8044 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8045 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8046 if (last_bytes != 0)
8048 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8054 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8055 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8058 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8059 if (out_words_to_go)
8063 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8064 sreg = copy_to_reg (mem);
8066 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8067 emit_move_insn (mem, sreg);
8070 gcc_assert (!in_words_to_go); /* Sanity check */
8075 gcc_assert (in_words_to_go > 0);
8077 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8078 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8081 gcc_assert (!last_bytes || part_bytes_reg);
8083 if (BYTES_BIG_ENDIAN && last_bytes)
8085 rtx tmp = gen_reg_rtx (SImode);
8087 /* The bytes we want are in the top end of the word. */
8088 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8089 GEN_INT (8 * (4 - last_bytes))));
8090 part_bytes_reg = tmp;
8094 mem = adjust_automodify_address (dstbase, QImode,
8095 plus_constant (dst, last_bytes - 1),
8096 dstoffset + last_bytes - 1);
8097 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8101 tmp = gen_reg_rtx (SImode);
8102 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8103 part_bytes_reg = tmp;
8112 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8113 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8117 rtx tmp = gen_reg_rtx (SImode);
8118 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8119 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8120 part_bytes_reg = tmp;
8127 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8128 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8135 /* Select a dominance comparison mode if possible for a test of the general
8136 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8137 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8138 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8139 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8140 In all cases OP will be either EQ or NE, but we don't need to know which
8141 here. If we are unable to support a dominance comparison we return
8142 CC mode. This will then fail to match for the RTL expressions that
8143 generate this call. */
8145 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8147 enum rtx_code cond1, cond2;
8150 /* Currently we will probably get the wrong result if the individual
8151 comparisons are not simple. This also ensures that it is safe to
8152 reverse a comparison if necessary. */
8153 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8155 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8159 /* The if_then_else variant of this tests the second condition if the
8160 first passes, but is true if the first fails. Reverse the first
8161 condition to get a true "inclusive-or" expression. */
8162 if (cond_or == DOM_CC_NX_OR_Y)
8163 cond1 = reverse_condition (cond1);
8165 /* If the comparisons are not equal, and one doesn't dominate the other,
8166 then we can't do this. */
8168 && !comparison_dominates_p (cond1, cond2)
8169 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8174 enum rtx_code temp = cond1;
8182 if (cond_or == DOM_CC_X_AND_Y)
8187 case EQ: return CC_DEQmode;
8188 case LE: return CC_DLEmode;
8189 case LEU: return CC_DLEUmode;
8190 case GE: return CC_DGEmode;
8191 case GEU: return CC_DGEUmode;
8192 default: gcc_unreachable ();
8196 if (cond_or == DOM_CC_X_AND_Y)
8212 if (cond_or == DOM_CC_X_AND_Y)
8228 if (cond_or == DOM_CC_X_AND_Y)
8244 if (cond_or == DOM_CC_X_AND_Y)
8259 /* The remaining cases only occur when both comparisons are the
8262 gcc_assert (cond1 == cond2);
8266 gcc_assert (cond1 == cond2);
8270 gcc_assert (cond1 == cond2);
8274 gcc_assert (cond1 == cond2);
8278 gcc_assert (cond1 == cond2);
8287 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8289 /* All floating point compares return CCFP if it is an equality
8290 comparison, and CCFPE otherwise. */
8291 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8311 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8320 /* A compare with a shifted operand. Because of canonicalization, the
8321 comparison will have to be swapped when we emit the assembler. */
8322 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8323 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8324 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8325 || GET_CODE (x) == ROTATERT))
8328 /* This operation is performed swapped, but since we only rely on the Z
8329 flag we don't need an additional mode. */
8330 if (GET_MODE (y) == SImode && REG_P (y)
8331 && GET_CODE (x) == NEG
8332 && (op == EQ || op == NE))
8335 /* This is a special case that is used by combine to allow a
8336 comparison of a shifted byte load to be split into a zero-extend
8337 followed by a comparison of the shifted integer (only valid for
8338 equalities and unsigned inequalities). */
8339 if (GET_MODE (x) == SImode
8340 && GET_CODE (x) == ASHIFT
8341 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8342 && GET_CODE (XEXP (x, 0)) == SUBREG
8343 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8344 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8345 && (op == EQ || op == NE
8346 || op == GEU || op == GTU || op == LTU || op == LEU)
8347 && GET_CODE (y) == CONST_INT)
8350 /* A construct for a conditional compare, if the false arm contains
8351 0, then both conditions must be true, otherwise either condition
8352 must be true. Not all conditions are possible, so CCmode is
8353 returned if it can't be done. */
8354 if (GET_CODE (x) == IF_THEN_ELSE
8355 && (XEXP (x, 2) == const0_rtx
8356 || XEXP (x, 2) == const1_rtx)
8357 && COMPARISON_P (XEXP (x, 0))
8358 && COMPARISON_P (XEXP (x, 1)))
8359 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8360 INTVAL (XEXP (x, 2)));
8362 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8363 if (GET_CODE (x) == AND
8364 && COMPARISON_P (XEXP (x, 0))
8365 && COMPARISON_P (XEXP (x, 1)))
8366 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8369 if (GET_CODE (x) == IOR
8370 && COMPARISON_P (XEXP (x, 0))
8371 && COMPARISON_P (XEXP (x, 1)))
8372 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8375 /* An operation (on Thumb) where we want to test for a single bit.
8376 This is done by shifting that bit up into the top bit of a
8377 scratch register; we can then branch on the sign bit. */
8379 && GET_MODE (x) == SImode
8380 && (op == EQ || op == NE)
8381 && GET_CODE (x) == ZERO_EXTRACT
8382 && XEXP (x, 1) == const1_rtx)
8385 /* An operation that sets the condition codes as a side-effect, the
8386 V flag is not set correctly, so we can only use comparisons where
8387 this doesn't matter. (For LT and GE we can use "mi" and "pl"
8389 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8390 if (GET_MODE (x) == SImode
8392 && (op == EQ || op == NE || op == LT || op == GE)
8393 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8394 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8395 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8396 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8397 || GET_CODE (x) == LSHIFTRT
8398 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8399 || GET_CODE (x) == ROTATERT
8400 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8403 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8406 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8407 && GET_CODE (x) == PLUS
8408 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8414 /* X and Y are two things to compare using CODE. Emit the compare insn and
8415 return the rtx for register 0 in the proper mode. FP means this is a
8416 floating point compare: I don't think that it is needed on the arm. */
8418 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8420 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8421 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8423 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8428 /* Generate a sequence of insns that will generate the correct return
8429 address mask depending on the physical architecture that the program
8432 arm_gen_return_addr_mask (void)
8434 rtx reg = gen_reg_rtx (Pmode);
8436 emit_insn (gen_return_addr_mask (reg));
8441 arm_reload_in_hi (rtx *operands)
8443 rtx ref = operands[1];
8445 HOST_WIDE_INT offset = 0;
8447 if (GET_CODE (ref) == SUBREG)
8449 offset = SUBREG_BYTE (ref);
8450 ref = SUBREG_REG (ref);
8453 if (GET_CODE (ref) == REG)
8455 /* We have a pseudo which has been spilt onto the stack; there
8456 are two cases here: the first where there is a simple
8457 stack-slot replacement and a second where the stack-slot is
8458 out of range, or is used as a subreg. */
8459 if (reg_equiv_mem[REGNO (ref)])
8461 ref = reg_equiv_mem[REGNO (ref)];
8462 base = find_replacement (&XEXP (ref, 0));
8465 /* The slot is out of range, or was dressed up in a SUBREG. */
8466 base = reg_equiv_address[REGNO (ref)];
8469 base = find_replacement (&XEXP (ref, 0));
8471 /* Handle the case where the address is too complex to be offset by 1. */
8472 if (GET_CODE (base) == MINUS
8473 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8475 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8477 emit_set_insn (base_plus, base);
8480 else if (GET_CODE (base) == PLUS)
8482 /* The addend must be CONST_INT, or we would have dealt with it above. */
8483 HOST_WIDE_INT hi, lo;
8485 offset += INTVAL (XEXP (base, 1));
8486 base = XEXP (base, 0);
8488 /* Rework the address into a legal sequence of insns. */
8489 /* Valid range for lo is -4095 -> 4095 */
8492 : -((-offset) & 0xfff));
8494 /* Corner case, if lo is the max offset then we would be out of range
8495 once we have added the additional 1 below, so bump the msb into the
8496 pre-loading insn(s). */
8500 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8501 ^ (HOST_WIDE_INT) 0x80000000)
8502 - (HOST_WIDE_INT) 0x80000000);
8504 gcc_assert (hi + lo == offset);
8508 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8510 /* Get the base address; addsi3 knows how to handle constants
8511 that require more than one insn. */
8512 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8518 /* Operands[2] may overlap operands[0] (though it won't overlap
8519 operands[1]), that's why we asked for a DImode reg -- so we can
8520 use the bit that does not overlap. */
8521 if (REGNO (operands[2]) == REGNO (operands[0]))
8522 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8524 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8526 emit_insn (gen_zero_extendqisi2 (scratch,
8527 gen_rtx_MEM (QImode,
8528 plus_constant (base,
8530 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8531 gen_rtx_MEM (QImode,
8532 plus_constant (base,
8534 if (!BYTES_BIG_ENDIAN)
8535 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8536 gen_rtx_IOR (SImode,
8539 gen_rtx_SUBREG (SImode, operands[0], 0),
8543 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8544 gen_rtx_IOR (SImode,
8545 gen_rtx_ASHIFT (SImode, scratch,
8547 gen_rtx_SUBREG (SImode, operands[0], 0)));
8550 /* Handle storing a half-word to memory during reload by synthesizing as two
8551 byte stores. Take care not to clobber the input values until after we
8552 have moved them somewhere safe. This code assumes that if the DImode
8553 scratch in operands[2] overlaps either the input value or output address
8554 in some way, then that value must die in this insn (we absolutely need
8555 two scratch registers for some corner cases). */
8557 arm_reload_out_hi (rtx *operands)
8559 rtx ref = operands[0];
8560 rtx outval = operands[1];
8562 HOST_WIDE_INT offset = 0;
8564 if (GET_CODE (ref) == SUBREG)
8566 offset = SUBREG_BYTE (ref);
8567 ref = SUBREG_REG (ref);
8570 if (GET_CODE (ref) == REG)
8572 /* We have a pseudo which has been spilt onto the stack; there
8573 are two cases here: the first where there is a simple
8574 stack-slot replacement and a second where the stack-slot is
8575 out of range, or is used as a subreg. */
8576 if (reg_equiv_mem[REGNO (ref)])
8578 ref = reg_equiv_mem[REGNO (ref)];
8579 base = find_replacement (&XEXP (ref, 0));
8582 /* The slot is out of range, or was dressed up in a SUBREG. */
8583 base = reg_equiv_address[REGNO (ref)];
8586 base = find_replacement (&XEXP (ref, 0));
8588 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8590 /* Handle the case where the address is too complex to be offset by 1. */
8591 if (GET_CODE (base) == MINUS
8592 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8594 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8596 /* Be careful not to destroy OUTVAL. */
8597 if (reg_overlap_mentioned_p (base_plus, outval))
8599 /* Updating base_plus might destroy outval, see if we can
8600 swap the scratch and base_plus. */
8601 if (!reg_overlap_mentioned_p (scratch, outval))
8604 scratch = base_plus;
8609 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8611 /* Be conservative and copy OUTVAL into the scratch now,
8612 this should only be necessary if outval is a subreg
8613 of something larger than a word. */
8614 /* XXX Might this clobber base? I can't see how it can,
8615 since scratch is known to overlap with OUTVAL, and
8616 must be wider than a word. */
8617 emit_insn (gen_movhi (scratch_hi, outval));
8618 outval = scratch_hi;
8622 emit_set_insn (base_plus, base);
8625 else if (GET_CODE (base) == PLUS)
8627 /* The addend must be CONST_INT, or we would have dealt with it above. */
8628 HOST_WIDE_INT hi, lo;
8630 offset += INTVAL (XEXP (base, 1));
8631 base = XEXP (base, 0);
8633 /* Rework the address into a legal sequence of insns. */
8634 /* Valid range for lo is -4095 -> 4095 */
8637 : -((-offset) & 0xfff));
8639 /* Corner case, if lo is the max offset then we would be out of range
8640 once we have added the additional 1 below, so bump the msb into the
8641 pre-loading insn(s). */
8645 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8646 ^ (HOST_WIDE_INT) 0x80000000)
8647 - (HOST_WIDE_INT) 0x80000000);
8649 gcc_assert (hi + lo == offset);
8653 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8655 /* Be careful not to destroy OUTVAL. */
8656 if (reg_overlap_mentioned_p (base_plus, outval))
8658 /* Updating base_plus might destroy outval, see if we
8659 can swap the scratch and base_plus. */
8660 if (!reg_overlap_mentioned_p (scratch, outval))
8663 scratch = base_plus;
8668 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8670 /* Be conservative and copy outval into scratch now,
8671 this should only be necessary if outval is a
8672 subreg of something larger than a word. */
8673 /* XXX Might this clobber base? I can't see how it
8674 can, since scratch is known to overlap with
8676 emit_insn (gen_movhi (scratch_hi, outval));
8677 outval = scratch_hi;
8681 /* Get the base address; addsi3 knows how to handle constants
8682 that require more than one insn. */
8683 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8689 if (BYTES_BIG_ENDIAN)
8691 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8692 plus_constant (base, offset + 1)),
8693 gen_lowpart (QImode, outval)));
8694 emit_insn (gen_lshrsi3 (scratch,
8695 gen_rtx_SUBREG (SImode, outval, 0),
8697 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8698 gen_lowpart (QImode, scratch)));
8702 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8703 gen_lowpart (QImode, outval)));
8704 emit_insn (gen_lshrsi3 (scratch,
8705 gen_rtx_SUBREG (SImode, outval, 0),
8707 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8708 plus_constant (base, offset + 1)),
8709 gen_lowpart (QImode, scratch)));
8713 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8714 (padded to the size of a word) should be passed in a register. */
8717 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8719 if (TARGET_AAPCS_BASED)
8720 return must_pass_in_stack_var_size (mode, type);
8722 return must_pass_in_stack_var_size_or_pad (mode, type);
8726 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8727 Return true if an argument passed on the stack should be padded upwards,
8728 i.e. if the least-significant byte has useful data.
8729 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8730 aggregate types are placed in the lowest memory address. */
8733 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8735 if (!TARGET_AAPCS_BASED)
8736 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8738 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8745 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8746 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8747 byte of the register has useful data, and return the opposite if the
8748 most significant byte does.
8749 For AAPCS, small aggregates and small complex types are always padded
8753 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8754 tree type, int first ATTRIBUTE_UNUSED)
8756 if (TARGET_AAPCS_BASED
8758 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8759 && int_size_in_bytes (type) <= 4)
8762 /* Otherwise, use default padding. */
8763 return !BYTES_BIG_ENDIAN;
8767 /* Print a symbolic form of X to the debug file, F. */
8769 arm_print_value (FILE *f, rtx x)
8771 switch (GET_CODE (x))
8774 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8778 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8786 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8788 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8789 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8797 fprintf (f, "\"%s\"", XSTR (x, 0));
8801 fprintf (f, "`%s'", XSTR (x, 0));
8805 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8809 arm_print_value (f, XEXP (x, 0));
8813 arm_print_value (f, XEXP (x, 0));
8815 arm_print_value (f, XEXP (x, 1));
8823 fprintf (f, "????");
8828 /* Routines for manipulation of the constant pool. */
8830 /* Arm instructions cannot load a large constant directly into a
8831 register; they have to come from a pc relative load. The constant
8832 must therefore be placed in the addressable range of the pc
8833 relative load. Depending on the precise pc relative load
8834 instruction the range is somewhere between 256 bytes and 4k. This
8835 means that we often have to dump a constant inside a function, and
8836 generate code to branch around it.
8838 It is important to minimize this, since the branches will slow
8839 things down and make the code larger.
8841 Normally we can hide the table after an existing unconditional
8842 branch so that there is no interruption of the flow, but in the
8843 worst case the code looks like this:
8861 We fix this by performing a scan after scheduling, which notices
8862 which instructions need to have their operands fetched from the
8863 constant table and builds the table.
8865 The algorithm starts by building a table of all the constants that
8866 need fixing up and all the natural barriers in the function (places
8867 where a constant table can be dropped without breaking the flow).
8868 For each fixup we note how far the pc-relative replacement will be
8869 able to reach and the offset of the instruction into the function.
8871 Having built the table we then group the fixes together to form
8872 tables that are as large as possible (subject to addressing
8873 constraints) and emit each table of constants after the last
8874 barrier that is within range of all the instructions in the group.
8875 If a group does not contain a barrier, then we forcibly create one
8876 by inserting a jump instruction into the flow. Once the table has
8877 been inserted, the insns are then modified to reference the
8878 relevant entry in the pool.
8880 Possible enhancements to the algorithm (not implemented) are:
8882 1) For some processors and object formats, there may be benefit in
8883 aligning the pools to the start of cache lines; this alignment
8884 would need to be taken into account when calculating addressability
8887 /* These typedefs are located at the start of this file, so that
8888 they can be used in the prototypes there. This comment is to
8889 remind readers of that fact so that the following structures
8890 can be understood more easily.
8892 typedef struct minipool_node Mnode;
8893 typedef struct minipool_fixup Mfix; */
8895 struct minipool_node
8897 /* Doubly linked chain of entries. */
8900 /* The maximum offset into the code that this entry can be placed. While
8901 pushing fixes for forward references, all entries are sorted in order
8902 of increasing max_address. */
8903 HOST_WIDE_INT max_address;
8904 /* Similarly for an entry inserted for a backwards ref. */
8905 HOST_WIDE_INT min_address;
8906 /* The number of fixes referencing this entry. This can become zero
8907 if we "unpush" an entry. In this case we ignore the entry when we
8908 come to emit the code. */
8910 /* The offset from the start of the minipool. */
8911 HOST_WIDE_INT offset;
8912 /* The value in table. */
8914 /* The mode of value. */
8915 enum machine_mode mode;
8916 /* The size of the value. With iWMMXt enabled
8917 sizes > 4 also imply an alignment of 8-bytes. */
8921 struct minipool_fixup
8925 HOST_WIDE_INT address;
8927 enum machine_mode mode;
8931 HOST_WIDE_INT forwards;
8932 HOST_WIDE_INT backwards;
8935 /* Fixes less than a word need padding out to a word boundary. */
8936 #define MINIPOOL_FIX_SIZE(mode) \
8937 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8939 static Mnode * minipool_vector_head;
8940 static Mnode * minipool_vector_tail;
8941 static rtx minipool_vector_label;
8942 static int minipool_pad;
8944 /* The linked list of all minipool fixes required for this function. */
8945 Mfix * minipool_fix_head;
8946 Mfix * minipool_fix_tail;
8947 /* The fix entry for the current minipool, once it has been placed. */
8948 Mfix * minipool_barrier;
8950 /* Determines if INSN is the start of a jump table. Returns the end
8951 of the TABLE or NULL_RTX. */
8953 is_jump_table (rtx insn)
8957 if (GET_CODE (insn) == JUMP_INSN
8958 && JUMP_LABEL (insn) != NULL
8959 && ((table = next_real_insn (JUMP_LABEL (insn)))
8960 == next_real_insn (insn))
8962 && GET_CODE (table) == JUMP_INSN
8963 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8964 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8970 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8971 #define JUMP_TABLES_IN_TEXT_SECTION 0
8974 static HOST_WIDE_INT
8975 get_jump_table_size (rtx insn)
8977 /* ADDR_VECs only take room if read-only data does into the text
8979 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8981 rtx body = PATTERN (insn);
8982 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8984 HOST_WIDE_INT modesize;
8986 modesize = GET_MODE_SIZE (GET_MODE (body));
8987 size = modesize * XVECLEN (body, elt);
8991 /* Round up size of TBB table to a halfword boundary. */
8992 size = (size + 1) & ~(HOST_WIDE_INT)1;
8995 /* No padding necessary for TBH. */
8998 /* Add two bytes for alignment on Thumb. */
9011 /* Move a minipool fix MP from its current location to before MAX_MP.
9012 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9013 constraints may need updating. */
9015 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9016 HOST_WIDE_INT max_address)
9018 /* The code below assumes these are different. */
9019 gcc_assert (mp != max_mp);
9023 if (max_address < mp->max_address)
9024 mp->max_address = max_address;
9028 if (max_address > max_mp->max_address - mp->fix_size)
9029 mp->max_address = max_mp->max_address - mp->fix_size;
9031 mp->max_address = max_address;
9033 /* Unlink MP from its current position. Since max_mp is non-null,
9034 mp->prev must be non-null. */
9035 mp->prev->next = mp->next;
9036 if (mp->next != NULL)
9037 mp->next->prev = mp->prev;
9039 minipool_vector_tail = mp->prev;
9041 /* Re-insert it before MAX_MP. */
9043 mp->prev = max_mp->prev;
9046 if (mp->prev != NULL)
9047 mp->prev->next = mp;
9049 minipool_vector_head = mp;
9052 /* Save the new entry. */
9055 /* Scan over the preceding entries and adjust their addresses as
9057 while (mp->prev != NULL
9058 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9060 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9067 /* Add a constant to the minipool for a forward reference. Returns the
9068 node added or NULL if the constant will not fit in this pool. */
9070 add_minipool_forward_ref (Mfix *fix)
9072 /* If set, max_mp is the first pool_entry that has a lower
9073 constraint than the one we are trying to add. */
9074 Mnode * max_mp = NULL;
9075 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9078 /* If the minipool starts before the end of FIX->INSN then this FIX
9079 can not be placed into the current pool. Furthermore, adding the
9080 new constant pool entry may cause the pool to start FIX_SIZE bytes
9082 if (minipool_vector_head &&
9083 (fix->address + get_attr_length (fix->insn)
9084 >= minipool_vector_head->max_address - fix->fix_size))
9087 /* Scan the pool to see if a constant with the same value has
9088 already been added. While we are doing this, also note the
9089 location where we must insert the constant if it doesn't already
9091 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9093 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9094 && fix->mode == mp->mode
9095 && (GET_CODE (fix->value) != CODE_LABEL
9096 || (CODE_LABEL_NUMBER (fix->value)
9097 == CODE_LABEL_NUMBER (mp->value)))
9098 && rtx_equal_p (fix->value, mp->value))
9100 /* More than one fix references this entry. */
9102 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9105 /* Note the insertion point if necessary. */
9107 && mp->max_address > max_address)
9110 /* If we are inserting an 8-bytes aligned quantity and
9111 we have not already found an insertion point, then
9112 make sure that all such 8-byte aligned quantities are
9113 placed at the start of the pool. */
9114 if (ARM_DOUBLEWORD_ALIGN
9116 && fix->fix_size >= 8
9117 && mp->fix_size < 8)
9120 max_address = mp->max_address;
9124 /* The value is not currently in the minipool, so we need to create
9125 a new entry for it. If MAX_MP is NULL, the entry will be put on
9126 the end of the list since the placement is less constrained than
9127 any existing entry. Otherwise, we insert the new fix before
9128 MAX_MP and, if necessary, adjust the constraints on the other
9131 mp->fix_size = fix->fix_size;
9132 mp->mode = fix->mode;
9133 mp->value = fix->value;
9135 /* Not yet required for a backwards ref. */
9136 mp->min_address = -65536;
9140 mp->max_address = max_address;
9142 mp->prev = minipool_vector_tail;
9144 if (mp->prev == NULL)
9146 minipool_vector_head = mp;
9147 minipool_vector_label = gen_label_rtx ();
9150 mp->prev->next = mp;
9152 minipool_vector_tail = mp;
9156 if (max_address > max_mp->max_address - mp->fix_size)
9157 mp->max_address = max_mp->max_address - mp->fix_size;
9159 mp->max_address = max_address;
9162 mp->prev = max_mp->prev;
9164 if (mp->prev != NULL)
9165 mp->prev->next = mp;
9167 minipool_vector_head = mp;
9170 /* Save the new entry. */
9173 /* Scan over the preceding entries and adjust their addresses as
9175 while (mp->prev != NULL
9176 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9178 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9186 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9187 HOST_WIDE_INT min_address)
9189 HOST_WIDE_INT offset;
9191 /* The code below assumes these are different. */
9192 gcc_assert (mp != min_mp);
9196 if (min_address > mp->min_address)
9197 mp->min_address = min_address;
9201 /* We will adjust this below if it is too loose. */
9202 mp->min_address = min_address;
9204 /* Unlink MP from its current position. Since min_mp is non-null,
9205 mp->next must be non-null. */
9206 mp->next->prev = mp->prev;
9207 if (mp->prev != NULL)
9208 mp->prev->next = mp->next;
9210 minipool_vector_head = mp->next;
9212 /* Reinsert it after MIN_MP. */
9214 mp->next = min_mp->next;
9216 if (mp->next != NULL)
9217 mp->next->prev = mp;
9219 minipool_vector_tail = mp;
9225 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9227 mp->offset = offset;
9228 if (mp->refcount > 0)
9229 offset += mp->fix_size;
9231 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9232 mp->next->min_address = mp->min_address + mp->fix_size;
9238 /* Add a constant to the minipool for a backward reference. Returns the
9239 node added or NULL if the constant will not fit in this pool.
9241 Note that the code for insertion for a backwards reference can be
9242 somewhat confusing because the calculated offsets for each fix do
9243 not take into account the size of the pool (which is still under
9246 add_minipool_backward_ref (Mfix *fix)
9248 /* If set, min_mp is the last pool_entry that has a lower constraint
9249 than the one we are trying to add. */
9250 Mnode *min_mp = NULL;
9251 /* This can be negative, since it is only a constraint. */
9252 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9255 /* If we can't reach the current pool from this insn, or if we can't
9256 insert this entry at the end of the pool without pushing other
9257 fixes out of range, then we don't try. This ensures that we
9258 can't fail later on. */
9259 if (min_address >= minipool_barrier->address
9260 || (minipool_vector_tail->min_address + fix->fix_size
9261 >= minipool_barrier->address))
9264 /* Scan the pool to see if a constant with the same value has
9265 already been added. While we are doing this, also note the
9266 location where we must insert the constant if it doesn't already
9268 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9270 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9271 && fix->mode == mp->mode
9272 && (GET_CODE (fix->value) != CODE_LABEL
9273 || (CODE_LABEL_NUMBER (fix->value)
9274 == CODE_LABEL_NUMBER (mp->value)))
9275 && rtx_equal_p (fix->value, mp->value)
9276 /* Check that there is enough slack to move this entry to the
9277 end of the table (this is conservative). */
9279 > (minipool_barrier->address
9280 + minipool_vector_tail->offset
9281 + minipool_vector_tail->fix_size)))
9284 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9288 mp->min_address += fix->fix_size;
9291 /* Note the insertion point if necessary. */
9292 if (mp->min_address < min_address)
9294 /* For now, we do not allow the insertion of 8-byte alignment
9295 requiring nodes anywhere but at the start of the pool. */
9296 if (ARM_DOUBLEWORD_ALIGN
9297 && fix->fix_size >= 8 && mp->fix_size < 8)
9302 else if (mp->max_address
9303 < minipool_barrier->address + mp->offset + fix->fix_size)
9305 /* Inserting before this entry would push the fix beyond
9306 its maximum address (which can happen if we have
9307 re-located a forwards fix); force the new fix to come
9309 if (ARM_DOUBLEWORD_ALIGN
9310 && fix->fix_size >= 8 && mp->fix_size < 8)
9315 min_address = mp->min_address + fix->fix_size;
9318 /* Do not insert a non-8-byte aligned quantity before 8-byte
9319 aligned quantities. */
9320 else if (ARM_DOUBLEWORD_ALIGN
9321 && fix->fix_size < 8
9322 && mp->fix_size >= 8)
9325 min_address = mp->min_address + fix->fix_size;
9330 /* We need to create a new entry. */
9332 mp->fix_size = fix->fix_size;
9333 mp->mode = fix->mode;
9334 mp->value = fix->value;
9336 mp->max_address = minipool_barrier->address + 65536;
9338 mp->min_address = min_address;
9343 mp->next = minipool_vector_head;
9345 if (mp->next == NULL)
9347 minipool_vector_tail = mp;
9348 minipool_vector_label = gen_label_rtx ();
9351 mp->next->prev = mp;
9353 minipool_vector_head = mp;
9357 mp->next = min_mp->next;
9361 if (mp->next != NULL)
9362 mp->next->prev = mp;
9364 minipool_vector_tail = mp;
9367 /* Save the new entry. */
9375 /* Scan over the following entries and adjust their offsets. */
9376 while (mp->next != NULL)
9378 if (mp->next->min_address < mp->min_address + mp->fix_size)
9379 mp->next->min_address = mp->min_address + mp->fix_size;
9382 mp->next->offset = mp->offset + mp->fix_size;
9384 mp->next->offset = mp->offset;
9393 assign_minipool_offsets (Mfix *barrier)
9395 HOST_WIDE_INT offset = 0;
9398 minipool_barrier = barrier;
9400 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9402 mp->offset = offset;
9404 if (mp->refcount > 0)
9405 offset += mp->fix_size;
9409 /* Output the literal table */
9411 dump_minipool (rtx scan)
9417 if (ARM_DOUBLEWORD_ALIGN)
9418 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9419 if (mp->refcount > 0 && mp->fix_size >= 8)
9427 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9428 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9430 scan = emit_label_after (gen_label_rtx (), scan);
9431 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9432 scan = emit_label_after (minipool_vector_label, scan);
9434 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9436 if (mp->refcount > 0)
9441 ";; Offset %u, min %ld, max %ld ",
9442 (unsigned) mp->offset, (unsigned long) mp->min_address,
9443 (unsigned long) mp->max_address);
9444 arm_print_value (dump_file, mp->value);
9445 fputc ('\n', dump_file);
9448 switch (mp->fix_size)
9450 #ifdef HAVE_consttable_1
9452 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9456 #ifdef HAVE_consttable_2
9458 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9462 #ifdef HAVE_consttable_4
9464 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9468 #ifdef HAVE_consttable_8
9470 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9474 #ifdef HAVE_consttable_16
9476 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9489 minipool_vector_head = minipool_vector_tail = NULL;
9490 scan = emit_insn_after (gen_consttable_end (), scan);
9491 scan = emit_barrier_after (scan);
9494 /* Return the cost of forcibly inserting a barrier after INSN. */
9496 arm_barrier_cost (rtx insn)
9498 /* Basing the location of the pool on the loop depth is preferable,
9499 but at the moment, the basic block information seems to be
9500 corrupt by this stage of the compilation. */
9502 rtx next = next_nonnote_insn (insn);
9504 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9507 switch (GET_CODE (insn))
9510 /* It will always be better to place the table before the label, rather
9519 return base_cost - 10;
9522 return base_cost + 10;
9526 /* Find the best place in the insn stream in the range
9527 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9528 Create the barrier by inserting a jump and add a new fix entry for
9531 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9533 HOST_WIDE_INT count = 0;
9535 rtx from = fix->insn;
9536 /* The instruction after which we will insert the jump. */
9537 rtx selected = NULL;
9539 /* The address at which the jump instruction will be placed. */
9540 HOST_WIDE_INT selected_address;
9542 HOST_WIDE_INT max_count = max_address - fix->address;
9543 rtx label = gen_label_rtx ();
9545 selected_cost = arm_barrier_cost (from);
9546 selected_address = fix->address;
9548 while (from && count < max_count)
9553 /* This code shouldn't have been called if there was a natural barrier
9555 gcc_assert (GET_CODE (from) != BARRIER);
9557 /* Count the length of this insn. */
9558 count += get_attr_length (from);
9560 /* If there is a jump table, add its length. */
9561 tmp = is_jump_table (from);
9564 count += get_jump_table_size (tmp);
9566 /* Jump tables aren't in a basic block, so base the cost on
9567 the dispatch insn. If we select this location, we will
9568 still put the pool after the table. */
9569 new_cost = arm_barrier_cost (from);
9571 if (count < max_count
9572 && (!selected || new_cost <= selected_cost))
9575 selected_cost = new_cost;
9576 selected_address = fix->address + count;
9579 /* Continue after the dispatch table. */
9580 from = NEXT_INSN (tmp);
9584 new_cost = arm_barrier_cost (from);
9586 if (count < max_count
9587 && (!selected || new_cost <= selected_cost))
9590 selected_cost = new_cost;
9591 selected_address = fix->address + count;
9594 from = NEXT_INSN (from);
9597 /* Make sure that we found a place to insert the jump. */
9598 gcc_assert (selected);
9600 /* Create a new JUMP_INSN that branches around a barrier. */
9601 from = emit_jump_insn_after (gen_jump (label), selected);
9602 JUMP_LABEL (from) = label;
9603 barrier = emit_barrier_after (from);
9604 emit_label_after (label, barrier);
9606 /* Create a minipool barrier entry for the new barrier. */
9607 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9608 new_fix->insn = barrier;
9609 new_fix->address = selected_address;
9610 new_fix->next = fix->next;
9611 fix->next = new_fix;
9616 /* Record that there is a natural barrier in the insn stream at
9619 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9621 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9624 fix->address = address;
9627 if (minipool_fix_head != NULL)
9628 minipool_fix_tail->next = fix;
9630 minipool_fix_head = fix;
9632 minipool_fix_tail = fix;
9635 /* Record INSN, which will need fixing up to load a value from the
9636 minipool. ADDRESS is the offset of the insn since the start of the
9637 function; LOC is a pointer to the part of the insn which requires
9638 fixing; VALUE is the constant that must be loaded, which is of type
9641 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9642 enum machine_mode mode, rtx value)
9644 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9647 fix->address = address;
9650 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9652 fix->forwards = get_attr_pool_range (insn);
9653 fix->backwards = get_attr_neg_pool_range (insn);
9654 fix->minipool = NULL;
9656 /* If an insn doesn't have a range defined for it, then it isn't
9657 expecting to be reworked by this code. Better to stop now than
9658 to generate duff assembly code. */
9659 gcc_assert (fix->forwards || fix->backwards);
9661 /* If an entry requires 8-byte alignment then assume all constant pools
9662 require 4 bytes of padding. Trying to do this later on a per-pool
9663 basis is awkward because existing pool entries have to be modified. */
9664 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9670 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9671 GET_MODE_NAME (mode),
9672 INSN_UID (insn), (unsigned long) address,
9673 -1 * (long)fix->backwards, (long)fix->forwards);
9674 arm_print_value (dump_file, fix->value);
9675 fprintf (dump_file, "\n");
9678 /* Add it to the chain of fixes. */
9681 if (minipool_fix_head != NULL)
9682 minipool_fix_tail->next = fix;
9684 minipool_fix_head = fix;
9686 minipool_fix_tail = fix;
9689 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9690 Returns the number of insns needed, or 99 if we don't know how to
9693 arm_const_double_inline_cost (rtx val)
9695 rtx lowpart, highpart;
9696 enum machine_mode mode;
9698 mode = GET_MODE (val);
9700 if (mode == VOIDmode)
9703 gcc_assert (GET_MODE_SIZE (mode) == 8);
9705 lowpart = gen_lowpart (SImode, val);
9706 highpart = gen_highpart_mode (SImode, mode, val);
9708 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9709 gcc_assert (GET_CODE (highpart) == CONST_INT);
9711 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9712 NULL_RTX, NULL_RTX, 0, 0)
9713 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9714 NULL_RTX, NULL_RTX, 0, 0));
9717 /* Return true if it is worthwhile to split a 64-bit constant into two
9718 32-bit operations. This is the case if optimizing for size, or
9719 if we have load delay slots, or if one 32-bit part can be done with
9720 a single data operation. */
9722 arm_const_double_by_parts (rtx val)
9724 enum machine_mode mode = GET_MODE (val);
9727 if (optimize_size || arm_ld_sched)
9730 if (mode == VOIDmode)
9733 part = gen_highpart_mode (SImode, mode, val);
9735 gcc_assert (GET_CODE (part) == CONST_INT);
9737 if (const_ok_for_arm (INTVAL (part))
9738 || const_ok_for_arm (~INTVAL (part)))
9741 part = gen_lowpart (SImode, val);
9743 gcc_assert (GET_CODE (part) == CONST_INT);
9745 if (const_ok_for_arm (INTVAL (part))
9746 || const_ok_for_arm (~INTVAL (part)))
9752 /* Scan INSN and note any of its operands that need fixing.
9753 If DO_PUSHES is false we do not actually push any of the fixups
9754 needed. The function returns TRUE if any fixups were needed/pushed.
9755 This is used by arm_memory_load_p() which needs to know about loads
9756 of constants that will be converted into minipool loads. */
9758 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9760 bool result = false;
9763 extract_insn (insn);
9765 if (!constrain_operands (1))
9766 fatal_insn_not_found (insn);
9768 if (recog_data.n_alternatives == 0)
9771 /* Fill in recog_op_alt with information about the constraints of
9773 preprocess_constraints ();
9775 for (opno = 0; opno < recog_data.n_operands; opno++)
9777 /* Things we need to fix can only occur in inputs. */
9778 if (recog_data.operand_type[opno] != OP_IN)
9781 /* If this alternative is a memory reference, then any mention
9782 of constants in this alternative is really to fool reload
9783 into allowing us to accept one there. We need to fix them up
9784 now so that we output the right code. */
9785 if (recog_op_alt[opno][which_alternative].memory_ok)
9787 rtx op = recog_data.operand[opno];
9789 if (CONSTANT_P (op))
9792 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9793 recog_data.operand_mode[opno], op);
9796 else if (GET_CODE (op) == MEM
9797 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9798 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9802 rtx cop = avoid_constant_pool_reference (op);
9804 /* Casting the address of something to a mode narrower
9805 than a word can cause avoid_constant_pool_reference()
9806 to return the pool reference itself. That's no good to
9807 us here. Lets just hope that we can use the
9808 constant pool value directly. */
9810 cop = get_pool_constant (XEXP (op, 0));
9812 push_minipool_fix (insn, address,
9813 recog_data.operand_loc[opno],
9814 recog_data.operand_mode[opno], cop);
9825 /* Gcc puts the pool in the wrong place for ARM, since we can only
9826 load addresses a limited distance around the pc. We do some
9827 special munging to move the constant pool values to the correct
9828 point in the code. */
9833 HOST_WIDE_INT address = 0;
9836 minipool_fix_head = minipool_fix_tail = NULL;
9838 /* The first insn must always be a note, or the code below won't
9839 scan it properly. */
9840 insn = get_insns ();
9841 gcc_assert (GET_CODE (insn) == NOTE);
9844 /* Scan all the insns and record the operands that will need fixing. */
9845 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9847 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9848 && (arm_cirrus_insn_p (insn)
9849 || GET_CODE (insn) == JUMP_INSN
9850 || arm_memory_load_p (insn)))
9851 cirrus_reorg (insn);
9853 if (GET_CODE (insn) == BARRIER)
9854 push_minipool_barrier (insn, address);
9855 else if (INSN_P (insn))
9859 note_invalid_constants (insn, address, true);
9860 address += get_attr_length (insn);
9862 /* If the insn is a vector jump, add the size of the table
9863 and skip the table. */
9864 if ((table = is_jump_table (insn)) != NULL)
9866 address += get_jump_table_size (table);
9872 fix = minipool_fix_head;
9874 /* Now scan the fixups and perform the required changes. */
9879 Mfix * last_added_fix;
9880 Mfix * last_barrier = NULL;
9883 /* Skip any further barriers before the next fix. */
9884 while (fix && GET_CODE (fix->insn) == BARRIER)
9887 /* No more fixes. */
9891 last_added_fix = NULL;
9893 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9895 if (GET_CODE (ftmp->insn) == BARRIER)
9897 if (ftmp->address >= minipool_vector_head->max_address)
9900 last_barrier = ftmp;
9902 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9905 last_added_fix = ftmp; /* Keep track of the last fix added. */
9908 /* If we found a barrier, drop back to that; any fixes that we
9909 could have reached but come after the barrier will now go in
9910 the next mini-pool. */
9911 if (last_barrier != NULL)
9913 /* Reduce the refcount for those fixes that won't go into this
9915 for (fdel = last_barrier->next;
9916 fdel && fdel != ftmp;
9919 fdel->minipool->refcount--;
9920 fdel->minipool = NULL;
9923 ftmp = last_barrier;
9927 /* ftmp is first fix that we can't fit into this pool and
9928 there no natural barriers that we could use. Insert a
9929 new barrier in the code somewhere between the previous
9930 fix and this one, and arrange to jump around it. */
9931 HOST_WIDE_INT max_address;
9933 /* The last item on the list of fixes must be a barrier, so
9934 we can never run off the end of the list of fixes without
9935 last_barrier being set. */
9938 max_address = minipool_vector_head->max_address;
9939 /* Check that there isn't another fix that is in range that
9940 we couldn't fit into this pool because the pool was
9941 already too large: we need to put the pool before such an
9942 instruction. The pool itself may come just after the
9943 fix because create_fix_barrier also allows space for a
9944 jump instruction. */
9945 if (ftmp->address < max_address)
9946 max_address = ftmp->address + 1;
9948 last_barrier = create_fix_barrier (last_added_fix, max_address);
9951 assign_minipool_offsets (last_barrier);
9955 if (GET_CODE (ftmp->insn) != BARRIER
9956 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9963 /* Scan over the fixes we have identified for this pool, fixing them
9964 up and adding the constants to the pool itself. */
9965 for (this_fix = fix; this_fix && ftmp != this_fix;
9966 this_fix = this_fix->next)
9967 if (GET_CODE (this_fix->insn) != BARRIER)
9970 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9971 minipool_vector_label),
9972 this_fix->minipool->offset);
9973 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9976 dump_minipool (last_barrier->insn);
9980 /* From now on we must synthesize any constants that we can't handle
9981 directly. This can happen if the RTL gets split during final
9982 instruction generation. */
9983 after_arm_reorg = 1;
9985 /* Free the minipool memory. */
9986 obstack_free (&minipool_obstack, minipool_startobj);
9989 /* Routines to output assembly language. */
9991 /* If the rtx is the correct value then return the string of the number.
9992 In this way we can ensure that valid double constants are generated even
9993 when cross compiling. */
9995 fp_immediate_constant (rtx x)
10000 if (!fp_consts_inited)
10003 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10004 for (i = 0; i < 8; i++)
10005 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10006 return strings_fp[i];
10008 gcc_unreachable ();
10011 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10012 static const char *
10013 fp_const_from_val (REAL_VALUE_TYPE *r)
10017 if (!fp_consts_inited)
10020 for (i = 0; i < 8; i++)
10021 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10022 return strings_fp[i];
10024 gcc_unreachable ();
10027 /* Output the operands of a LDM/STM instruction to STREAM.
10028 MASK is the ARM register set mask of which only bits 0-15 are important.
10029 REG is the base register, either the frame pointer or the stack pointer,
10030 INSTR is the possibly suffixed load or store instruction.
10031 RFE is nonzero if the instruction should also copy spsr to cpsr. */
10034 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10035 unsigned long mask, int rfe)
10038 bool not_first = FALSE;
10040 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10041 fputc ('\t', stream);
10042 asm_fprintf (stream, instr, reg);
10043 fputc ('{', stream);
10045 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10046 if (mask & (1 << i))
10049 fprintf (stream, ", ");
10051 asm_fprintf (stream, "%r", i);
10056 fprintf (stream, "}^\n");
10058 fprintf (stream, "}\n");
10062 /* Output a FLDMD instruction to STREAM.
10063 BASE if the register containing the address.
10064 REG and COUNT specify the register range.
10065 Extra registers may be added to avoid hardware bugs.
10067 We output FLDMD even for ARMv5 VFP implementations. Although
10068 FLDMD is technically not supported until ARMv6, it is believed
10069 that all VFP implementations support its use in this context. */
10072 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10076 /* Workaround ARM10 VFPr1 bug. */
10077 if (count == 2 && !arm_arch6)
10084 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10085 load into multiple parts if we have to handle more than 16 registers. */
10088 vfp_output_fldmd (stream, base, reg, 16);
10089 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10093 fputc ('\t', stream);
10094 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10096 for (i = reg; i < reg + count; i++)
10099 fputs (", ", stream);
10100 asm_fprintf (stream, "d%d", i);
10102 fputs ("}\n", stream);
10107 /* Output the assembly for a store multiple. */
10110 vfp_output_fstmd (rtx * operands)
10117 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10118 p = strlen (pattern);
10120 gcc_assert (GET_CODE (operands[1]) == REG);
10122 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10123 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10125 p += sprintf (&pattern[p], ", d%d", base + i);
10127 strcpy (&pattern[p], "}");
10129 output_asm_insn (pattern, operands);
10134 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10135 number of bytes pushed. */
10138 vfp_emit_fstmd (int base_reg, int count)
10145 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
10146 register pairs are stored by a store multiple insn. We avoid this
10147 by pushing an extra pair. */
10148 if (count == 2 && !arm_arch6)
10150 if (base_reg == LAST_VFP_REGNUM - 3)
10155 /* FSTMD may not store more than 16 doubleword registers at once. Split
10156 larger stores into multiple parts (up to a maximum of two, in
10161 /* NOTE: base_reg is an internal register number, so each D register
10163 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10164 saved += vfp_emit_fstmd (base_reg, 16);
10168 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10169 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10171 reg = gen_rtx_REG (DFmode, base_reg);
10174 XVECEXP (par, 0, 0)
10175 = gen_rtx_SET (VOIDmode,
10176 gen_frame_mem (BLKmode,
10177 gen_rtx_PRE_DEC (BLKmode,
10178 stack_pointer_rtx)),
10179 gen_rtx_UNSPEC (BLKmode,
10180 gen_rtvec (1, reg),
10181 UNSPEC_PUSH_MULT));
10183 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10184 plus_constant (stack_pointer_rtx, -(count * 8)));
10185 RTX_FRAME_RELATED_P (tmp) = 1;
10186 XVECEXP (dwarf, 0, 0) = tmp;
10188 tmp = gen_rtx_SET (VOIDmode,
10189 gen_frame_mem (DFmode, stack_pointer_rtx),
10191 RTX_FRAME_RELATED_P (tmp) = 1;
10192 XVECEXP (dwarf, 0, 1) = tmp;
10194 for (i = 1; i < count; i++)
10196 reg = gen_rtx_REG (DFmode, base_reg);
10198 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10200 tmp = gen_rtx_SET (VOIDmode,
10201 gen_frame_mem (DFmode,
10202 plus_constant (stack_pointer_rtx,
10205 RTX_FRAME_RELATED_P (tmp) = 1;
10206 XVECEXP (dwarf, 0, i + 1) = tmp;
10209 par = emit_insn (par);
10210 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
10212 RTX_FRAME_RELATED_P (par) = 1;
10217 /* Emit a call instruction with pattern PAT. ADDR is the address of
10218 the call target. */
10221 arm_emit_call_insn (rtx pat, rtx addr)
10225 insn = emit_call_insn (pat);
10227 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10228 If the call might use such an entry, add a use of the PIC register
10229 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10230 if (TARGET_VXWORKS_RTP
10232 && GET_CODE (addr) == SYMBOL_REF
10233 && (SYMBOL_REF_DECL (addr)
10234 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10235 : !SYMBOL_REF_LOCAL_P (addr)))
10237 require_pic_register ();
10238 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10242 /* Output a 'call' insn. */
10244 output_call (rtx *operands)
10246 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10248 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10249 if (REGNO (operands[0]) == LR_REGNUM)
10251 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10252 output_asm_insn ("mov%?\t%0, %|lr", operands);
10255 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10257 if (TARGET_INTERWORK || arm_arch4t)
10258 output_asm_insn ("bx%?\t%0", operands);
10260 output_asm_insn ("mov%?\t%|pc, %0", operands);
10265 /* Output a 'call' insn that is a reference in memory. */
10267 output_call_mem (rtx *operands)
10269 if (TARGET_INTERWORK && !arm_arch5)
10271 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10272 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10273 output_asm_insn ("bx%?\t%|ip", operands);
10275 else if (regno_use_in (LR_REGNUM, operands[0]))
10277 /* LR is used in the memory address. We load the address in the
10278 first instruction. It's safe to use IP as the target of the
10279 load since the call will kill it anyway. */
10280 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10282 output_asm_insn ("blx%?\t%|ip", operands);
10285 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10287 output_asm_insn ("bx%?\t%|ip", operands);
10289 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10294 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10295 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10302 /* Output a move from arm registers to an fpa registers.
10303 OPERANDS[0] is an fpa register.
10304 OPERANDS[1] is the first registers of an arm register pair. */
10306 output_mov_long_double_fpa_from_arm (rtx *operands)
10308 int arm_reg0 = REGNO (operands[1]);
10311 gcc_assert (arm_reg0 != IP_REGNUM);
10313 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10314 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10315 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10317 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10318 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10323 /* Output a move from an fpa register to arm registers.
10324 OPERANDS[0] is the first registers of an arm register pair.
10325 OPERANDS[1] is an fpa register. */
10327 output_mov_long_double_arm_from_fpa (rtx *operands)
10329 int arm_reg0 = REGNO (operands[0]);
10332 gcc_assert (arm_reg0 != IP_REGNUM);
10334 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10335 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10336 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10338 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10339 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10343 /* Output a move from arm registers to arm registers of a long double
10344 OPERANDS[0] is the destination.
10345 OPERANDS[1] is the source. */
10347 output_mov_long_double_arm_from_arm (rtx *operands)
10349 /* We have to be careful here because the two might overlap. */
10350 int dest_start = REGNO (operands[0]);
10351 int src_start = REGNO (operands[1]);
10355 if (dest_start < src_start)
10357 for (i = 0; i < 3; i++)
10359 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10360 ops[1] = gen_rtx_REG (SImode, src_start + i);
10361 output_asm_insn ("mov%?\t%0, %1", ops);
10366 for (i = 2; i >= 0; i--)
10368 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10369 ops[1] = gen_rtx_REG (SImode, src_start + i);
10370 output_asm_insn ("mov%?\t%0, %1", ops);
10378 /* Emit a MOVW/MOVT pair. */
10379 void arm_emit_movpair (rtx dest, rtx src)
10381 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10382 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
10386 /* Output a move from arm registers to an fpa registers.
10387 OPERANDS[0] is an fpa register.
10388 OPERANDS[1] is the first registers of an arm register pair. */
10390 output_mov_double_fpa_from_arm (rtx *operands)
10392 int arm_reg0 = REGNO (operands[1]);
10395 gcc_assert (arm_reg0 != IP_REGNUM);
10397 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10398 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10399 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10400 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10404 /* Output a move from an fpa register to arm registers.
10405 OPERANDS[0] is the first registers of an arm register pair.
10406 OPERANDS[1] is an fpa register. */
10408 output_mov_double_arm_from_fpa (rtx *operands)
10410 int arm_reg0 = REGNO (operands[0]);
10413 gcc_assert (arm_reg0 != IP_REGNUM);
10415 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10416 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10417 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10418 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10422 /* Output a move between double words.
10423 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10424 or MEM<-REG and all MEMs must be offsettable addresses. */
10426 output_move_double (rtx *operands)
10428 enum rtx_code code0 = GET_CODE (operands[0]);
10429 enum rtx_code code1 = GET_CODE (operands[1]);
10434 unsigned int reg0 = REGNO (operands[0]);
10436 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10438 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10440 switch (GET_CODE (XEXP (operands[1], 0)))
10444 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
10445 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10447 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10451 gcc_assert (TARGET_LDRD);
10452 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10457 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10459 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10464 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10466 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10470 gcc_assert (TARGET_LDRD);
10471 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10476 /* Autoicrement addressing modes should never have overlapping
10477 base and destination registers, and overlapping index registers
10478 are already prohibited, so this doesn't need to worry about
10480 otherops[0] = operands[0];
10481 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10482 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10484 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10486 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10488 /* Registers overlap so split out the increment. */
10489 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10490 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10494 /* Use a single insn if we can.
10495 FIXME: IWMMXT allows offsets larger than ldrd can
10496 handle, fix these up with a pair of ldr. */
10498 || GET_CODE (otherops[2]) != CONST_INT
10499 || (INTVAL (otherops[2]) > -256
10500 && INTVAL (otherops[2]) < 256))
10501 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10504 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10505 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10511 /* Use a single insn if we can.
10512 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10513 fix these up with a pair of ldr. */
10515 || GET_CODE (otherops[2]) != CONST_INT
10516 || (INTVAL (otherops[2]) > -256
10517 && INTVAL (otherops[2]) < 256))
10518 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10521 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10522 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10529 /* We might be able to use ldrd %0, %1 here. However the range is
10530 different to ldr/adr, and it is broken on some ARMv7-M
10531 implementations. */
10532 /* Use the second register of the pair to avoid problematic
10534 otherops[1] = operands[1];
10535 output_asm_insn ("adr%?\t%0, %1", otherops);
10536 operands[1] = otherops[0];
10538 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10540 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10543 /* ??? This needs checking for thumb2. */
10545 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10546 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10548 otherops[0] = operands[0];
10549 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10550 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10552 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10554 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10556 switch ((int) INTVAL (otherops[2]))
10559 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10564 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10569 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10573 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10574 operands[1] = otherops[0];
10576 && (GET_CODE (otherops[2]) == REG
10578 || (GET_CODE (otherops[2]) == CONST_INT
10579 && INTVAL (otherops[2]) > -256
10580 && INTVAL (otherops[2]) < 256)))
10582 if (reg_overlap_mentioned_p (operands[0],
10586 /* Swap base and index registers over to
10587 avoid a conflict. */
10589 otherops[1] = otherops[2];
10592 /* If both registers conflict, it will usually
10593 have been fixed by a splitter. */
10594 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10595 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10597 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10598 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10602 otherops[0] = operands[0];
10603 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10608 if (GET_CODE (otherops[2]) == CONST_INT)
10610 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10611 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10613 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10616 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10619 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10622 return "ldr%(d%)\t%0, [%1]";
10624 return "ldm%(ia%)\t%1, %M0";
10628 otherops[1] = adjust_address (operands[1], SImode, 4);
10629 /* Take care of overlapping base/data reg. */
10630 if (reg_mentioned_p (operands[0], operands[1]))
10632 output_asm_insn ("ldr%?\t%0, %1", otherops);
10633 output_asm_insn ("ldr%?\t%0, %1", operands);
10637 output_asm_insn ("ldr%?\t%0, %1", operands);
10638 output_asm_insn ("ldr%?\t%0, %1", otherops);
10645 /* Constraints should ensure this. */
10646 gcc_assert (code0 == MEM && code1 == REG);
10647 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10649 switch (GET_CODE (XEXP (operands[0], 0)))
10653 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10655 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10659 gcc_assert (TARGET_LDRD);
10660 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10665 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10667 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10672 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10674 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10678 gcc_assert (TARGET_LDRD);
10679 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10684 otherops[0] = operands[1];
10685 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10686 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10688 /* IWMMXT allows offsets larger than ldrd can handle,
10689 fix these up with a pair of ldr. */
10691 && GET_CODE (otherops[2]) == CONST_INT
10692 && (INTVAL(otherops[2]) <= -256
10693 || INTVAL(otherops[2]) >= 256))
10695 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10697 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10698 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10702 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10703 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10706 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10707 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10709 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10713 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10714 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10716 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10719 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10725 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10731 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10736 && (GET_CODE (otherops[2]) == REG
10738 || (GET_CODE (otherops[2]) == CONST_INT
10739 && INTVAL (otherops[2]) > -256
10740 && INTVAL (otherops[2]) < 256)))
10742 otherops[0] = operands[1];
10743 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10744 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10750 otherops[0] = adjust_address (operands[0], SImode, 4);
10751 otherops[1] = operands[1];
10752 output_asm_insn ("str%?\t%1, %0", operands);
10753 output_asm_insn ("str%?\t%H1, %0", otherops);
10760 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10761 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10764 output_move_quad (rtx *operands)
10766 if (REG_P (operands[0]))
10768 /* Load, or reg->reg move. */
10770 if (MEM_P (operands[1]))
10772 switch (GET_CODE (XEXP (operands[1], 0)))
10775 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10780 output_asm_insn ("adr%?\t%0, %1", operands);
10781 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10785 gcc_unreachable ();
10793 gcc_assert (REG_P (operands[1]));
10795 dest = REGNO (operands[0]);
10796 src = REGNO (operands[1]);
10798 /* This seems pretty dumb, but hopefully GCC won't try to do it
10801 for (i = 0; i < 4; i++)
10803 ops[0] = gen_rtx_REG (SImode, dest + i);
10804 ops[1] = gen_rtx_REG (SImode, src + i);
10805 output_asm_insn ("mov%?\t%0, %1", ops);
10808 for (i = 3; i >= 0; i--)
10810 ops[0] = gen_rtx_REG (SImode, dest + i);
10811 ops[1] = gen_rtx_REG (SImode, src + i);
10812 output_asm_insn ("mov%?\t%0, %1", ops);
10818 gcc_assert (MEM_P (operands[0]));
10819 gcc_assert (REG_P (operands[1]));
10820 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10822 switch (GET_CODE (XEXP (operands[0], 0)))
10825 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10829 gcc_unreachable ();
10836 /* Output a VFP load or store instruction. */
10839 output_move_vfp (rtx *operands)
10841 rtx reg, mem, addr, ops[2];
10842 int load = REG_P (operands[0]);
10843 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10844 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10847 enum machine_mode mode;
10849 reg = operands[!load];
10850 mem = operands[load];
10852 mode = GET_MODE (reg);
10854 gcc_assert (REG_P (reg));
10855 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10856 gcc_assert (mode == SFmode
10860 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10861 gcc_assert (MEM_P (mem));
10863 addr = XEXP (mem, 0);
10865 switch (GET_CODE (addr))
10868 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10869 ops[0] = XEXP (addr, 0);
10874 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10875 ops[0] = XEXP (addr, 0);
10880 templ = "f%s%c%%?\t%%%s0, %%1%s";
10886 sprintf (buff, templ,
10887 load ? "ld" : "st",
10890 integer_p ? "\t%@ int" : "");
10891 output_asm_insn (buff, ops);
10896 /* Output a Neon quad-word load or store, or a load or store for
10897 larger structure modes.
10899 WARNING: The ordering of elements is weird in big-endian mode,
10900 because we use VSTM, as required by the EABI. GCC RTL defines
10901 element ordering based on in-memory order. This can be differ
10902 from the architectural ordering of elements within a NEON register.
10903 The intrinsics defined in arm_neon.h use the NEON register element
10904 ordering, not the GCC RTL element ordering.
10906 For example, the in-memory ordering of a big-endian a quadword
10907 vector with 16-bit elements when stored from register pair {d0,d1}
10908 will be (lowest address first, d0[N] is NEON register element N):
10910 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
10912 When necessary, quadword registers (dN, dN+1) are moved to ARM
10913 registers from rN in the order:
10915 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10917 So that STM/LDM can be used on vectors in ARM registers, and the
10918 same memory layout will result as if VSTM/VLDM were used. */
10921 output_move_neon (rtx *operands)
10923 rtx reg, mem, addr, ops[2];
10924 int regno, load = REG_P (operands[0]);
10927 enum machine_mode mode;
10929 reg = operands[!load];
10930 mem = operands[load];
10932 mode = GET_MODE (reg);
10934 gcc_assert (REG_P (reg));
10935 regno = REGNO (reg);
10936 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10937 || NEON_REGNO_OK_FOR_QUAD (regno));
10938 gcc_assert (VALID_NEON_DREG_MODE (mode)
10939 || VALID_NEON_QREG_MODE (mode)
10940 || VALID_NEON_STRUCT_MODE (mode));
10941 gcc_assert (MEM_P (mem));
10943 addr = XEXP (mem, 0);
10945 /* Strip off const from addresses like (const (plus (...))). */
10946 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10947 addr = XEXP (addr, 0);
10949 switch (GET_CODE (addr))
10952 templ = "v%smia%%?\t%%0!, %%h1";
10953 ops[0] = XEXP (addr, 0);
10958 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10959 gcc_unreachable ();
10964 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10967 for (i = 0; i < nregs; i++)
10969 /* We're only using DImode here because it's a convenient size. */
10970 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10971 ops[1] = adjust_address (mem, SImode, 8 * i);
10972 if (reg_overlap_mentioned_p (ops[0], mem))
10974 gcc_assert (overlap == -1);
10979 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10980 output_asm_insn (buff, ops);
10985 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10986 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10987 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10988 output_asm_insn (buff, ops);
10995 templ = "v%smia%%?\t%%m0, %%h1";
11000 sprintf (buff, templ, load ? "ld" : "st");
11001 output_asm_insn (buff, ops);
11006 /* Output an ADD r, s, #n where n may be too big for one instruction.
11007 If adding zero to one register, output nothing. */
11009 output_add_immediate (rtx *operands)
11011 HOST_WIDE_INT n = INTVAL (operands[2]);
11013 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11016 output_multi_immediate (operands,
11017 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11020 output_multi_immediate (operands,
11021 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11028 /* Output a multiple immediate operation.
11029 OPERANDS is the vector of operands referred to in the output patterns.
11030 INSTR1 is the output pattern to use for the first constant.
11031 INSTR2 is the output pattern to use for subsequent constants.
11032 IMMED_OP is the index of the constant slot in OPERANDS.
11033 N is the constant value. */
11034 static const char *
11035 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11036 int immed_op, HOST_WIDE_INT n)
11038 #if HOST_BITS_PER_WIDE_INT > 32
11044 /* Quick and easy output. */
11045 operands[immed_op] = const0_rtx;
11046 output_asm_insn (instr1, operands);
11051 const char * instr = instr1;
11053 /* Note that n is never zero here (which would give no output). */
11054 for (i = 0; i < 32; i += 2)
11058 operands[immed_op] = GEN_INT (n & (255 << i));
11059 output_asm_insn (instr, operands);
11069 /* Return the name of a shifter operation. */
11070 static const char *
11071 arm_shift_nmem(enum rtx_code code)
11076 return ARM_LSL_NAME;
11092 /* Return the appropriate ARM instruction for the operation code.
11093 The returned result should not be overwritten. OP is the rtx of the
11094 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
11097 arithmetic_instr (rtx op, int shift_first_arg)
11099 switch (GET_CODE (op))
11105 return shift_first_arg ? "rsb" : "sub";
11120 return arm_shift_nmem(GET_CODE(op));
11123 gcc_unreachable ();
11127 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11128 for the operation code. The returned result should not be overwritten.
11129 OP is the rtx code of the shift.
11130 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
11132 static const char *
11133 shift_op (rtx op, HOST_WIDE_INT *amountp)
11136 enum rtx_code code = GET_CODE (op);
11138 switch (GET_CODE (XEXP (op, 1)))
11146 *amountp = INTVAL (XEXP (op, 1));
11150 gcc_unreachable ();
11156 gcc_assert (*amountp != -1);
11157 *amountp = 32 - *amountp;
11160 /* Fall through. */
11166 mnem = arm_shift_nmem(code);
11170 /* We never have to worry about the amount being other than a
11171 power of 2, since this case can never be reloaded from a reg. */
11172 gcc_assert (*amountp != -1);
11173 *amountp = int_log2 (*amountp);
11174 return ARM_LSL_NAME;
11177 gcc_unreachable ();
11180 if (*amountp != -1)
11182 /* This is not 100% correct, but follows from the desire to merge
11183 multiplication by a power of 2 with the recognizer for a
11184 shift. >=32 is not a valid shift for "lsl", so we must try and
11185 output a shift that produces the correct arithmetical result.
11186 Using lsr #32 is identical except for the fact that the carry bit
11187 is not set correctly if we set the flags; but we never use the
11188 carry bit from such an operation, so we can ignore that. */
11189 if (code == ROTATERT)
11190 /* Rotate is just modulo 32. */
11192 else if (*amountp != (*amountp & 31))
11194 if (code == ASHIFT)
11199 /* Shifts of 0 are no-ops. */
11207 /* Obtain the shift from the POWER of two. */
11209 static HOST_WIDE_INT
11210 int_log2 (HOST_WIDE_INT power)
11212 HOST_WIDE_INT shift = 0;
11214 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11216 gcc_assert (shift <= 31);
11223 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11224 because /bin/as is horribly restrictive. The judgement about
11225 whether or not each character is 'printable' (and can be output as
11226 is) or not (and must be printed with an octal escape) must be made
11227 with reference to the *host* character set -- the situation is
11228 similar to that discussed in the comments above pp_c_char in
11229 c-pretty-print.c. */
11231 #define MAX_ASCII_LEN 51
11234 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11237 int len_so_far = 0;
11239 fputs ("\t.ascii\t\"", stream);
11241 for (i = 0; i < len; i++)
11245 if (len_so_far >= MAX_ASCII_LEN)
11247 fputs ("\"\n\t.ascii\t\"", stream);
11253 if (c == '\\' || c == '\"')
11255 putc ('\\', stream);
11263 fprintf (stream, "\\%03o", c);
11268 fputs ("\"\n", stream);
11271 /* Compute the register save mask for registers 0 through 12
11272 inclusive. This code is used by arm_compute_save_reg_mask. */
11274 static unsigned long
11275 arm_compute_save_reg0_reg12_mask (void)
11277 unsigned long func_type = arm_current_func_type ();
11278 unsigned long save_reg_mask = 0;
11281 if (IS_INTERRUPT (func_type))
11283 unsigned int max_reg;
11284 /* Interrupt functions must not corrupt any registers,
11285 even call clobbered ones. If this is a leaf function
11286 we can just examine the registers used by the RTL, but
11287 otherwise we have to assume that whatever function is
11288 called might clobber anything, and so we have to save
11289 all the call-clobbered registers as well. */
11290 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11291 /* FIQ handlers have registers r8 - r12 banked, so
11292 we only need to check r0 - r7, Normal ISRs only
11293 bank r14 and r15, so we must check up to r12.
11294 r13 is the stack pointer which is always preserved,
11295 so we do not need to consider it here. */
11300 for (reg = 0; reg <= max_reg; reg++)
11301 if (df_regs_ever_live_p (reg)
11302 || (! current_function_is_leaf && call_used_regs[reg]))
11303 save_reg_mask |= (1 << reg);
11305 /* Also save the pic base register if necessary. */
11307 && !TARGET_SINGLE_PIC_BASE
11308 && arm_pic_register != INVALID_REGNUM
11309 && crtl->uses_pic_offset_table)
11310 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11314 /* In the normal case we only need to save those registers
11315 which are call saved and which are used by this function. */
11316 for (reg = 0; reg <= 11; reg++)
11317 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11318 save_reg_mask |= (1 << reg);
11320 /* Handle the frame pointer as a special case. */
11321 if (frame_pointer_needed)
11322 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11324 /* If we aren't loading the PIC register,
11325 don't stack it even though it may be live. */
11327 && !TARGET_SINGLE_PIC_BASE
11328 && arm_pic_register != INVALID_REGNUM
11329 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11330 || crtl->uses_pic_offset_table))
11331 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11333 /* The prologue will copy SP into R0, so save it. */
11334 if (IS_STACKALIGN (func_type))
11335 save_reg_mask |= 1;
11338 /* Save registers so the exception handler can modify them. */
11339 if (crtl->calls_eh_return)
11345 reg = EH_RETURN_DATA_REGNO (i);
11346 if (reg == INVALID_REGNUM)
11348 save_reg_mask |= 1 << reg;
11352 return save_reg_mask;
11356 /* Compute the number of bytes used to store the static chain register on the
11357 stack, above the stack frame. We need to know this accurately to get the
11358 alignment of the rest of the stack frame correct. */
11360 static int arm_compute_static_chain_stack_bytes (void)
11362 unsigned long func_type = arm_current_func_type ();
11363 int static_chain_stack_bytes = 0;
11365 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
11366 IS_NESTED (func_type) &&
11367 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11368 static_chain_stack_bytes = 4;
11370 return static_chain_stack_bytes;
11374 /* Compute a bit mask of which registers need to be
11375 saved on the stack for the current function.
11376 This is used by arm_get_frame_offsets, which may add extra registers. */
11378 static unsigned long
11379 arm_compute_save_reg_mask (void)
11381 unsigned int save_reg_mask = 0;
11382 unsigned long func_type = arm_current_func_type ();
11385 if (IS_NAKED (func_type))
11386 /* This should never really happen. */
11389 /* If we are creating a stack frame, then we must save the frame pointer,
11390 IP (which will hold the old stack pointer), LR and the PC. */
11391 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11393 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
11396 | (1 << PC_REGNUM);
11398 /* Volatile functions do not return, so there
11399 is no need to save any other registers. */
11400 if (IS_VOLATILE (func_type))
11401 return save_reg_mask;
11403 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
11405 /* Decide if we need to save the link register.
11406 Interrupt routines have their own banked link register,
11407 so they never need to save it.
11408 Otherwise if we do not use the link register we do not need to save
11409 it. If we are pushing other registers onto the stack however, we
11410 can save an instruction in the epilogue by pushing the link register
11411 now and then popping it back into the PC. This incurs extra memory
11412 accesses though, so we only do it when optimizing for size, and only
11413 if we know that we will not need a fancy return sequence. */
11414 if (df_regs_ever_live_p (LR_REGNUM)
11417 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11418 && !crtl->calls_eh_return))
11419 save_reg_mask |= 1 << LR_REGNUM;
11421 if (cfun->machine->lr_save_eliminated)
11422 save_reg_mask &= ~ (1 << LR_REGNUM);
11424 if (TARGET_REALLY_IWMMXT
11425 && ((bit_count (save_reg_mask)
11426 + ARM_NUM_INTS (crtl->args.pretend_args_size +
11427 arm_compute_static_chain_stack_bytes())
11430 /* The total number of registers that are going to be pushed
11431 onto the stack is odd. We need to ensure that the stack
11432 is 64-bit aligned before we start to save iWMMXt registers,
11433 and also before we start to create locals. (A local variable
11434 might be a double or long long which we will load/store using
11435 an iWMMXt instruction). Therefore we need to push another
11436 ARM register, so that the stack will be 64-bit aligned. We
11437 try to avoid using the arg registers (r0 -r3) as they might be
11438 used to pass values in a tail call. */
11439 for (reg = 4; reg <= 12; reg++)
11440 if ((save_reg_mask & (1 << reg)) == 0)
11444 save_reg_mask |= (1 << reg);
11447 cfun->machine->sibcall_blocked = 1;
11448 save_reg_mask |= (1 << 3);
11452 /* We may need to push an additional register for use initializing the
11453 PIC base register. */
11454 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11455 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11457 reg = thumb_find_work_register (1 << 4);
11458 if (!call_used_regs[reg])
11459 save_reg_mask |= (1 << reg);
11462 return save_reg_mask;
11466 /* Compute a bit mask of which registers need to be
11467 saved on the stack for the current function. */
11468 static unsigned long
11469 thumb1_compute_save_reg_mask (void)
11471 unsigned long mask;
11475 for (reg = 0; reg < 12; reg ++)
11476 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11480 && !TARGET_SINGLE_PIC_BASE
11481 && arm_pic_register != INVALID_REGNUM
11482 && crtl->uses_pic_offset_table)
11483 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11485 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11486 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11487 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11489 /* LR will also be pushed if any lo regs are pushed. */
11490 if (mask & 0xff || thumb_force_lr_save ())
11491 mask |= (1 << LR_REGNUM);
11493 /* Make sure we have a low work register if we need one.
11494 We will need one if we are going to push a high register,
11495 but we are not currently intending to push a low register. */
11496 if ((mask & 0xff) == 0
11497 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11499 /* Use thumb_find_work_register to choose which register
11500 we will use. If the register is live then we will
11501 have to push it. Use LAST_LO_REGNUM as our fallback
11502 choice for the register to select. */
11503 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11504 /* Make sure the register returned by thumb_find_work_register is
11505 not part of the return value. */
11506 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11507 reg = LAST_LO_REGNUM;
11509 if (! call_used_regs[reg])
11513 /* The 504 below is 8 bytes less than 512 because there are two possible
11514 alignment words. We can't tell here if they will be present or not so we
11515 have to play it safe and assume that they are. */
11516 if ((CALLER_INTERWORKING_SLOT_SIZE +
11517 ROUND_UP_WORD (get_frame_size ()) +
11518 crtl->outgoing_args_size) >= 504)
11520 /* This is the same as the code in thumb1_expand_prologue() which
11521 determines which register to use for stack decrement. */
11522 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
11523 if (mask & (1 << reg))
11526 if (reg > LAST_LO_REGNUM)
11528 /* Make sure we have a register available for stack decrement. */
11529 mask |= 1 << LAST_LO_REGNUM;
11537 /* Return the number of bytes required to save VFP registers. */
11539 arm_get_vfp_saved_size (void)
11541 unsigned int regno;
11546 /* Space for saved VFP registers. */
11547 if (TARGET_HARD_FLOAT && TARGET_VFP)
11550 for (regno = FIRST_VFP_REGNUM;
11551 regno < LAST_VFP_REGNUM;
11554 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
11555 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11559 /* Workaround ARM10 VFPr1 bug. */
11560 if (count == 2 && !arm_arch6)
11562 saved += count * 8;
11571 if (count == 2 && !arm_arch6)
11573 saved += count * 8;
11580 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11581 everything bar the final return instruction. */
11583 output_return_instruction (rtx operand, int really_return, int reverse)
11585 char conditional[10];
11588 unsigned long live_regs_mask;
11589 unsigned long func_type;
11590 arm_stack_offsets *offsets;
11592 func_type = arm_current_func_type ();
11594 if (IS_NAKED (func_type))
11597 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11599 /* If this function was declared non-returning, and we have
11600 found a tail call, then we have to trust that the called
11601 function won't return. */
11606 /* Otherwise, trap an attempted return by aborting. */
11608 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11610 assemble_external_libcall (ops[1]);
11611 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11617 gcc_assert (!cfun->calls_alloca || really_return);
11619 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11621 return_used_this_function = 1;
11623 offsets = arm_get_frame_offsets ();
11624 live_regs_mask = offsets->saved_regs_mask;
11626 if (live_regs_mask)
11628 const char * return_reg;
11630 /* If we do not have any special requirements for function exit
11631 (e.g. interworking) then we can load the return address
11632 directly into the PC. Otherwise we must load it into LR. */
11634 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11635 return_reg = reg_names[PC_REGNUM];
11637 return_reg = reg_names[LR_REGNUM];
11639 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11641 /* There are three possible reasons for the IP register
11642 being saved. 1) a stack frame was created, in which case
11643 IP contains the old stack pointer, or 2) an ISR routine
11644 corrupted it, or 3) it was saved to align the stack on
11645 iWMMXt. In case 1, restore IP into SP, otherwise just
11647 if (frame_pointer_needed)
11649 live_regs_mask &= ~ (1 << IP_REGNUM);
11650 live_regs_mask |= (1 << SP_REGNUM);
11653 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11656 /* On some ARM architectures it is faster to use LDR rather than
11657 LDM to load a single register. On other architectures, the
11658 cost is the same. In 26 bit mode, or for exception handlers,
11659 we have to use LDM to load the PC so that the CPSR is also
11661 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11662 if (live_regs_mask == (1U << reg))
11665 if (reg <= LAST_ARM_REGNUM
11666 && (reg != LR_REGNUM
11668 || ! IS_INTERRUPT (func_type)))
11670 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11671 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11678 /* Generate the load multiple instruction to restore the
11679 registers. Note we can get here, even if
11680 frame_pointer_needed is true, but only if sp already
11681 points to the base of the saved core registers. */
11682 if (live_regs_mask & (1 << SP_REGNUM))
11684 unsigned HOST_WIDE_INT stack_adjust;
11686 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11687 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11689 if (stack_adjust && arm_arch5 && TARGET_ARM)
11690 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11693 /* If we can't use ldmib (SA110 bug),
11694 then try to pop r3 instead. */
11696 live_regs_mask |= 1 << 3;
11697 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11701 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11703 p = instr + strlen (instr);
11705 for (reg = 0; reg <= SP_REGNUM; reg++)
11706 if (live_regs_mask & (1 << reg))
11708 int l = strlen (reg_names[reg]);
11714 memcpy (p, ", ", 2);
11718 memcpy (p, "%|", 2);
11719 memcpy (p + 2, reg_names[reg], l);
11723 if (live_regs_mask & (1 << LR_REGNUM))
11725 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11726 /* If returning from an interrupt, restore the CPSR. */
11727 if (IS_INTERRUPT (func_type))
11734 output_asm_insn (instr, & operand);
11736 /* See if we need to generate an extra instruction to
11737 perform the actual function return. */
11739 && func_type != ARM_FT_INTERWORKED
11740 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11742 /* The return has already been handled
11743 by loading the LR into the PC. */
11750 switch ((int) ARM_FUNC_TYPE (func_type))
11754 /* ??? This is wrong for unified assembly syntax. */
11755 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11758 case ARM_FT_INTERWORKED:
11759 sprintf (instr, "bx%s\t%%|lr", conditional);
11762 case ARM_FT_EXCEPTION:
11763 /* ??? This is wrong for unified assembly syntax. */
11764 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11768 /* Use bx if it's available. */
11769 if (arm_arch5 || arm_arch4t)
11770 sprintf (instr, "bx%s\t%%|lr", conditional);
11772 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11776 output_asm_insn (instr, & operand);
11782 /* Write the function name into the code section, directly preceding
11783 the function prologue.
11785 Code will be output similar to this:
11787 .ascii "arm_poke_function_name", 0
11790 .word 0xff000000 + (t1 - t0)
11791 arm_poke_function_name
11793 stmfd sp!, {fp, ip, lr, pc}
11796 When performing a stack backtrace, code can inspect the value
11797 of 'pc' stored at 'fp' + 0. If the trace function then looks
11798 at location pc - 12 and the top 8 bits are set, then we know
11799 that there is a function name embedded immediately preceding this
11800 location and has length ((pc[-3]) & 0xff000000).
11802 We assume that pc is declared as a pointer to an unsigned long.
11804 It is of no benefit to output the function name if we are assembling
11805 a leaf function. These function types will not contain a stack
11806 backtrace structure, therefore it is not possible to determine the
11809 arm_poke_function_name (FILE *stream, const char *name)
11811 unsigned long alignlength;
11812 unsigned long length;
11815 length = strlen (name) + 1;
11816 alignlength = ROUND_UP_WORD (length);
11818 ASM_OUTPUT_ASCII (stream, name, length);
11819 ASM_OUTPUT_ALIGN (stream, 2);
11820 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11821 assemble_aligned_integer (UNITS_PER_WORD, x);
11824 /* Place some comments into the assembler stream
11825 describing the current function. */
11827 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11829 unsigned long func_type;
11833 thumb1_output_function_prologue (f, frame_size);
11837 /* Sanity check. */
11838 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11840 func_type = arm_current_func_type ();
11842 switch ((int) ARM_FUNC_TYPE (func_type))
11845 case ARM_FT_NORMAL:
11847 case ARM_FT_INTERWORKED:
11848 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11851 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11854 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11856 case ARM_FT_EXCEPTION:
11857 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11861 if (IS_NAKED (func_type))
11862 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11864 if (IS_VOLATILE (func_type))
11865 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11867 if (IS_NESTED (func_type))
11868 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11869 if (IS_STACKALIGN (func_type))
11870 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11872 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11874 crtl->args.pretend_args_size, frame_size);
11876 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11877 frame_pointer_needed,
11878 cfun->machine->uses_anonymous_args);
11880 if (cfun->machine->lr_save_eliminated)
11881 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11883 if (crtl->calls_eh_return)
11884 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11886 return_used_this_function = 0;
11890 arm_output_epilogue (rtx sibling)
11893 unsigned long saved_regs_mask;
11894 unsigned long func_type;
11895 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11896 frame that is $fp + 4 for a non-variadic function. */
11897 int floats_offset = 0;
11899 FILE * f = asm_out_file;
11900 unsigned int lrm_count = 0;
11901 int really_return = (sibling == NULL);
11903 arm_stack_offsets *offsets;
11905 /* If we have already generated the return instruction
11906 then it is futile to generate anything else. */
11907 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11910 func_type = arm_current_func_type ();
11912 if (IS_NAKED (func_type))
11913 /* Naked functions don't have epilogues. */
11916 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11920 /* A volatile function should never return. Call abort. */
11921 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11922 assemble_external_libcall (op);
11923 output_asm_insn ("bl\t%a0", &op);
11928 /* If we are throwing an exception, then we really must be doing a
11929 return, so we can't tail-call. */
11930 gcc_assert (!crtl->calls_eh_return || really_return);
11932 offsets = arm_get_frame_offsets ();
11933 saved_regs_mask = offsets->saved_regs_mask;
11936 lrm_count = bit_count (saved_regs_mask);
11938 floats_offset = offsets->saved_args;
11939 /* Compute how far away the floats will be. */
11940 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11941 if (saved_regs_mask & (1 << reg))
11942 floats_offset += 4;
11944 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11946 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11947 int vfp_offset = offsets->frame;
11949 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11951 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11952 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11954 floats_offset += 12;
11955 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11956 reg, FP_REGNUM, floats_offset - vfp_offset);
11961 start_reg = LAST_FPA_REGNUM;
11963 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11965 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11967 floats_offset += 12;
11969 /* We can't unstack more than four registers at once. */
11970 if (start_reg - reg == 3)
11972 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11973 reg, FP_REGNUM, floats_offset - vfp_offset);
11974 start_reg = reg - 1;
11979 if (reg != start_reg)
11980 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11981 reg + 1, start_reg - reg,
11982 FP_REGNUM, floats_offset - vfp_offset);
11983 start_reg = reg - 1;
11987 /* Just in case the last register checked also needs unstacking. */
11988 if (reg != start_reg)
11989 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11990 reg + 1, start_reg - reg,
11991 FP_REGNUM, floats_offset - vfp_offset);
11994 if (TARGET_HARD_FLOAT && TARGET_VFP)
11998 /* The fldmd insns do not have base+offset addressing
11999 modes, so we use IP to hold the address. */
12000 saved_size = arm_get_vfp_saved_size ();
12002 if (saved_size > 0)
12004 floats_offset += saved_size;
12005 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
12006 FP_REGNUM, floats_offset - vfp_offset);
12008 start_reg = FIRST_VFP_REGNUM;
12009 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12011 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12012 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12014 if (start_reg != reg)
12015 vfp_output_fldmd (f, IP_REGNUM,
12016 (start_reg - FIRST_VFP_REGNUM) / 2,
12017 (reg - start_reg) / 2);
12018 start_reg = reg + 2;
12021 if (start_reg != reg)
12022 vfp_output_fldmd (f, IP_REGNUM,
12023 (start_reg - FIRST_VFP_REGNUM) / 2,
12024 (reg - start_reg) / 2);
12029 /* The frame pointer is guaranteed to be non-double-word aligned.
12030 This is because it is set to (old_stack_pointer - 4) and the
12031 old_stack_pointer was double word aligned. Thus the offset to
12032 the iWMMXt registers to be loaded must also be non-double-word
12033 sized, so that the resultant address *is* double-word aligned.
12034 We can ignore floats_offset since that was already included in
12035 the live_regs_mask. */
12036 lrm_count += (lrm_count % 2 ? 2 : 1);
12038 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12039 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12041 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
12042 reg, FP_REGNUM, lrm_count * 4);
12047 /* saved_regs_mask should contain the IP, which at the time of stack
12048 frame generation actually contains the old stack pointer. So a
12049 quick way to unwind the stack is just pop the IP register directly
12050 into the stack pointer. */
12051 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
12052 saved_regs_mask &= ~ (1 << IP_REGNUM);
12053 saved_regs_mask |= (1 << SP_REGNUM);
12055 /* There are two registers left in saved_regs_mask - LR and PC. We
12056 only need to restore the LR register (the return address), but to
12057 save time we can load it directly into the PC, unless we need a
12058 special function exit sequence, or we are not really returning. */
12060 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12061 && !crtl->calls_eh_return)
12062 /* Delete the LR from the register mask, so that the LR on
12063 the stack is loaded into the PC in the register mask. */
12064 saved_regs_mask &= ~ (1 << LR_REGNUM);
12066 saved_regs_mask &= ~ (1 << PC_REGNUM);
12068 /* We must use SP as the base register, because SP is one of the
12069 registers being restored. If an interrupt or page fault
12070 happens in the ldm instruction, the SP might or might not
12071 have been restored. That would be bad, as then SP will no
12072 longer indicate the safe area of stack, and we can get stack
12073 corruption. Using SP as the base register means that it will
12074 be reset correctly to the original value, should an interrupt
12075 occur. If the stack pointer already points at the right
12076 place, then omit the subtraction. */
12077 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
12078 || cfun->calls_alloca)
12079 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
12080 4 * bit_count (saved_regs_mask));
12081 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
12083 if (IS_INTERRUPT (func_type))
12084 /* Interrupt handlers will have pushed the
12085 IP onto the stack, so restore it now. */
12086 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
12090 /* This branch is executed for ARM mode (non-apcs frames) and
12091 Thumb-2 mode. Frame layout is essentially the same for those
12092 cases, except that in ARM mode frame pointer points to the
12093 first saved register, while in Thumb-2 mode the frame pointer points
12094 to the last saved register.
12096 It is possible to make frame pointer point to last saved
12097 register in both cases, and remove some conditionals below.
12098 That means that fp setup in prologue would be just "mov fp, sp"
12099 and sp restore in epilogue would be just "mov sp, fp", whereas
12100 now we have to use add/sub in those cases. However, the value
12101 of that would be marginal, as both mov and add/sub are 32-bit
12102 in ARM mode, and it would require extra conditionals
12103 in arm_expand_prologue to distingish ARM-apcs-frame case
12104 (where frame pointer is required to point at first register)
12105 and ARM-non-apcs-frame. Therefore, such change is postponed
12106 until real need arise. */
12107 unsigned HOST_WIDE_INT amount;
12109 /* Restore stack pointer if necessary. */
12110 if (TARGET_ARM && frame_pointer_needed)
12112 operands[0] = stack_pointer_rtx;
12113 operands[1] = hard_frame_pointer_rtx;
12115 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
12116 output_add_immediate (operands);
12120 if (frame_pointer_needed)
12122 /* For Thumb-2 restore sp from the frame pointer.
12123 Operand restrictions mean we have to incrememnt FP, then copy
12125 amount = offsets->locals_base - offsets->saved_regs;
12126 operands[0] = hard_frame_pointer_rtx;
12130 unsigned long count;
12131 operands[0] = stack_pointer_rtx;
12132 amount = offsets->outgoing_args - offsets->saved_regs;
12133 /* pop call clobbered registers if it avoids a
12134 separate stack adjustment. */
12135 count = offsets->saved_regs - offsets->saved_args;
12138 && !crtl->calls_eh_return
12139 && bit_count(saved_regs_mask) * 4 == count
12140 && !IS_INTERRUPT (func_type)
12141 && !crtl->tail_call_emit)
12143 unsigned long mask;
12144 mask = (1 << (arm_size_return_regs() / 4)) - 1;
12146 mask &= ~saved_regs_mask;
12148 while (bit_count (mask) * 4 > amount)
12150 while ((mask & (1 << reg)) == 0)
12152 mask &= ~(1 << reg);
12154 if (bit_count (mask) * 4 == amount) {
12156 saved_regs_mask |= mask;
12163 operands[1] = operands[0];
12164 operands[2] = GEN_INT (amount);
12165 output_add_immediate (operands);
12167 if (frame_pointer_needed)
12168 asm_fprintf (f, "\tmov\t%r, %r\n",
12169 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
12172 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12174 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12175 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12176 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
12181 start_reg = FIRST_FPA_REGNUM;
12183 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12185 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12187 if (reg - start_reg == 3)
12189 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
12190 start_reg, SP_REGNUM);
12191 start_reg = reg + 1;
12196 if (reg != start_reg)
12197 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12198 start_reg, reg - start_reg,
12201 start_reg = reg + 1;
12205 /* Just in case the last register checked also needs unstacking. */
12206 if (reg != start_reg)
12207 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12208 start_reg, reg - start_reg, SP_REGNUM);
12211 if (TARGET_HARD_FLOAT && TARGET_VFP)
12213 start_reg = FIRST_VFP_REGNUM;
12214 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12216 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12217 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12219 if (start_reg != reg)
12220 vfp_output_fldmd (f, SP_REGNUM,
12221 (start_reg - FIRST_VFP_REGNUM) / 2,
12222 (reg - start_reg) / 2);
12223 start_reg = reg + 2;
12226 if (start_reg != reg)
12227 vfp_output_fldmd (f, SP_REGNUM,
12228 (start_reg - FIRST_VFP_REGNUM) / 2,
12229 (reg - start_reg) / 2);
12232 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
12233 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12234 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
12236 /* If we can, restore the LR into the PC. */
12237 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
12238 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
12239 && !IS_STACKALIGN (func_type)
12241 && crtl->args.pretend_args_size == 0
12242 && saved_regs_mask & (1 << LR_REGNUM)
12243 && !crtl->calls_eh_return)
12245 saved_regs_mask &= ~ (1 << LR_REGNUM);
12246 saved_regs_mask |= (1 << PC_REGNUM);
12247 rfe = IS_INTERRUPT (func_type);
12252 /* Load the registers off the stack. If we only have one register
12253 to load use the LDR instruction - it is faster. For Thumb-2
12254 always use pop and the assembler will pick the best instruction.*/
12255 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
12256 && !IS_INTERRUPT(func_type))
12258 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
12260 else if (saved_regs_mask)
12262 if (saved_regs_mask & (1 << SP_REGNUM))
12263 /* Note - write back to the stack register is not enabled
12264 (i.e. "ldmfd sp!..."). We know that the stack pointer is
12265 in the list of registers and if we add writeback the
12266 instruction becomes UNPREDICTABLE. */
12267 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
12269 else if (TARGET_ARM)
12270 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
12273 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
12276 if (crtl->args.pretend_args_size)
12278 /* Unwind the pre-pushed regs. */
12279 operands[0] = operands[1] = stack_pointer_rtx;
12280 operands[2] = GEN_INT (crtl->args.pretend_args_size);
12281 output_add_immediate (operands);
12285 /* We may have already restored PC directly from the stack. */
12286 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
12289 /* Stack adjustment for exception handler. */
12290 if (crtl->calls_eh_return)
12291 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
12292 ARM_EH_STACKADJ_REGNUM);
12294 /* Generate the return instruction. */
12295 switch ((int) ARM_FUNC_TYPE (func_type))
12299 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
12302 case ARM_FT_EXCEPTION:
12303 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12306 case ARM_FT_INTERWORKED:
12307 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12311 if (IS_STACKALIGN (func_type))
12313 /* See comment in arm_expand_prologue. */
12314 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
12316 if (arm_arch5 || arm_arch4t)
12317 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12319 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12327 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
12328 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
12330 arm_stack_offsets *offsets;
12336 /* Emit any call-via-reg trampolines that are needed for v4t support
12337 of call_reg and call_value_reg type insns. */
12338 for (regno = 0; regno < LR_REGNUM; regno++)
12340 rtx label = cfun->machine->call_via[regno];
12344 switch_to_section (function_section (current_function_decl));
12345 targetm.asm_out.internal_label (asm_out_file, "L",
12346 CODE_LABEL_NUMBER (label));
12347 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
12351 /* ??? Probably not safe to set this here, since it assumes that a
12352 function will be emitted as assembly immediately after we generate
12353 RTL for it. This does not happen for inline functions. */
12354 return_used_this_function = 0;
12356 else /* TARGET_32BIT */
12358 /* We need to take into account any stack-frame rounding. */
12359 offsets = arm_get_frame_offsets ();
12361 gcc_assert (!use_return_insn (FALSE, NULL)
12362 || !return_used_this_function
12363 || offsets->saved_regs == offsets->outgoing_args
12364 || frame_pointer_needed);
12366 /* Reset the ARM-specific per-function variables. */
12367 after_arm_reorg = 0;
12371 /* Generate and emit an insn that we will recognize as a push_multi.
12372 Unfortunately, since this insn does not reflect very well the actual
12373 semantics of the operation, we need to annotate the insn for the benefit
12374 of DWARF2 frame unwind information. */
12376 emit_multi_reg_push (unsigned long mask)
12379 int num_dwarf_regs;
12383 int dwarf_par_index;
12386 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12387 if (mask & (1 << i))
12390 gcc_assert (num_regs && num_regs <= 16);
12392 /* We don't record the PC in the dwarf frame information. */
12393 num_dwarf_regs = num_regs;
12394 if (mask & (1 << PC_REGNUM))
12397 /* For the body of the insn we are going to generate an UNSPEC in
12398 parallel with several USEs. This allows the insn to be recognized
12399 by the push_multi pattern in the arm.md file. The insn looks
12400 something like this:
12403 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
12404 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
12405 (use (reg:SI 11 fp))
12406 (use (reg:SI 12 ip))
12407 (use (reg:SI 14 lr))
12408 (use (reg:SI 15 pc))
12411 For the frame note however, we try to be more explicit and actually
12412 show each register being stored into the stack frame, plus a (single)
12413 decrement of the stack pointer. We do it this way in order to be
12414 friendly to the stack unwinding code, which only wants to see a single
12415 stack decrement per instruction. The RTL we generate for the note looks
12416 something like this:
12419 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
12420 (set (mem:SI (reg:SI sp)) (reg:SI r4))
12421 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
12422 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
12423 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
12426 This sequence is used both by the code to support stack unwinding for
12427 exceptions handlers and the code to generate dwarf2 frame debugging. */
12429 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
12430 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
12431 dwarf_par_index = 1;
12433 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12435 if (mask & (1 << i))
12437 reg = gen_rtx_REG (SImode, i);
12439 XVECEXP (par, 0, 0)
12440 = gen_rtx_SET (VOIDmode,
12441 gen_frame_mem (BLKmode,
12442 gen_rtx_PRE_DEC (BLKmode,
12443 stack_pointer_rtx)),
12444 gen_rtx_UNSPEC (BLKmode,
12445 gen_rtvec (1, reg),
12446 UNSPEC_PUSH_MULT));
12448 if (i != PC_REGNUM)
12450 tmp = gen_rtx_SET (VOIDmode,
12451 gen_frame_mem (SImode, stack_pointer_rtx),
12453 RTX_FRAME_RELATED_P (tmp) = 1;
12454 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
12462 for (j = 1, i++; j < num_regs; i++)
12464 if (mask & (1 << i))
12466 reg = gen_rtx_REG (SImode, i);
12468 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
12470 if (i != PC_REGNUM)
12473 = gen_rtx_SET (VOIDmode,
12474 gen_frame_mem (SImode,
12475 plus_constant (stack_pointer_rtx,
12478 RTX_FRAME_RELATED_P (tmp) = 1;
12479 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
12486 par = emit_insn (par);
12488 tmp = gen_rtx_SET (VOIDmode,
12490 plus_constant (stack_pointer_rtx, -4 * num_regs));
12491 RTX_FRAME_RELATED_P (tmp) = 1;
12492 XVECEXP (dwarf, 0, 0) = tmp;
12494 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12499 /* Calculate the size of the return value that is passed in registers. */
12501 arm_size_return_regs (void)
12503 enum machine_mode mode;
12505 if (crtl->return_rtx != 0)
12506 mode = GET_MODE (crtl->return_rtx);
12508 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12510 return GET_MODE_SIZE (mode);
12514 emit_sfm (int base_reg, int count)
12521 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12522 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12524 reg = gen_rtx_REG (XFmode, base_reg++);
12526 XVECEXP (par, 0, 0)
12527 = gen_rtx_SET (VOIDmode,
12528 gen_frame_mem (BLKmode,
12529 gen_rtx_PRE_DEC (BLKmode,
12530 stack_pointer_rtx)),
12531 gen_rtx_UNSPEC (BLKmode,
12532 gen_rtvec (1, reg),
12533 UNSPEC_PUSH_MULT));
12534 tmp = gen_rtx_SET (VOIDmode,
12535 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12536 RTX_FRAME_RELATED_P (tmp) = 1;
12537 XVECEXP (dwarf, 0, 1) = tmp;
12539 for (i = 1; i < count; i++)
12541 reg = gen_rtx_REG (XFmode, base_reg++);
12542 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12544 tmp = gen_rtx_SET (VOIDmode,
12545 gen_frame_mem (XFmode,
12546 plus_constant (stack_pointer_rtx,
12549 RTX_FRAME_RELATED_P (tmp) = 1;
12550 XVECEXP (dwarf, 0, i + 1) = tmp;
12553 tmp = gen_rtx_SET (VOIDmode,
12555 plus_constant (stack_pointer_rtx, -12 * count));
12557 RTX_FRAME_RELATED_P (tmp) = 1;
12558 XVECEXP (dwarf, 0, 0) = tmp;
12560 par = emit_insn (par);
12561 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12567 /* Return true if the current function needs to save/restore LR. */
12570 thumb_force_lr_save (void)
12572 return !cfun->machine->lr_save_eliminated
12573 && (!leaf_function_p ()
12574 || thumb_far_jump_used_p ()
12575 || df_regs_ever_live_p (LR_REGNUM));
12579 /* Compute the distance from register FROM to register TO.
12580 These can be the arg pointer (26), the soft frame pointer (25),
12581 the stack pointer (13) or the hard frame pointer (11).
12582 In thumb mode r7 is used as the soft frame pointer, if needed.
12583 Typical stack layout looks like this:
12585 old stack pointer -> | |
12588 | | saved arguments for
12589 | | vararg functions
12592 hard FP & arg pointer -> | | \
12600 soft frame pointer -> | | /
12605 locals base pointer -> | | /
12610 current stack pointer -> | | /
12613 For a given function some or all of these stack components
12614 may not be needed, giving rise to the possibility of
12615 eliminating some of the registers.
12617 The values returned by this function must reflect the behavior
12618 of arm_expand_prologue() and arm_compute_save_reg_mask().
12620 The sign of the number returned reflects the direction of stack
12621 growth, so the values are positive for all eliminations except
12622 from the soft frame pointer to the hard frame pointer.
12624 SFP may point just inside the local variables block to ensure correct
12628 /* Calculate stack offsets. These are used to calculate register elimination
12629 offsets and in prologue/epilogue code. Also calculates which registers
12630 should be saved. */
12632 static arm_stack_offsets *
12633 arm_get_frame_offsets (void)
12635 struct arm_stack_offsets *offsets;
12636 unsigned long func_type;
12640 HOST_WIDE_INT frame_size;
12643 offsets = &cfun->machine->stack_offsets;
12645 /* We need to know if we are a leaf function. Unfortunately, it
12646 is possible to be called after start_sequence has been called,
12647 which causes get_insns to return the insns for the sequence,
12648 not the function, which will cause leaf_function_p to return
12649 the incorrect result.
12651 to know about leaf functions once reload has completed, and the
12652 frame size cannot be changed after that time, so we can safely
12653 use the cached value. */
12655 if (reload_completed)
12658 /* Initially this is the size of the local variables. It will translated
12659 into an offset once we have determined the size of preceding data. */
12660 frame_size = ROUND_UP_WORD (get_frame_size ());
12662 leaf = leaf_function_p ();
12664 /* Space for variadic functions. */
12665 offsets->saved_args = crtl->args.pretend_args_size;
12667 /* In Thumb mode this is incorrect, but never used. */
12668 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12669 arm_compute_static_chain_stack_bytes();
12673 unsigned int regno;
12675 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12676 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12677 saved = core_saved;
12679 /* We know that SP will be doubleword aligned on entry, and we must
12680 preserve that condition at any subroutine call. We also require the
12681 soft frame pointer to be doubleword aligned. */
12683 if (TARGET_REALLY_IWMMXT)
12685 /* Check for the call-saved iWMMXt registers. */
12686 for (regno = FIRST_IWMMXT_REGNUM;
12687 regno <= LAST_IWMMXT_REGNUM;
12689 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12693 func_type = arm_current_func_type ();
12694 if (! IS_VOLATILE (func_type))
12696 /* Space for saved FPA registers. */
12697 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12698 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12701 /* Space for saved VFP registers. */
12702 if (TARGET_HARD_FLOAT && TARGET_VFP)
12703 saved += arm_get_vfp_saved_size ();
12706 else /* TARGET_THUMB1 */
12708 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12709 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12710 saved = core_saved;
12711 if (TARGET_BACKTRACE)
12715 /* Saved registers include the stack frame. */
12716 offsets->saved_regs = offsets->saved_args + saved +
12717 arm_compute_static_chain_stack_bytes();
12718 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12719 /* A leaf function does not need any stack alignment if it has nothing
12721 if (leaf && frame_size == 0)
12723 offsets->outgoing_args = offsets->soft_frame;
12724 offsets->locals_base = offsets->soft_frame;
12728 /* Ensure SFP has the correct alignment. */
12729 if (ARM_DOUBLEWORD_ALIGN
12730 && (offsets->soft_frame & 7))
12732 offsets->soft_frame += 4;
12733 /* Try to align stack by pushing an extra reg. Don't bother doing this
12734 when there is a stack frame as the alignment will be rolled into
12735 the normal stack adjustment. */
12736 if (frame_size + crtl->outgoing_args_size == 0)
12740 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12742 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12749 if (reg == -1 && arm_size_return_regs () <= 12
12750 && !crtl->tail_call_emit)
12752 /* Push/pop an argument register (r3) if all callee saved
12753 registers are already being pushed. */
12759 offsets->saved_regs += 4;
12760 offsets->saved_regs_mask |= (1 << reg);
12765 offsets->locals_base = offsets->soft_frame + frame_size;
12766 offsets->outgoing_args = (offsets->locals_base
12767 + crtl->outgoing_args_size);
12769 if (ARM_DOUBLEWORD_ALIGN)
12771 /* Ensure SP remains doubleword aligned. */
12772 if (offsets->outgoing_args & 7)
12773 offsets->outgoing_args += 4;
12774 gcc_assert (!(offsets->outgoing_args & 7));
12781 /* Calculate the relative offsets for the different stack pointers. Positive
12782 offsets are in the direction of stack growth. */
12785 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12787 arm_stack_offsets *offsets;
12789 offsets = arm_get_frame_offsets ();
12791 /* OK, now we have enough information to compute the distances.
12792 There must be an entry in these switch tables for each pair
12793 of registers in ELIMINABLE_REGS, even if some of the entries
12794 seem to be redundant or useless. */
12797 case ARG_POINTER_REGNUM:
12800 case THUMB_HARD_FRAME_POINTER_REGNUM:
12803 case FRAME_POINTER_REGNUM:
12804 /* This is the reverse of the soft frame pointer
12805 to hard frame pointer elimination below. */
12806 return offsets->soft_frame - offsets->saved_args;
12808 case ARM_HARD_FRAME_POINTER_REGNUM:
12809 /* This is only non-zero in the case where the static chain register
12810 is stored above the frame. */
12811 return offsets->frame - offsets->saved_args - 4;
12813 case STACK_POINTER_REGNUM:
12814 /* If nothing has been pushed on the stack at all
12815 then this will return -4. This *is* correct! */
12816 return offsets->outgoing_args - (offsets->saved_args + 4);
12819 gcc_unreachable ();
12821 gcc_unreachable ();
12823 case FRAME_POINTER_REGNUM:
12826 case THUMB_HARD_FRAME_POINTER_REGNUM:
12829 case ARM_HARD_FRAME_POINTER_REGNUM:
12830 /* The hard frame pointer points to the top entry in the
12831 stack frame. The soft frame pointer to the bottom entry
12832 in the stack frame. If there is no stack frame at all,
12833 then they are identical. */
12835 return offsets->frame - offsets->soft_frame;
12837 case STACK_POINTER_REGNUM:
12838 return offsets->outgoing_args - offsets->soft_frame;
12841 gcc_unreachable ();
12843 gcc_unreachable ();
12846 /* You cannot eliminate from the stack pointer.
12847 In theory you could eliminate from the hard frame
12848 pointer to the stack pointer, but this will never
12849 happen, since if a stack frame is not needed the
12850 hard frame pointer will never be used. */
12851 gcc_unreachable ();
12856 /* Emit RTL to save coprocessor registers on function entry. Returns the
12857 number of bytes pushed. */
12860 arm_save_coproc_regs(void)
12862 int saved_size = 0;
12864 unsigned start_reg;
12867 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12868 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12870 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12871 insn = gen_rtx_MEM (V2SImode, insn);
12872 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12873 RTX_FRAME_RELATED_P (insn) = 1;
12877 /* Save any floating point call-saved registers used by this
12879 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12881 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12882 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12884 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12885 insn = gen_rtx_MEM (XFmode, insn);
12886 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12887 RTX_FRAME_RELATED_P (insn) = 1;
12893 start_reg = LAST_FPA_REGNUM;
12895 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12897 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12899 if (start_reg - reg == 3)
12901 insn = emit_sfm (reg, 4);
12902 RTX_FRAME_RELATED_P (insn) = 1;
12904 start_reg = reg - 1;
12909 if (start_reg != reg)
12911 insn = emit_sfm (reg + 1, start_reg - reg);
12912 RTX_FRAME_RELATED_P (insn) = 1;
12913 saved_size += (start_reg - reg) * 12;
12915 start_reg = reg - 1;
12919 if (start_reg != reg)
12921 insn = emit_sfm (reg + 1, start_reg - reg);
12922 saved_size += (start_reg - reg) * 12;
12923 RTX_FRAME_RELATED_P (insn) = 1;
12926 if (TARGET_HARD_FLOAT && TARGET_VFP)
12928 start_reg = FIRST_VFP_REGNUM;
12930 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12932 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12933 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12935 if (start_reg != reg)
12936 saved_size += vfp_emit_fstmd (start_reg,
12937 (reg - start_reg) / 2);
12938 start_reg = reg + 2;
12941 if (start_reg != reg)
12942 saved_size += vfp_emit_fstmd (start_reg,
12943 (reg - start_reg) / 2);
12949 /* Set the Thumb frame pointer from the stack pointer. */
12952 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12954 HOST_WIDE_INT amount;
12957 amount = offsets->outgoing_args - offsets->locals_base;
12959 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12960 stack_pointer_rtx, GEN_INT (amount)));
12963 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12964 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12965 expects the first two operands to be the same. */
12968 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12970 hard_frame_pointer_rtx));
12974 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12975 hard_frame_pointer_rtx,
12976 stack_pointer_rtx));
12978 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12979 plus_constant (stack_pointer_rtx, amount));
12980 RTX_FRAME_RELATED_P (dwarf) = 1;
12981 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12985 RTX_FRAME_RELATED_P (insn) = 1;
12988 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12991 arm_expand_prologue (void)
12996 unsigned long live_regs_mask;
12997 unsigned long func_type;
12999 int saved_pretend_args = 0;
13000 int saved_regs = 0;
13001 unsigned HOST_WIDE_INT args_to_push;
13002 arm_stack_offsets *offsets;
13004 func_type = arm_current_func_type ();
13006 /* Naked functions don't have prologues. */
13007 if (IS_NAKED (func_type))
13010 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
13011 args_to_push = crtl->args.pretend_args_size;
13013 /* Compute which register we will have to save onto the stack. */
13014 offsets = arm_get_frame_offsets ();
13015 live_regs_mask = offsets->saved_regs_mask;
13017 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
13019 if (IS_STACKALIGN (func_type))
13024 /* Handle a word-aligned stack pointer. We generate the following:
13029 <save and restore r0 in normal prologue/epilogue>
13033 The unwinder doesn't need to know about the stack realignment.
13034 Just tell it we saved SP in r0. */
13035 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
13037 r0 = gen_rtx_REG (SImode, 0);
13038 r1 = gen_rtx_REG (SImode, 1);
13039 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
13040 compiler won't choke. */
13041 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
13042 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
13043 insn = gen_movsi (r0, stack_pointer_rtx);
13044 RTX_FRAME_RELATED_P (insn) = 1;
13045 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
13046 dwarf, REG_NOTES (insn));
13048 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
13049 emit_insn (gen_movsi (stack_pointer_rtx, r1));
13052 /* For APCS frames, if IP register is clobbered
13053 when creating frame, save that register in a special
13055 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13057 if (IS_INTERRUPT (func_type))
13059 /* Interrupt functions must not corrupt any registers.
13060 Creating a frame pointer however, corrupts the IP
13061 register, so we must push it first. */
13062 insn = emit_multi_reg_push (1 << IP_REGNUM);
13064 /* Do not set RTX_FRAME_RELATED_P on this insn.
13065 The dwarf stack unwinding code only wants to see one
13066 stack decrement per function, and this is not it. If
13067 this instruction is labeled as being part of the frame
13068 creation sequence then dwarf2out_frame_debug_expr will
13069 die when it encounters the assignment of IP to FP
13070 later on, since the use of SP here establishes SP as
13071 the CFA register and not IP.
13073 Anyway this instruction is not really part of the stack
13074 frame creation although it is part of the prologue. */
13076 else if (IS_NESTED (func_type))
13078 /* The Static chain register is the same as the IP register
13079 used as a scratch register during stack frame creation.
13080 To get around this need to find somewhere to store IP
13081 whilst the frame is being created. We try the following
13084 1. The last argument register.
13085 2. A slot on the stack above the frame. (This only
13086 works if the function is not a varargs function).
13087 3. Register r3, after pushing the argument registers
13090 Note - we only need to tell the dwarf2 backend about the SP
13091 adjustment in the second variant; the static chain register
13092 doesn't need to be unwound, as it doesn't contain a value
13093 inherited from the caller. */
13095 if (df_regs_ever_live_p (3) == false)
13096 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13097 else if (args_to_push == 0)
13101 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
13104 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
13105 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
13108 /* Just tell the dwarf backend that we adjusted SP. */
13109 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13110 plus_constant (stack_pointer_rtx,
13112 RTX_FRAME_RELATED_P (insn) = 1;
13113 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
13114 dwarf, REG_NOTES (insn));
13118 /* Store the args on the stack. */
13119 if (cfun->machine->uses_anonymous_args)
13120 insn = emit_multi_reg_push
13121 ((0xf0 >> (args_to_push / 4)) & 0xf);
13124 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13125 GEN_INT (- args_to_push)));
13127 RTX_FRAME_RELATED_P (insn) = 1;
13129 saved_pretend_args = 1;
13130 fp_offset = args_to_push;
13133 /* Now reuse r3 to preserve IP. */
13134 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13138 insn = emit_set_insn (ip_rtx,
13139 plus_constant (stack_pointer_rtx, fp_offset));
13140 RTX_FRAME_RELATED_P (insn) = 1;
13145 /* Push the argument registers, or reserve space for them. */
13146 if (cfun->machine->uses_anonymous_args)
13147 insn = emit_multi_reg_push
13148 ((0xf0 >> (args_to_push / 4)) & 0xf);
13151 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13152 GEN_INT (- args_to_push)));
13153 RTX_FRAME_RELATED_P (insn) = 1;
13156 /* If this is an interrupt service routine, and the link register
13157 is going to be pushed, and we're not generating extra
13158 push of IP (needed when frame is needed and frame layout if apcs),
13159 subtracting four from LR now will mean that the function return
13160 can be done with a single instruction. */
13161 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
13162 && (live_regs_mask & (1 << LR_REGNUM)) != 0
13163 && !(frame_pointer_needed && TARGET_APCS_FRAME)
13166 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
13168 emit_set_insn (lr, plus_constant (lr, -4));
13171 if (live_regs_mask)
13173 saved_regs += bit_count (live_regs_mask) * 4;
13174 if (optimize_size && !frame_pointer_needed
13175 && saved_regs == offsets->saved_regs - offsets->saved_args)
13177 /* If no coprocessor registers are being pushed and we don't have
13178 to worry about a frame pointer then push extra registers to
13179 create the stack frame. This is done is a way that does not
13180 alter the frame layout, so is independent of the epilogue. */
13184 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
13186 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
13187 if (frame && n * 4 >= frame)
13190 live_regs_mask |= (1 << n) - 1;
13191 saved_regs += frame;
13194 insn = emit_multi_reg_push (live_regs_mask);
13195 RTX_FRAME_RELATED_P (insn) = 1;
13198 if (! IS_VOLATILE (func_type))
13199 saved_regs += arm_save_coproc_regs ();
13201 if (frame_pointer_needed && TARGET_ARM)
13203 /* Create the new frame pointer. */
13204 if (TARGET_APCS_FRAME)
13206 insn = GEN_INT (-(4 + args_to_push + fp_offset));
13207 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
13208 RTX_FRAME_RELATED_P (insn) = 1;
13210 if (IS_NESTED (func_type))
13212 /* Recover the static chain register. */
13213 if (!df_regs_ever_live_p (3)
13214 || saved_pretend_args)
13215 insn = gen_rtx_REG (SImode, 3);
13216 else /* if (crtl->args.pretend_args_size == 0) */
13218 insn = plus_constant (hard_frame_pointer_rtx, 4);
13219 insn = gen_frame_mem (SImode, insn);
13221 emit_set_insn (ip_rtx, insn);
13222 /* Add a USE to stop propagate_one_insn() from barfing. */
13223 emit_insn (gen_prologue_use (ip_rtx));
13228 insn = GEN_INT (saved_regs - 4);
13229 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13230 stack_pointer_rtx, insn));
13231 RTX_FRAME_RELATED_P (insn) = 1;
13235 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
13237 /* This add can produce multiple insns for a large constant, so we
13238 need to get tricky. */
13239 rtx last = get_last_insn ();
13241 amount = GEN_INT (offsets->saved_args + saved_regs
13242 - offsets->outgoing_args);
13244 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13248 last = last ? NEXT_INSN (last) : get_insns ();
13249 RTX_FRAME_RELATED_P (last) = 1;
13251 while (last != insn);
13253 /* If the frame pointer is needed, emit a special barrier that
13254 will prevent the scheduler from moving stores to the frame
13255 before the stack adjustment. */
13256 if (frame_pointer_needed)
13257 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
13258 hard_frame_pointer_rtx));
13262 if (frame_pointer_needed && TARGET_THUMB2)
13263 thumb_set_frame_pointer (offsets);
13265 if (flag_pic && arm_pic_register != INVALID_REGNUM)
13267 unsigned long mask;
13269 mask = live_regs_mask;
13270 mask &= THUMB2_WORK_REGS;
13271 if (!IS_NESTED (func_type))
13272 mask |= (1 << IP_REGNUM);
13273 arm_load_pic_register (mask);
13276 /* If we are profiling, make sure no instructions are scheduled before
13277 the call to mcount. Similarly if the user has requested no
13278 scheduling in the prolog. Similarly if we want non-call exceptions
13279 using the EABI unwinder, to prevent faulting instructions from being
13280 swapped with a stack adjustment. */
13281 if (crtl->profile || !TARGET_SCHED_PROLOG
13282 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13283 emit_insn (gen_blockage ());
13285 /* If the link register is being kept alive, with the return address in it,
13286 then make sure that it does not get reused by the ce2 pass. */
13287 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
13288 cfun->machine->lr_save_eliminated = 1;
13291 /* Print condition code to STREAM. Helper function for arm_print_operand. */
13293 arm_print_condition (FILE *stream)
13295 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
13297 /* Branch conversion is not implemented for Thumb-2. */
13300 output_operand_lossage ("predicated Thumb instruction");
13303 if (current_insn_predicate != NULL)
13305 output_operand_lossage
13306 ("predicated instruction in conditional sequence");
13310 fputs (arm_condition_codes[arm_current_cc], stream);
13312 else if (current_insn_predicate)
13314 enum arm_cond_code code;
13318 output_operand_lossage ("predicated Thumb instruction");
13322 code = get_arm_condition_code (current_insn_predicate);
13323 fputs (arm_condition_codes[code], stream);
13328 /* If CODE is 'd', then the X is a condition operand and the instruction
13329 should only be executed if the condition is true.
13330 if CODE is 'D', then the X is a condition operand and the instruction
13331 should only be executed if the condition is false: however, if the mode
13332 of the comparison is CCFPEmode, then always execute the instruction -- we
13333 do this because in these circumstances !GE does not necessarily imply LT;
13334 in these cases the instruction pattern will take care to make sure that
13335 an instruction containing %d will follow, thereby undoing the effects of
13336 doing this instruction unconditionally.
13337 If CODE is 'N' then X is a floating point operand that must be negated
13339 If CODE is 'B' then output a bitwise inverted value of X (a const int).
13340 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
13342 arm_print_operand (FILE *stream, rtx x, int code)
13347 fputs (ASM_COMMENT_START, stream);
13351 fputs (user_label_prefix, stream);
13355 fputs (REGISTER_PREFIX, stream);
13359 arm_print_condition (stream);
13363 /* Nothing in unified syntax, otherwise the current condition code. */
13364 if (!TARGET_UNIFIED_ASM)
13365 arm_print_condition (stream);
13369 /* The current condition code in unified syntax, otherwise nothing. */
13370 if (TARGET_UNIFIED_ASM)
13371 arm_print_condition (stream);
13375 /* The current condition code for a condition code setting instruction.
13376 Preceded by 's' in unified syntax, otherwise followed by 's'. */
13377 if (TARGET_UNIFIED_ASM)
13379 fputc('s', stream);
13380 arm_print_condition (stream);
13384 arm_print_condition (stream);
13385 fputc('s', stream);
13390 /* If the instruction is conditionally executed then print
13391 the current condition code, otherwise print 's'. */
13392 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
13393 if (current_insn_predicate)
13394 arm_print_condition (stream);
13396 fputc('s', stream);
13399 /* %# is a "break" sequence. It doesn't output anything, but is used to
13400 separate e.g. operand numbers from following text, if that text consists
13401 of further digits which we don't want to be part of the operand
13409 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13410 r = REAL_VALUE_NEGATE (r);
13411 fprintf (stream, "%s", fp_const_from_val (&r));
13415 /* An integer or symbol address without a preceding # sign. */
13417 switch (GET_CODE (x))
13420 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13424 output_addr_const (stream, x);
13428 gcc_unreachable ();
13433 if (GET_CODE (x) == CONST_INT)
13436 val = ARM_SIGN_EXTEND (~INTVAL (x));
13437 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
13441 putc ('~', stream);
13442 output_addr_const (stream, x);
13447 /* The low 16 bits of an immediate constant. */
13448 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
13452 fprintf (stream, "%s", arithmetic_instr (x, 1));
13455 /* Truncate Cirrus shift counts. */
13457 if (GET_CODE (x) == CONST_INT)
13459 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
13462 arm_print_operand (stream, x, 0);
13466 fprintf (stream, "%s", arithmetic_instr (x, 0));
13474 if (!shift_operator (x, SImode))
13476 output_operand_lossage ("invalid shift operand");
13480 shift = shift_op (x, &val);
13484 fprintf (stream, ", %s ", shift);
13486 arm_print_operand (stream, XEXP (x, 1), 0);
13488 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
13493 /* An explanation of the 'Q', 'R' and 'H' register operands:
13495 In a pair of registers containing a DI or DF value the 'Q'
13496 operand returns the register number of the register containing
13497 the least significant part of the value. The 'R' operand returns
13498 the register number of the register containing the most
13499 significant part of the value.
13501 The 'H' operand returns the higher of the two register numbers.
13502 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13503 same as the 'Q' operand, since the most significant part of the
13504 value is held in the lower number register. The reverse is true
13505 on systems where WORDS_BIG_ENDIAN is false.
13507 The purpose of these operands is to distinguish between cases
13508 where the endian-ness of the values is important (for example
13509 when they are added together), and cases where the endian-ness
13510 is irrelevant, but the order of register operations is important.
13511 For example when loading a value from memory into a register
13512 pair, the endian-ness does not matter. Provided that the value
13513 from the lower memory address is put into the lower numbered
13514 register, and the value from the higher address is put into the
13515 higher numbered register, the load will work regardless of whether
13516 the value being loaded is big-wordian or little-wordian. The
13517 order of the two register loads can matter however, if the address
13518 of the memory location is actually held in one of the registers
13519 being overwritten by the load. */
13521 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13523 output_operand_lossage ("invalid operand for code '%c'", code);
13527 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13531 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13533 output_operand_lossage ("invalid operand for code '%c'", code);
13537 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13541 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13543 output_operand_lossage ("invalid operand for code '%c'", code);
13547 asm_fprintf (stream, "%r", REGNO (x) + 1);
13551 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13553 output_operand_lossage ("invalid operand for code '%c'", code);
13557 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13561 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13563 output_operand_lossage ("invalid operand for code '%c'", code);
13567 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13571 asm_fprintf (stream, "%r",
13572 GET_CODE (XEXP (x, 0)) == REG
13573 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13577 asm_fprintf (stream, "{%r-%r}",
13579 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13582 /* Like 'M', but writing doubleword vector registers, for use by Neon
13586 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13587 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13589 asm_fprintf (stream, "{d%d}", regno);
13591 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13596 /* CONST_TRUE_RTX means always -- that's the default. */
13597 if (x == const_true_rtx)
13600 if (!COMPARISON_P (x))
13602 output_operand_lossage ("invalid operand for code '%c'", code);
13606 fputs (arm_condition_codes[get_arm_condition_code (x)],
13611 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13612 want to do that. */
13613 if (x == const_true_rtx)
13615 output_operand_lossage ("instruction never executed");
13618 if (!COMPARISON_P (x))
13620 output_operand_lossage ("invalid operand for code '%c'", code);
13624 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13625 (get_arm_condition_code (x))],
13629 /* Cirrus registers can be accessed in a variety of ways:
13630 single floating point (f)
13631 double floating point (d)
13633 64bit integer (dx). */
13634 case 'W': /* Cirrus register in F mode. */
13635 case 'X': /* Cirrus register in D mode. */
13636 case 'Y': /* Cirrus register in FX mode. */
13637 case 'Z': /* Cirrus register in DX mode. */
13638 gcc_assert (GET_CODE (x) == REG
13639 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13641 fprintf (stream, "mv%s%s",
13643 : code == 'X' ? "d"
13644 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13648 /* Print cirrus register in the mode specified by the register's mode. */
13651 int mode = GET_MODE (x);
13653 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13655 output_operand_lossage ("invalid operand for code '%c'", code);
13659 fprintf (stream, "mv%s%s",
13660 mode == DFmode ? "d"
13661 : mode == SImode ? "fx"
13662 : mode == DImode ? "dx"
13663 : "f", reg_names[REGNO (x)] + 2);
13669 if (GET_CODE (x) != REG
13670 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13671 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13672 /* Bad value for wCG register number. */
13674 output_operand_lossage ("invalid operand for code '%c'", code);
13679 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13682 /* Print an iWMMXt control register name. */
13684 if (GET_CODE (x) != CONST_INT
13686 || INTVAL (x) >= 16)
13687 /* Bad value for wC register number. */
13689 output_operand_lossage ("invalid operand for code '%c'", code);
13695 static const char * wc_reg_names [16] =
13697 "wCID", "wCon", "wCSSF", "wCASF",
13698 "wC4", "wC5", "wC6", "wC7",
13699 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13700 "wC12", "wC13", "wC14", "wC15"
13703 fprintf (stream, wc_reg_names [INTVAL (x)]);
13707 /* Print a VFP/Neon double precision or quad precision register name. */
13711 int mode = GET_MODE (x);
13712 int is_quad = (code == 'q');
13715 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13717 output_operand_lossage ("invalid operand for code '%c'", code);
13721 if (GET_CODE (x) != REG
13722 || !IS_VFP_REGNUM (REGNO (x)))
13724 output_operand_lossage ("invalid operand for code '%c'", code);
13729 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13730 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13732 output_operand_lossage ("invalid operand for code '%c'", code);
13736 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13737 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13741 /* These two codes print the low/high doubleword register of a Neon quad
13742 register, respectively. For pair-structure types, can also print
13743 low/high quadword registers. */
13747 int mode = GET_MODE (x);
13750 if ((GET_MODE_SIZE (mode) != 16
13751 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13753 output_operand_lossage ("invalid operand for code '%c'", code);
13758 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13760 output_operand_lossage ("invalid operand for code '%c'", code);
13764 if (GET_MODE_SIZE (mode) == 16)
13765 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13766 + (code == 'f' ? 1 : 0));
13768 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13769 + (code == 'f' ? 1 : 0));
13773 /* Print a VFPv3 floating-point constant, represented as an integer
13777 int index = vfp3_const_double_index (x);
13778 gcc_assert (index != -1);
13779 fprintf (stream, "%d", index);
13783 /* Print bits representing opcode features for Neon.
13785 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13786 and polynomials as unsigned.
13788 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13790 Bit 2 is 1 for rounding functions, 0 otherwise. */
13792 /* Identify the type as 's', 'u', 'p' or 'f'. */
13795 HOST_WIDE_INT bits = INTVAL (x);
13796 fputc ("uspf"[bits & 3], stream);
13800 /* Likewise, but signed and unsigned integers are both 'i'. */
13803 HOST_WIDE_INT bits = INTVAL (x);
13804 fputc ("iipf"[bits & 3], stream);
13808 /* As for 'T', but emit 'u' instead of 'p'. */
13811 HOST_WIDE_INT bits = INTVAL (x);
13812 fputc ("usuf"[bits & 3], stream);
13816 /* Bit 2: rounding (vs none). */
13819 HOST_WIDE_INT bits = INTVAL (x);
13820 fputs ((bits & 4) != 0 ? "r" : "", stream);
13827 output_operand_lossage ("missing operand");
13831 switch (GET_CODE (x))
13834 asm_fprintf (stream, "%r", REGNO (x));
13838 output_memory_reference_mode = GET_MODE (x);
13839 output_address (XEXP (x, 0));
13846 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13847 sizeof (fpstr), 0, 1);
13848 fprintf (stream, "#%s", fpstr);
13851 fprintf (stream, "#%s", fp_immediate_constant (x));
13855 gcc_assert (GET_CODE (x) != NEG);
13856 fputc ('#', stream);
13857 output_addr_const (stream, x);
13863 /* Target hook for assembling integer objects. The ARM version needs to
13864 handle word-sized values specially. */
13866 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13868 enum machine_mode mode;
13870 if (size == UNITS_PER_WORD && aligned_p)
13872 fputs ("\t.word\t", asm_out_file);
13873 output_addr_const (asm_out_file, x);
13875 /* Mark symbols as position independent. We only do this in the
13876 .text segment, not in the .data segment. */
13877 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13878 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13880 /* See legitimize_pic_address for an explanation of the
13881 TARGET_VXWORKS_RTP check. */
13882 if (TARGET_VXWORKS_RTP
13883 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13884 fputs ("(GOT)", asm_out_file);
13886 fputs ("(GOTOFF)", asm_out_file);
13888 fputc ('\n', asm_out_file);
13892 mode = GET_MODE (x);
13894 if (arm_vector_mode_supported_p (mode))
13898 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13900 units = CONST_VECTOR_NUNITS (x);
13901 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13903 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13904 for (i = 0; i < units; i++)
13906 rtx elt = CONST_VECTOR_ELT (x, i);
13908 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13911 for (i = 0; i < units; i++)
13913 rtx elt = CONST_VECTOR_ELT (x, i);
13914 REAL_VALUE_TYPE rval;
13916 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13919 (rval, GET_MODE_INNER (mode),
13920 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13926 return default_assemble_integer (x, size, aligned_p);
13930 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13934 if (!TARGET_AAPCS_BASED)
13937 default_named_section_asm_out_constructor
13938 : default_named_section_asm_out_destructor) (symbol, priority);
13942 /* Put these in the .init_array section, using a special relocation. */
13943 if (priority != DEFAULT_INIT_PRIORITY)
13946 sprintf (buf, "%s.%.5u",
13947 is_ctor ? ".init_array" : ".fini_array",
13949 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13956 switch_to_section (s);
13957 assemble_align (POINTER_SIZE);
13958 fputs ("\t.word\t", asm_out_file);
13959 output_addr_const (asm_out_file, symbol);
13960 fputs ("(target1)\n", asm_out_file);
13963 /* Add a function to the list of static constructors. */
13966 arm_elf_asm_constructor (rtx symbol, int priority)
13968 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13971 /* Add a function to the list of static destructors. */
13974 arm_elf_asm_destructor (rtx symbol, int priority)
13976 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13979 /* A finite state machine takes care of noticing whether or not instructions
13980 can be conditionally executed, and thus decrease execution time and code
13981 size by deleting branch instructions. The fsm is controlled by
13982 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13984 /* The state of the fsm controlling condition codes are:
13985 0: normal, do nothing special
13986 1: make ASM_OUTPUT_OPCODE not output this instruction
13987 2: make ASM_OUTPUT_OPCODE not output this instruction
13988 3: make instructions conditional
13989 4: make instructions conditional
13991 State transitions (state->state by whom under condition):
13992 0 -> 1 final_prescan_insn if the `target' is a label
13993 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13994 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13995 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13996 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13997 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13998 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13999 (the target insn is arm_target_insn).
14001 If the jump clobbers the conditions then we use states 2 and 4.
14003 A similar thing can be done with conditional return insns.
14005 XXX In case the `target' is an unconditional branch, this conditionalising
14006 of the instructions always reduces code size, but not always execution
14007 time. But then, I want to reduce the code size to somewhere near what
14008 /bin/cc produces. */
14010 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14011 instructions. When a COND_EXEC instruction is seen the subsequent
14012 instructions are scanned so that multiple conditional instructions can be
14013 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14014 specify the length and true/false mask for the IT block. These will be
14015 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
14017 /* Returns the index of the ARM condition code string in
14018 `arm_condition_codes'. COMPARISON should be an rtx like
14019 `(eq (...) (...))'. */
14020 static enum arm_cond_code
14021 get_arm_condition_code (rtx comparison)
14023 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14025 enum rtx_code comp_code = GET_CODE (comparison);
14027 if (GET_MODE_CLASS (mode) != MODE_CC)
14028 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14029 XEXP (comparison, 1));
14033 case CC_DNEmode: code = ARM_NE; goto dominance;
14034 case CC_DEQmode: code = ARM_EQ; goto dominance;
14035 case CC_DGEmode: code = ARM_GE; goto dominance;
14036 case CC_DGTmode: code = ARM_GT; goto dominance;
14037 case CC_DLEmode: code = ARM_LE; goto dominance;
14038 case CC_DLTmode: code = ARM_LT; goto dominance;
14039 case CC_DGEUmode: code = ARM_CS; goto dominance;
14040 case CC_DGTUmode: code = ARM_HI; goto dominance;
14041 case CC_DLEUmode: code = ARM_LS; goto dominance;
14042 case CC_DLTUmode: code = ARM_CC;
14045 gcc_assert (comp_code == EQ || comp_code == NE);
14047 if (comp_code == EQ)
14048 return ARM_INVERSE_CONDITION_CODE (code);
14054 case NE: return ARM_NE;
14055 case EQ: return ARM_EQ;
14056 case GE: return ARM_PL;
14057 case LT: return ARM_MI;
14058 default: gcc_unreachable ();
14064 case NE: return ARM_NE;
14065 case EQ: return ARM_EQ;
14066 default: gcc_unreachable ();
14072 case NE: return ARM_MI;
14073 case EQ: return ARM_PL;
14074 default: gcc_unreachable ();
14079 /* These encodings assume that AC=1 in the FPA system control
14080 byte. This allows us to handle all cases except UNEQ and
14084 case GE: return ARM_GE;
14085 case GT: return ARM_GT;
14086 case LE: return ARM_LS;
14087 case LT: return ARM_MI;
14088 case NE: return ARM_NE;
14089 case EQ: return ARM_EQ;
14090 case ORDERED: return ARM_VC;
14091 case UNORDERED: return ARM_VS;
14092 case UNLT: return ARM_LT;
14093 case UNLE: return ARM_LE;
14094 case UNGT: return ARM_HI;
14095 case UNGE: return ARM_PL;
14096 /* UNEQ and LTGT do not have a representation. */
14097 case UNEQ: /* Fall through. */
14098 case LTGT: /* Fall through. */
14099 default: gcc_unreachable ();
14105 case NE: return ARM_NE;
14106 case EQ: return ARM_EQ;
14107 case GE: return ARM_LE;
14108 case GT: return ARM_LT;
14109 case LE: return ARM_GE;
14110 case LT: return ARM_GT;
14111 case GEU: return ARM_LS;
14112 case GTU: return ARM_CC;
14113 case LEU: return ARM_CS;
14114 case LTU: return ARM_HI;
14115 default: gcc_unreachable ();
14121 case LTU: return ARM_CS;
14122 case GEU: return ARM_CC;
14123 default: gcc_unreachable ();
14129 case NE: return ARM_NE;
14130 case EQ: return ARM_EQ;
14131 case GE: return ARM_GE;
14132 case GT: return ARM_GT;
14133 case LE: return ARM_LE;
14134 case LT: return ARM_LT;
14135 case GEU: return ARM_CS;
14136 case GTU: return ARM_HI;
14137 case LEU: return ARM_LS;
14138 case LTU: return ARM_CC;
14139 default: gcc_unreachable ();
14142 default: gcc_unreachable ();
14146 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
14149 thumb2_final_prescan_insn (rtx insn)
14151 rtx first_insn = insn;
14152 rtx body = PATTERN (insn);
14154 enum arm_cond_code code;
14158 /* Remove the previous insn from the count of insns to be output. */
14159 if (arm_condexec_count)
14160 arm_condexec_count--;
14162 /* Nothing to do if we are already inside a conditional block. */
14163 if (arm_condexec_count)
14166 if (GET_CODE (body) != COND_EXEC)
14169 /* Conditional jumps are implemented directly. */
14170 if (GET_CODE (insn) == JUMP_INSN)
14173 predicate = COND_EXEC_TEST (body);
14174 arm_current_cc = get_arm_condition_code (predicate);
14176 n = get_attr_ce_count (insn);
14177 arm_condexec_count = 1;
14178 arm_condexec_mask = (1 << n) - 1;
14179 arm_condexec_masklen = n;
14180 /* See if subsequent instructions can be combined into the same block. */
14183 insn = next_nonnote_insn (insn);
14185 /* Jumping into the middle of an IT block is illegal, so a label or
14186 barrier terminates the block. */
14187 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
14190 body = PATTERN (insn);
14191 /* USE and CLOBBER aren't really insns, so just skip them. */
14192 if (GET_CODE (body) == USE
14193 || GET_CODE (body) == CLOBBER)
14196 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14197 if (GET_CODE (body) != COND_EXEC)
14199 /* Allow up to 4 conditionally executed instructions in a block. */
14200 n = get_attr_ce_count (insn);
14201 if (arm_condexec_masklen + n > 4)
14204 predicate = COND_EXEC_TEST (body);
14205 code = get_arm_condition_code (predicate);
14206 mask = (1 << n) - 1;
14207 if (arm_current_cc == code)
14208 arm_condexec_mask |= (mask << arm_condexec_masklen);
14209 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
14212 arm_condexec_count++;
14213 arm_condexec_masklen += n;
14215 /* A jump must be the last instruction in a conditional block. */
14216 if (GET_CODE(insn) == JUMP_INSN)
14219 /* Restore recog_data (getting the attributes of other insns can
14220 destroy this array, but final.c assumes that it remains intact
14221 across this call). */
14222 extract_constrain_insn_cached (first_insn);
14226 arm_final_prescan_insn (rtx insn)
14228 /* BODY will hold the body of INSN. */
14229 rtx body = PATTERN (insn);
14231 /* This will be 1 if trying to repeat the trick, and things need to be
14232 reversed if it appears to fail. */
14235 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
14236 taken are clobbered, even if the rtl suggests otherwise. It also
14237 means that we have to grub around within the jump expression to find
14238 out what the conditions are when the jump isn't taken. */
14239 int jump_clobbers = 0;
14241 /* If we start with a return insn, we only succeed if we find another one. */
14242 int seeking_return = 0;
14244 /* START_INSN will hold the insn from where we start looking. This is the
14245 first insn after the following code_label if REVERSE is true. */
14246 rtx start_insn = insn;
14248 /* If in state 4, check if the target branch is reached, in order to
14249 change back to state 0. */
14250 if (arm_ccfsm_state == 4)
14252 if (insn == arm_target_insn)
14254 arm_target_insn = NULL;
14255 arm_ccfsm_state = 0;
14260 /* If in state 3, it is possible to repeat the trick, if this insn is an
14261 unconditional branch to a label, and immediately following this branch
14262 is the previous target label which is only used once, and the label this
14263 branch jumps to is not too far off. */
14264 if (arm_ccfsm_state == 3)
14266 if (simplejump_p (insn))
14268 start_insn = next_nonnote_insn (start_insn);
14269 if (GET_CODE (start_insn) == BARRIER)
14271 /* XXX Isn't this always a barrier? */
14272 start_insn = next_nonnote_insn (start_insn);
14274 if (GET_CODE (start_insn) == CODE_LABEL
14275 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14276 && LABEL_NUSES (start_insn) == 1)
14281 else if (GET_CODE (body) == RETURN)
14283 start_insn = next_nonnote_insn (start_insn);
14284 if (GET_CODE (start_insn) == BARRIER)
14285 start_insn = next_nonnote_insn (start_insn);
14286 if (GET_CODE (start_insn) == CODE_LABEL
14287 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14288 && LABEL_NUSES (start_insn) == 1)
14291 seeking_return = 1;
14300 gcc_assert (!arm_ccfsm_state || reverse);
14301 if (GET_CODE (insn) != JUMP_INSN)
14304 /* This jump might be paralleled with a clobber of the condition codes
14305 the jump should always come first */
14306 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14307 body = XVECEXP (body, 0, 0);
14310 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14311 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14314 int fail = FALSE, succeed = FALSE;
14315 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14316 int then_not_else = TRUE;
14317 rtx this_insn = start_insn, label = 0;
14319 /* If the jump cannot be done with one instruction, we cannot
14320 conditionally execute the instruction in the inverse case. */
14321 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14327 /* Register the insn jumped to. */
14330 if (!seeking_return)
14331 label = XEXP (SET_SRC (body), 0);
14333 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14334 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14335 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14337 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14338 then_not_else = FALSE;
14340 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14341 seeking_return = 1;
14342 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14344 seeking_return = 1;
14345 then_not_else = FALSE;
14348 gcc_unreachable ();
14350 /* See how many insns this branch skips, and what kind of insns. If all
14351 insns are okay, and the label or unconditional branch to the same
14352 label is not too far away, succeed. */
14353 for (insns_skipped = 0;
14354 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14358 this_insn = next_nonnote_insn (this_insn);
14362 switch (GET_CODE (this_insn))
14365 /* Succeed if it is the target label, otherwise fail since
14366 control falls in from somewhere else. */
14367 if (this_insn == label)
14371 arm_ccfsm_state = 2;
14372 this_insn = next_nonnote_insn (this_insn);
14375 arm_ccfsm_state = 1;
14383 /* Succeed if the following insn is the target label.
14385 If return insns are used then the last insn in a function
14386 will be a barrier. */
14387 this_insn = next_nonnote_insn (this_insn);
14388 if (this_insn && this_insn == label)
14392 arm_ccfsm_state = 2;
14393 this_insn = next_nonnote_insn (this_insn);
14396 arm_ccfsm_state = 1;
14404 /* The AAPCS says that conditional calls should not be
14405 used since they make interworking inefficient (the
14406 linker can't transform BL<cond> into BLX). That's
14407 only a problem if the machine has BLX. */
14414 /* Succeed if the following insn is the target label, or
14415 if the following two insns are a barrier and the
14417 this_insn = next_nonnote_insn (this_insn);
14418 if (this_insn && GET_CODE (this_insn) == BARRIER)
14419 this_insn = next_nonnote_insn (this_insn);
14421 if (this_insn && this_insn == label
14422 && insns_skipped < max_insns_skipped)
14426 arm_ccfsm_state = 2;
14427 this_insn = next_nonnote_insn (this_insn);
14430 arm_ccfsm_state = 1;
14438 /* If this is an unconditional branch to the same label, succeed.
14439 If it is to another label, do nothing. If it is conditional,
14441 /* XXX Probably, the tests for SET and the PC are
14444 scanbody = PATTERN (this_insn);
14445 if (GET_CODE (scanbody) == SET
14446 && GET_CODE (SET_DEST (scanbody)) == PC)
14448 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14449 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14451 arm_ccfsm_state = 2;
14454 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14457 /* Fail if a conditional return is undesirable (e.g. on a
14458 StrongARM), but still allow this if optimizing for size. */
14459 else if (GET_CODE (scanbody) == RETURN
14460 && !use_return_insn (TRUE, NULL)
14463 else if (GET_CODE (scanbody) == RETURN
14466 arm_ccfsm_state = 2;
14469 else if (GET_CODE (scanbody) == PARALLEL)
14471 switch (get_attr_conds (this_insn))
14481 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14486 /* Instructions using or affecting the condition codes make it
14488 scanbody = PATTERN (this_insn);
14489 if (!(GET_CODE (scanbody) == SET
14490 || GET_CODE (scanbody) == PARALLEL)
14491 || get_attr_conds (this_insn) != CONDS_NOCOND)
14494 /* A conditional cirrus instruction must be followed by
14495 a non Cirrus instruction. However, since we
14496 conditionalize instructions in this function and by
14497 the time we get here we can't add instructions
14498 (nops), because shorten_branches() has already been
14499 called, we will disable conditionalizing Cirrus
14500 instructions to be safe. */
14501 if (GET_CODE (scanbody) != USE
14502 && GET_CODE (scanbody) != CLOBBER
14503 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14513 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14514 arm_target_label = CODE_LABEL_NUMBER (label);
14517 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14519 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14521 this_insn = next_nonnote_insn (this_insn);
14522 gcc_assert (!this_insn
14523 || (GET_CODE (this_insn) != BARRIER
14524 && GET_CODE (this_insn) != CODE_LABEL));
14528 /* Oh, dear! we ran off the end.. give up. */
14529 extract_constrain_insn_cached (insn);
14530 arm_ccfsm_state = 0;
14531 arm_target_insn = NULL;
14534 arm_target_insn = this_insn;
14538 gcc_assert (!reverse);
14540 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14542 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14543 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14544 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14545 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14549 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14552 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14556 if (reverse || then_not_else)
14557 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14560 /* Restore recog_data (getting the attributes of other insns can
14561 destroy this array, but final.c assumes that it remains intact
14562 across this call. */
14563 extract_constrain_insn_cached (insn);
14567 /* Output IT instructions. */
14569 thumb2_asm_output_opcode (FILE * stream)
14574 if (arm_condexec_mask)
14576 for (n = 0; n < arm_condexec_masklen; n++)
14577 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14579 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14580 arm_condition_codes[arm_current_cc]);
14581 arm_condexec_mask = 0;
14585 /* Returns true if REGNO is a valid register
14586 for holding a quantity of type MODE. */
14588 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14590 if (GET_MODE_CLASS (mode) == MODE_CC)
14591 return (regno == CC_REGNUM
14592 || (TARGET_HARD_FLOAT && TARGET_VFP
14593 && regno == VFPCC_REGNUM));
14596 /* For the Thumb we only allow values bigger than SImode in
14597 registers 0 - 6, so that there is always a second low
14598 register available to hold the upper part of the value.
14599 We probably we ought to ensure that the register is the
14600 start of an even numbered register pair. */
14601 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14603 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14604 && IS_CIRRUS_REGNUM (regno))
14605 /* We have outlawed SI values in Cirrus registers because they
14606 reside in the lower 32 bits, but SF values reside in the
14607 upper 32 bits. This causes gcc all sorts of grief. We can't
14608 even split the registers into pairs because Cirrus SI values
14609 get sign extended to 64bits-- aldyh. */
14610 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14612 if (TARGET_HARD_FLOAT && TARGET_VFP
14613 && IS_VFP_REGNUM (regno))
14615 if (mode == SFmode || mode == SImode)
14616 return VFP_REGNO_OK_FOR_SINGLE (regno);
14618 if (mode == DFmode)
14619 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14622 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14623 || (VALID_NEON_QREG_MODE (mode)
14624 && NEON_REGNO_OK_FOR_QUAD (regno))
14625 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14626 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14627 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14628 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14629 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14634 if (TARGET_REALLY_IWMMXT)
14636 if (IS_IWMMXT_GR_REGNUM (regno))
14637 return mode == SImode;
14639 if (IS_IWMMXT_REGNUM (regno))
14640 return VALID_IWMMXT_REG_MODE (mode);
14643 /* We allow any value to be stored in the general registers.
14644 Restrict doubleword quantities to even register pairs so that we can
14645 use ldrd. Do not allow Neon structure opaque modes in general registers;
14646 they would use too many. */
14647 if (regno <= LAST_ARM_REGNUM)
14648 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14649 && !VALID_NEON_STRUCT_MODE (mode);
14651 if (regno == FRAME_POINTER_REGNUM
14652 || regno == ARG_POINTER_REGNUM)
14653 /* We only allow integers in the fake hard registers. */
14654 return GET_MODE_CLASS (mode) == MODE_INT;
14656 /* The only registers left are the FPA registers
14657 which we only allow to hold FP values. */
14658 return (TARGET_HARD_FLOAT && TARGET_FPA
14659 && GET_MODE_CLASS (mode) == MODE_FLOAT
14660 && regno >= FIRST_FPA_REGNUM
14661 && regno <= LAST_FPA_REGNUM);
14664 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14665 not used in arm mode. */
14667 arm_regno_class (int regno)
14671 if (regno == STACK_POINTER_REGNUM)
14673 if (regno == CC_REGNUM)
14680 if (TARGET_THUMB2 && regno < 8)
14683 if ( regno <= LAST_ARM_REGNUM
14684 || regno == FRAME_POINTER_REGNUM
14685 || regno == ARG_POINTER_REGNUM)
14686 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14688 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14689 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14691 if (IS_CIRRUS_REGNUM (regno))
14692 return CIRRUS_REGS;
14694 if (IS_VFP_REGNUM (regno))
14696 if (regno <= D7_VFP_REGNUM)
14697 return VFP_D0_D7_REGS;
14698 else if (regno <= LAST_LO_VFP_REGNUM)
14699 return VFP_LO_REGS;
14701 return VFP_HI_REGS;
14704 if (IS_IWMMXT_REGNUM (regno))
14705 return IWMMXT_REGS;
14707 if (IS_IWMMXT_GR_REGNUM (regno))
14708 return IWMMXT_GR_REGS;
14713 /* Handle a special case when computing the offset
14714 of an argument from the frame pointer. */
14716 arm_debugger_arg_offset (int value, rtx addr)
14720 /* We are only interested if dbxout_parms() failed to compute the offset. */
14724 /* We can only cope with the case where the address is held in a register. */
14725 if (GET_CODE (addr) != REG)
14728 /* If we are using the frame pointer to point at the argument, then
14729 an offset of 0 is correct. */
14730 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14733 /* If we are using the stack pointer to point at the
14734 argument, then an offset of 0 is correct. */
14735 /* ??? Check this is consistent with thumb2 frame layout. */
14736 if ((TARGET_THUMB || !frame_pointer_needed)
14737 && REGNO (addr) == SP_REGNUM)
14740 /* Oh dear. The argument is pointed to by a register rather
14741 than being held in a register, or being stored at a known
14742 offset from the frame pointer. Since GDB only understands
14743 those two kinds of argument we must translate the address
14744 held in the register into an offset from the frame pointer.
14745 We do this by searching through the insns for the function
14746 looking to see where this register gets its value. If the
14747 register is initialized from the frame pointer plus an offset
14748 then we are in luck and we can continue, otherwise we give up.
14750 This code is exercised by producing debugging information
14751 for a function with arguments like this:
14753 double func (double a, double b, int c, double d) {return d;}
14755 Without this code the stab for parameter 'd' will be set to
14756 an offset of 0 from the frame pointer, rather than 8. */
14758 /* The if() statement says:
14760 If the insn is a normal instruction
14761 and if the insn is setting the value in a register
14762 and if the register being set is the register holding the address of the argument
14763 and if the address is computing by an addition
14764 that involves adding to a register
14765 which is the frame pointer
14770 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14772 if ( GET_CODE (insn) == INSN
14773 && GET_CODE (PATTERN (insn)) == SET
14774 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14775 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14776 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14777 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14778 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14781 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14790 warning (0, "unable to compute real location of stacked parameter");
14791 value = 8; /* XXX magic hack */
14797 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14800 if ((MASK) & insn_flags) \
14801 add_builtin_function ((NAME), (TYPE), (CODE), \
14802 BUILT_IN_MD, NULL, NULL_TREE); \
14806 struct builtin_description
14808 const unsigned int mask;
14809 const enum insn_code icode;
14810 const char * const name;
14811 const enum arm_builtins code;
14812 const enum rtx_code comparison;
14813 const unsigned int flag;
14816 static const struct builtin_description bdesc_2arg[] =
14818 #define IWMMXT_BUILTIN(code, string, builtin) \
14819 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14820 ARM_BUILTIN_##builtin, 0, 0 },
14822 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14823 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14824 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14825 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14826 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14827 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14828 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14829 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14830 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14831 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14832 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14833 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14834 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14835 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14836 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14837 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14838 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14839 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14840 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14841 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14842 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14843 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14844 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14845 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14846 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14847 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14848 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14849 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14850 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14851 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14852 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14853 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14854 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14855 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14856 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14857 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14858 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14859 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14860 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14861 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14862 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14863 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14864 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14865 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14866 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14867 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14868 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14869 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14870 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14871 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14872 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14873 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14874 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14875 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14876 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14877 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14878 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14879 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14881 #define IWMMXT_BUILTIN2(code, builtin) \
14882 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14884 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14885 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14886 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14887 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14888 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14889 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14890 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14891 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14892 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14893 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14894 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14895 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14896 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14897 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14898 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14899 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14900 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14901 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14902 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14903 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14904 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14905 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14906 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14907 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14908 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14909 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14910 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14911 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14912 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14913 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14914 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14915 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14918 static const struct builtin_description bdesc_1arg[] =
14920 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14921 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14922 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14923 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14924 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14925 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14926 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14927 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14928 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14929 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14930 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14931 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14932 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14933 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14934 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14935 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14936 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14937 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14940 /* Set up all the iWMMXt builtins. This is
14941 not called if TARGET_IWMMXT is zero. */
14944 arm_init_iwmmxt_builtins (void)
14946 const struct builtin_description * d;
14948 tree endlink = void_list_node;
14950 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14951 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14952 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14955 = build_function_type (integer_type_node,
14956 tree_cons (NULL_TREE, integer_type_node, endlink));
14957 tree v8qi_ftype_v8qi_v8qi_int
14958 = build_function_type (V8QI_type_node,
14959 tree_cons (NULL_TREE, V8QI_type_node,
14960 tree_cons (NULL_TREE, V8QI_type_node,
14961 tree_cons (NULL_TREE,
14964 tree v4hi_ftype_v4hi_int
14965 = build_function_type (V4HI_type_node,
14966 tree_cons (NULL_TREE, V4HI_type_node,
14967 tree_cons (NULL_TREE, integer_type_node,
14969 tree v2si_ftype_v2si_int
14970 = build_function_type (V2SI_type_node,
14971 tree_cons (NULL_TREE, V2SI_type_node,
14972 tree_cons (NULL_TREE, integer_type_node,
14974 tree v2si_ftype_di_di
14975 = build_function_type (V2SI_type_node,
14976 tree_cons (NULL_TREE, long_long_integer_type_node,
14977 tree_cons (NULL_TREE, long_long_integer_type_node,
14979 tree di_ftype_di_int
14980 = build_function_type (long_long_integer_type_node,
14981 tree_cons (NULL_TREE, long_long_integer_type_node,
14982 tree_cons (NULL_TREE, integer_type_node,
14984 tree di_ftype_di_int_int
14985 = build_function_type (long_long_integer_type_node,
14986 tree_cons (NULL_TREE, long_long_integer_type_node,
14987 tree_cons (NULL_TREE, integer_type_node,
14988 tree_cons (NULL_TREE,
14991 tree int_ftype_v8qi
14992 = build_function_type (integer_type_node,
14993 tree_cons (NULL_TREE, V8QI_type_node,
14995 tree int_ftype_v4hi
14996 = build_function_type (integer_type_node,
14997 tree_cons (NULL_TREE, V4HI_type_node,
14999 tree int_ftype_v2si
15000 = build_function_type (integer_type_node,
15001 tree_cons (NULL_TREE, V2SI_type_node,
15003 tree int_ftype_v8qi_int
15004 = build_function_type (integer_type_node,
15005 tree_cons (NULL_TREE, V8QI_type_node,
15006 tree_cons (NULL_TREE, integer_type_node,
15008 tree int_ftype_v4hi_int
15009 = build_function_type (integer_type_node,
15010 tree_cons (NULL_TREE, V4HI_type_node,
15011 tree_cons (NULL_TREE, integer_type_node,
15013 tree int_ftype_v2si_int
15014 = build_function_type (integer_type_node,
15015 tree_cons (NULL_TREE, V2SI_type_node,
15016 tree_cons (NULL_TREE, integer_type_node,
15018 tree v8qi_ftype_v8qi_int_int
15019 = build_function_type (V8QI_type_node,
15020 tree_cons (NULL_TREE, V8QI_type_node,
15021 tree_cons (NULL_TREE, integer_type_node,
15022 tree_cons (NULL_TREE,
15025 tree v4hi_ftype_v4hi_int_int
15026 = build_function_type (V4HI_type_node,
15027 tree_cons (NULL_TREE, V4HI_type_node,
15028 tree_cons (NULL_TREE, integer_type_node,
15029 tree_cons (NULL_TREE,
15032 tree v2si_ftype_v2si_int_int
15033 = build_function_type (V2SI_type_node,
15034 tree_cons (NULL_TREE, V2SI_type_node,
15035 tree_cons (NULL_TREE, integer_type_node,
15036 tree_cons (NULL_TREE,
15039 /* Miscellaneous. */
15040 tree v8qi_ftype_v4hi_v4hi
15041 = build_function_type (V8QI_type_node,
15042 tree_cons (NULL_TREE, V4HI_type_node,
15043 tree_cons (NULL_TREE, V4HI_type_node,
15045 tree v4hi_ftype_v2si_v2si
15046 = build_function_type (V4HI_type_node,
15047 tree_cons (NULL_TREE, V2SI_type_node,
15048 tree_cons (NULL_TREE, V2SI_type_node,
15050 tree v2si_ftype_v4hi_v4hi
15051 = build_function_type (V2SI_type_node,
15052 tree_cons (NULL_TREE, V4HI_type_node,
15053 tree_cons (NULL_TREE, V4HI_type_node,
15055 tree v2si_ftype_v8qi_v8qi
15056 = build_function_type (V2SI_type_node,
15057 tree_cons (NULL_TREE, V8QI_type_node,
15058 tree_cons (NULL_TREE, V8QI_type_node,
15060 tree v4hi_ftype_v4hi_di
15061 = build_function_type (V4HI_type_node,
15062 tree_cons (NULL_TREE, V4HI_type_node,
15063 tree_cons (NULL_TREE,
15064 long_long_integer_type_node,
15066 tree v2si_ftype_v2si_di
15067 = build_function_type (V2SI_type_node,
15068 tree_cons (NULL_TREE, V2SI_type_node,
15069 tree_cons (NULL_TREE,
15070 long_long_integer_type_node,
15072 tree void_ftype_int_int
15073 = build_function_type (void_type_node,
15074 tree_cons (NULL_TREE, integer_type_node,
15075 tree_cons (NULL_TREE, integer_type_node,
15078 = build_function_type (long_long_unsigned_type_node, endlink);
15080 = build_function_type (long_long_integer_type_node,
15081 tree_cons (NULL_TREE, V8QI_type_node,
15084 = build_function_type (long_long_integer_type_node,
15085 tree_cons (NULL_TREE, V4HI_type_node,
15088 = build_function_type (long_long_integer_type_node,
15089 tree_cons (NULL_TREE, V2SI_type_node,
15091 tree v2si_ftype_v4hi
15092 = build_function_type (V2SI_type_node,
15093 tree_cons (NULL_TREE, V4HI_type_node,
15095 tree v4hi_ftype_v8qi
15096 = build_function_type (V4HI_type_node,
15097 tree_cons (NULL_TREE, V8QI_type_node,
15100 tree di_ftype_di_v4hi_v4hi
15101 = build_function_type (long_long_unsigned_type_node,
15102 tree_cons (NULL_TREE,
15103 long_long_unsigned_type_node,
15104 tree_cons (NULL_TREE, V4HI_type_node,
15105 tree_cons (NULL_TREE,
15109 tree di_ftype_v4hi_v4hi
15110 = build_function_type (long_long_unsigned_type_node,
15111 tree_cons (NULL_TREE, V4HI_type_node,
15112 tree_cons (NULL_TREE, V4HI_type_node,
15115 /* Normal vector binops. */
15116 tree v8qi_ftype_v8qi_v8qi
15117 = build_function_type (V8QI_type_node,
15118 tree_cons (NULL_TREE, V8QI_type_node,
15119 tree_cons (NULL_TREE, V8QI_type_node,
15121 tree v4hi_ftype_v4hi_v4hi
15122 = build_function_type (V4HI_type_node,
15123 tree_cons (NULL_TREE, V4HI_type_node,
15124 tree_cons (NULL_TREE, V4HI_type_node,
15126 tree v2si_ftype_v2si_v2si
15127 = build_function_type (V2SI_type_node,
15128 tree_cons (NULL_TREE, V2SI_type_node,
15129 tree_cons (NULL_TREE, V2SI_type_node,
15131 tree di_ftype_di_di
15132 = build_function_type (long_long_unsigned_type_node,
15133 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15134 tree_cons (NULL_TREE,
15135 long_long_unsigned_type_node,
15138 /* Add all builtins that are more or less simple operations on two
15140 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15142 /* Use one of the operands; the target can have a different mode for
15143 mask-generating compares. */
15144 enum machine_mode mode;
15150 mode = insn_data[d->icode].operand[1].mode;
15155 type = v8qi_ftype_v8qi_v8qi;
15158 type = v4hi_ftype_v4hi_v4hi;
15161 type = v2si_ftype_v2si_v2si;
15164 type = di_ftype_di_di;
15168 gcc_unreachable ();
15171 def_mbuiltin (d->mask, d->name, type, d->code);
15174 /* Add the remaining MMX insns with somewhat more complicated types. */
15175 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15176 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15177 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15179 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15180 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15181 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15182 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15183 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15184 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15186 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15187 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15188 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15189 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15190 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15191 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15193 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15194 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15195 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15196 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15197 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15198 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15200 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15201 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15202 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15203 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15204 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15205 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15207 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15209 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15210 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15211 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15212 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15214 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15215 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15216 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15217 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15218 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15219 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15220 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15221 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15222 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15224 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15225 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15226 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15228 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15229 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15232 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15233 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15234 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15235 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15236 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15237 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15239 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15240 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15241 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15242 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15243 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15244 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15245 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15246 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15247 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15249 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15250 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15252 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15253 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15254 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15257 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15258 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15259 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15260 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15261 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15262 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
15263 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
15267 arm_init_tls_builtins (void)
15271 ftype = build_function_type (ptr_type_node, void_list_node);
15272 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15273 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15275 TREE_NOTHROW (decl) = 1;
15276 TREE_READONLY (decl) = 1;
15293 } neon_builtin_type_bits;
15295 #define v8qi_UP T_V8QI
15296 #define v4hi_UP T_V4HI
15297 #define v2si_UP T_V2SI
15298 #define v2sf_UP T_V2SF
15300 #define v16qi_UP T_V16QI
15301 #define v8hi_UP T_V8HI
15302 #define v4si_UP T_V4SI
15303 #define v4sf_UP T_V4SF
15304 #define v2di_UP T_V2DI
15309 #define UP(X) X##_UP
15344 NEON_LOADSTRUCTLANE,
15346 NEON_STORESTRUCTLANE,
15355 const neon_itype itype;
15356 const neon_builtin_type_bits bits;
15357 const enum insn_code codes[T_MAX];
15358 const unsigned int num_vars;
15359 unsigned int base_fcode;
15360 } neon_builtin_datum;
15362 #define CF(N,X) CODE_FOR_neon_##N##X
15364 #define VAR1(T, N, A) \
15365 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15366 #define VAR2(T, N, A, B) \
15367 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15368 #define VAR3(T, N, A, B, C) \
15369 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15370 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15371 #define VAR4(T, N, A, B, C, D) \
15372 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15373 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15374 #define VAR5(T, N, A, B, C, D, E) \
15375 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15376 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15377 #define VAR6(T, N, A, B, C, D, E, F) \
15378 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15379 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15380 #define VAR7(T, N, A, B, C, D, E, F, G) \
15381 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15382 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15384 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15385 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15387 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15388 CF (N, G), CF (N, H) }, 8, 0
15389 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15390 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15391 | UP (H) | UP (I), \
15392 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15393 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15394 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15395 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15396 | UP (H) | UP (I) | UP (J), \
15397 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15398 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
15400 /* The mode entries in the following table correspond to the "key" type of the
15401 instruction variant, i.e. equivalent to that which would be specified after
15402 the assembler mnemonic, which usually refers to the last vector operand.
15403 (Signed/unsigned/polynomial types are not differentiated between though, and
15404 are all mapped onto the same mode for a given element size.) The modes
15405 listed per instruction should be the same as those defined for that
15406 instruction's pattern in neon.md.
15407 WARNING: Variants should be listed in the same increasing order as
15408 neon_builtin_type_bits. */
15410 static neon_builtin_datum neon_builtin_data[] =
15412 { VAR10 (BINOP, vadd,
15413 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15414 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15415 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15416 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15417 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15418 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15419 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15420 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15421 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15422 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15423 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15424 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15425 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15426 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15427 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15428 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15429 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15430 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15431 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15432 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15433 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15434 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15435 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15436 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15437 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15438 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15439 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15440 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15441 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15442 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15443 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15444 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15445 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15446 { VAR10 (BINOP, vsub,
15447 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15448 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15449 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15450 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15451 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15452 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15453 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15454 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15455 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15456 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15457 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15458 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15459 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15460 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15461 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15462 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15463 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15464 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15465 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15466 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15467 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15468 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15469 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15470 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15471 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15472 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15473 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15474 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15475 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15476 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15477 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15478 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15479 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15480 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15481 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15482 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15483 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15484 /* FIXME: vget_lane supports more variants than this! */
15485 { VAR10 (GETLANE, vget_lane,
15486 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15487 { VAR10 (SETLANE, vset_lane,
15488 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15489 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15490 { VAR10 (DUP, vdup_n,
15491 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15492 { VAR10 (DUPLANE, vdup_lane,
15493 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15494 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15495 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15496 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15497 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15498 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15499 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15500 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15501 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15502 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15503 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15504 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15505 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15506 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15507 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15508 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15509 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15510 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15511 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15512 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15513 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15514 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15515 { VAR10 (BINOP, vext,
15516 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15517 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15518 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15519 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15520 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15521 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15522 { VAR10 (SELECT, vbsl,
15523 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15524 { VAR1 (VTBL, vtbl1, v8qi) },
15525 { VAR1 (VTBL, vtbl2, v8qi) },
15526 { VAR1 (VTBL, vtbl3, v8qi) },
15527 { VAR1 (VTBL, vtbl4, v8qi) },
15528 { VAR1 (VTBX, vtbx1, v8qi) },
15529 { VAR1 (VTBX, vtbx2, v8qi) },
15530 { VAR1 (VTBX, vtbx3, v8qi) },
15531 { VAR1 (VTBX, vtbx4, v8qi) },
15532 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15533 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15534 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15535 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15536 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15537 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15538 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15539 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15540 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15541 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15542 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15543 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15544 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15545 { VAR10 (LOAD1, vld1,
15546 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15547 { VAR10 (LOAD1LANE, vld1_lane,
15548 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15549 { VAR10 (LOAD1, vld1_dup,
15550 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15551 { VAR10 (STORE1, vst1,
15552 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15553 { VAR10 (STORE1LANE, vst1_lane,
15554 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15555 { VAR9 (LOADSTRUCT,
15556 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15557 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15558 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15559 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15560 { VAR9 (STORESTRUCT, vst2,
15561 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15562 { VAR7 (STORESTRUCTLANE, vst2_lane,
15563 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15564 { VAR9 (LOADSTRUCT,
15565 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15566 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15567 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15568 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15569 { VAR9 (STORESTRUCT, vst3,
15570 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15571 { VAR7 (STORESTRUCTLANE, vst3_lane,
15572 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15573 { VAR9 (LOADSTRUCT, vld4,
15574 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15575 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15576 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15577 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15578 { VAR9 (STORESTRUCT, vst4,
15579 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15580 { VAR7 (STORESTRUCTLANE, vst4_lane,
15581 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15582 { VAR10 (LOGICBINOP, vand,
15583 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15584 { VAR10 (LOGICBINOP, vorr,
15585 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15586 { VAR10 (BINOP, veor,
15587 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15588 { VAR10 (LOGICBINOP, vbic,
15589 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15590 { VAR10 (LOGICBINOP, vorn,
15591 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15607 arm_init_neon_builtins (void)
15609 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15611 tree neon_intQI_type_node;
15612 tree neon_intHI_type_node;
15613 tree neon_polyQI_type_node;
15614 tree neon_polyHI_type_node;
15615 tree neon_intSI_type_node;
15616 tree neon_intDI_type_node;
15617 tree neon_float_type_node;
15619 tree intQI_pointer_node;
15620 tree intHI_pointer_node;
15621 tree intSI_pointer_node;
15622 tree intDI_pointer_node;
15623 tree float_pointer_node;
15625 tree const_intQI_node;
15626 tree const_intHI_node;
15627 tree const_intSI_node;
15628 tree const_intDI_node;
15629 tree const_float_node;
15631 tree const_intQI_pointer_node;
15632 tree const_intHI_pointer_node;
15633 tree const_intSI_pointer_node;
15634 tree const_intDI_pointer_node;
15635 tree const_float_pointer_node;
15637 tree V8QI_type_node;
15638 tree V4HI_type_node;
15639 tree V2SI_type_node;
15640 tree V2SF_type_node;
15641 tree V16QI_type_node;
15642 tree V8HI_type_node;
15643 tree V4SI_type_node;
15644 tree V4SF_type_node;
15645 tree V2DI_type_node;
15647 tree intUQI_type_node;
15648 tree intUHI_type_node;
15649 tree intUSI_type_node;
15650 tree intUDI_type_node;
15652 tree intEI_type_node;
15653 tree intOI_type_node;
15654 tree intCI_type_node;
15655 tree intXI_type_node;
15657 tree V8QI_pointer_node;
15658 tree V4HI_pointer_node;
15659 tree V2SI_pointer_node;
15660 tree V2SF_pointer_node;
15661 tree V16QI_pointer_node;
15662 tree V8HI_pointer_node;
15663 tree V4SI_pointer_node;
15664 tree V4SF_pointer_node;
15665 tree V2DI_pointer_node;
15667 tree void_ftype_pv8qi_v8qi_v8qi;
15668 tree void_ftype_pv4hi_v4hi_v4hi;
15669 tree void_ftype_pv2si_v2si_v2si;
15670 tree void_ftype_pv2sf_v2sf_v2sf;
15671 tree void_ftype_pdi_di_di;
15672 tree void_ftype_pv16qi_v16qi_v16qi;
15673 tree void_ftype_pv8hi_v8hi_v8hi;
15674 tree void_ftype_pv4si_v4si_v4si;
15675 tree void_ftype_pv4sf_v4sf_v4sf;
15676 tree void_ftype_pv2di_v2di_v2di;
15678 tree reinterp_ftype_dreg[5][5];
15679 tree reinterp_ftype_qreg[5][5];
15680 tree dreg_types[5], qreg_types[5];
15682 /* Create distinguished type nodes for NEON vector element types,
15683 and pointers to values of such types, so we can detect them later. */
15684 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15685 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15686 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15687 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15688 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15689 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15690 neon_float_type_node = make_node (REAL_TYPE);
15691 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15692 layout_type (neon_float_type_node);
15694 /* Define typedefs which exactly correspond to the modes we are basing vector
15695 types on. If you change these names you'll need to change
15696 the table used by arm_mangle_type too. */
15697 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15698 "__builtin_neon_qi");
15699 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15700 "__builtin_neon_hi");
15701 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15702 "__builtin_neon_si");
15703 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15704 "__builtin_neon_sf");
15705 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15706 "__builtin_neon_di");
15707 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15708 "__builtin_neon_poly8");
15709 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15710 "__builtin_neon_poly16");
15712 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15713 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15714 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15715 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15716 float_pointer_node = build_pointer_type (neon_float_type_node);
15718 /* Next create constant-qualified versions of the above types. */
15719 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15721 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15723 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15725 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15727 const_float_node = build_qualified_type (neon_float_type_node,
15730 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15731 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15732 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15733 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15734 const_float_pointer_node = build_pointer_type (const_float_node);
15736 /* Now create vector types based on our NEON element types. */
15737 /* 64-bit vectors. */
15739 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15741 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15743 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15745 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15746 /* 128-bit vectors. */
15748 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15750 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15752 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15754 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15756 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15758 /* Unsigned integer types for various mode sizes. */
15759 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15760 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15761 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15762 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15764 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15765 "__builtin_neon_uqi");
15766 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15767 "__builtin_neon_uhi");
15768 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15769 "__builtin_neon_usi");
15770 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15771 "__builtin_neon_udi");
15773 /* Opaque integer types for structures of vectors. */
15774 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15775 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15776 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15777 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15779 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15780 "__builtin_neon_ti");
15781 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15782 "__builtin_neon_ei");
15783 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15784 "__builtin_neon_oi");
15785 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15786 "__builtin_neon_ci");
15787 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15788 "__builtin_neon_xi");
15790 /* Pointers to vector types. */
15791 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15792 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15793 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15794 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15795 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15796 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15797 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15798 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15799 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15801 /* Operations which return results as pairs. */
15802 void_ftype_pv8qi_v8qi_v8qi =
15803 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15804 V8QI_type_node, NULL);
15805 void_ftype_pv4hi_v4hi_v4hi =
15806 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15807 V4HI_type_node, NULL);
15808 void_ftype_pv2si_v2si_v2si =
15809 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15810 V2SI_type_node, NULL);
15811 void_ftype_pv2sf_v2sf_v2sf =
15812 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15813 V2SF_type_node, NULL);
15814 void_ftype_pdi_di_di =
15815 build_function_type_list (void_type_node, intDI_pointer_node,
15816 neon_intDI_type_node, neon_intDI_type_node, NULL);
15817 void_ftype_pv16qi_v16qi_v16qi =
15818 build_function_type_list (void_type_node, V16QI_pointer_node,
15819 V16QI_type_node, V16QI_type_node, NULL);
15820 void_ftype_pv8hi_v8hi_v8hi =
15821 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15822 V8HI_type_node, NULL);
15823 void_ftype_pv4si_v4si_v4si =
15824 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15825 V4SI_type_node, NULL);
15826 void_ftype_pv4sf_v4sf_v4sf =
15827 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15828 V4SF_type_node, NULL);
15829 void_ftype_pv2di_v2di_v2di =
15830 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15831 V2DI_type_node, NULL);
15833 dreg_types[0] = V8QI_type_node;
15834 dreg_types[1] = V4HI_type_node;
15835 dreg_types[2] = V2SI_type_node;
15836 dreg_types[3] = V2SF_type_node;
15837 dreg_types[4] = neon_intDI_type_node;
15839 qreg_types[0] = V16QI_type_node;
15840 qreg_types[1] = V8HI_type_node;
15841 qreg_types[2] = V4SI_type_node;
15842 qreg_types[3] = V4SF_type_node;
15843 qreg_types[4] = V2DI_type_node;
15845 for (i = 0; i < 5; i++)
15848 for (j = 0; j < 5; j++)
15850 reinterp_ftype_dreg[i][j]
15851 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15852 reinterp_ftype_qreg[i][j]
15853 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15857 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15859 neon_builtin_datum *d = &neon_builtin_data[i];
15860 unsigned int j, codeidx = 0;
15862 d->base_fcode = fcode;
15864 for (j = 0; j < T_MAX; j++)
15866 const char* const modenames[] = {
15867 "v8qi", "v4hi", "v2si", "v2sf", "di",
15868 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15872 enum insn_code icode;
15873 int is_load = 0, is_store = 0;
15875 if ((d->bits & (1 << j)) == 0)
15878 icode = d->codes[codeidx++];
15883 case NEON_LOAD1LANE:
15884 case NEON_LOADSTRUCT:
15885 case NEON_LOADSTRUCTLANE:
15887 /* Fall through. */
15889 case NEON_STORE1LANE:
15890 case NEON_STORESTRUCT:
15891 case NEON_STORESTRUCTLANE:
15894 /* Fall through. */
15897 case NEON_LOGICBINOP:
15898 case NEON_SHIFTINSERT:
15905 case NEON_SHIFTIMM:
15906 case NEON_SHIFTACC:
15912 case NEON_LANEMULL:
15913 case NEON_LANEMULH:
15915 case NEON_SCALARMUL:
15916 case NEON_SCALARMULL:
15917 case NEON_SCALARMULH:
15918 case NEON_SCALARMAC:
15924 tree return_type = void_type_node, args = void_list_node;
15926 /* Build a function type directly from the insn_data for this
15927 builtin. The build_function_type() function takes care of
15928 removing duplicates for us. */
15929 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15933 if (is_load && k == 1)
15935 /* Neon load patterns always have the memory operand
15936 (a SImode pointer) in the operand 1 position. We
15937 want a const pointer to the element type in that
15939 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15945 eltype = const_intQI_pointer_node;
15950 eltype = const_intHI_pointer_node;
15955 eltype = const_intSI_pointer_node;
15960 eltype = const_float_pointer_node;
15965 eltype = const_intDI_pointer_node;
15968 default: gcc_unreachable ();
15971 else if (is_store && k == 0)
15973 /* Similarly, Neon store patterns use operand 0 as
15974 the memory location to store to (a SImode pointer).
15975 Use a pointer to the element type of the store in
15977 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15983 eltype = intQI_pointer_node;
15988 eltype = intHI_pointer_node;
15993 eltype = intSI_pointer_node;
15998 eltype = float_pointer_node;
16003 eltype = intDI_pointer_node;
16006 default: gcc_unreachable ();
16011 switch (insn_data[icode].operand[k].mode)
16013 case VOIDmode: eltype = void_type_node; break;
16015 case QImode: eltype = neon_intQI_type_node; break;
16016 case HImode: eltype = neon_intHI_type_node; break;
16017 case SImode: eltype = neon_intSI_type_node; break;
16018 case SFmode: eltype = neon_float_type_node; break;
16019 case DImode: eltype = neon_intDI_type_node; break;
16020 case TImode: eltype = intTI_type_node; break;
16021 case EImode: eltype = intEI_type_node; break;
16022 case OImode: eltype = intOI_type_node; break;
16023 case CImode: eltype = intCI_type_node; break;
16024 case XImode: eltype = intXI_type_node; break;
16025 /* 64-bit vectors. */
16026 case V8QImode: eltype = V8QI_type_node; break;
16027 case V4HImode: eltype = V4HI_type_node; break;
16028 case V2SImode: eltype = V2SI_type_node; break;
16029 case V2SFmode: eltype = V2SF_type_node; break;
16030 /* 128-bit vectors. */
16031 case V16QImode: eltype = V16QI_type_node; break;
16032 case V8HImode: eltype = V8HI_type_node; break;
16033 case V4SImode: eltype = V4SI_type_node; break;
16034 case V4SFmode: eltype = V4SF_type_node; break;
16035 case V2DImode: eltype = V2DI_type_node; break;
16036 default: gcc_unreachable ();
16040 if (k == 0 && !is_store)
16041 return_type = eltype;
16043 args = tree_cons (NULL_TREE, eltype, args);
16046 ftype = build_function_type (return_type, args);
16050 case NEON_RESULTPAIR:
16052 switch (insn_data[icode].operand[1].mode)
16054 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16055 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16056 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16057 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16058 case DImode: ftype = void_ftype_pdi_di_di; break;
16059 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16060 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16061 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16062 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16063 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16064 default: gcc_unreachable ();
16069 case NEON_REINTERP:
16071 /* We iterate over 5 doubleword types, then 5 quadword
16074 switch (insn_data[icode].operand[0].mode)
16076 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16077 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16078 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16079 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16080 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16081 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16082 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16083 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16084 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16085 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16086 default: gcc_unreachable ();
16092 gcc_unreachable ();
16095 gcc_assert (ftype != NULL);
16097 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16099 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
16106 arm_init_builtins (void)
16108 arm_init_tls_builtins ();
16110 if (TARGET_REALLY_IWMMXT)
16111 arm_init_iwmmxt_builtins ();
16114 arm_init_neon_builtins ();
16117 /* Errors in the source file can cause expand_expr to return const0_rtx
16118 where we expect a vector. To avoid crashing, use one of the vector
16119 clear instructions. */
16122 safe_vector_operand (rtx x, enum machine_mode mode)
16124 if (x != const0_rtx)
16126 x = gen_reg_rtx (mode);
16128 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16129 : gen_rtx_SUBREG (DImode, x, 0)));
16133 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16136 arm_expand_binop_builtin (enum insn_code icode,
16137 tree exp, rtx target)
16140 tree arg0 = CALL_EXPR_ARG (exp, 0);
16141 tree arg1 = CALL_EXPR_ARG (exp, 1);
16142 rtx op0 = expand_normal (arg0);
16143 rtx op1 = expand_normal (arg1);
16144 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16145 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16146 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16148 if (VECTOR_MODE_P (mode0))
16149 op0 = safe_vector_operand (op0, mode0);
16150 if (VECTOR_MODE_P (mode1))
16151 op1 = safe_vector_operand (op1, mode1);
16154 || GET_MODE (target) != tmode
16155 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16156 target = gen_reg_rtx (tmode);
16158 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16160 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16161 op0 = copy_to_mode_reg (mode0, op0);
16162 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16163 op1 = copy_to_mode_reg (mode1, op1);
16165 pat = GEN_FCN (icode) (target, op0, op1);
16172 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16175 arm_expand_unop_builtin (enum insn_code icode,
16176 tree exp, rtx target, int do_load)
16179 tree arg0 = CALL_EXPR_ARG (exp, 0);
16180 rtx op0 = expand_normal (arg0);
16181 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16182 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16185 || GET_MODE (target) != tmode
16186 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16187 target = gen_reg_rtx (tmode);
16189 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16192 if (VECTOR_MODE_P (mode0))
16193 op0 = safe_vector_operand (op0, mode0);
16195 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16196 op0 = copy_to_mode_reg (mode0, op0);
16199 pat = GEN_FCN (icode) (target, op0);
16207 neon_builtin_compare (const void *a, const void *b)
16209 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16210 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16211 unsigned int soughtcode = key->base_fcode;
16213 if (soughtcode >= memb->base_fcode
16214 && soughtcode < memb->base_fcode + memb->num_vars)
16216 else if (soughtcode < memb->base_fcode)
16222 static enum insn_code
16223 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16225 neon_builtin_datum key, *found;
16228 key.base_fcode = fcode;
16229 found = (neon_builtin_datum *)
16230 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16231 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16232 gcc_assert (found);
16233 idx = fcode - (int) found->base_fcode;
16234 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16237 *itype = found->itype;
16239 return found->codes[idx];
16243 NEON_ARG_COPY_TO_REG,
16248 #define NEON_MAX_BUILTIN_ARGS 5
16250 /* Expand a Neon builtin. */
16252 arm_expand_neon_args (rtx target, int icode, int have_retval,
16257 tree arg[NEON_MAX_BUILTIN_ARGS];
16258 rtx op[NEON_MAX_BUILTIN_ARGS];
16259 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16260 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16265 || GET_MODE (target) != tmode
16266 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16267 target = gen_reg_rtx (tmode);
16269 va_start (ap, exp);
16273 builtin_arg thisarg = va_arg (ap, int);
16275 if (thisarg == NEON_ARG_STOP)
16279 arg[argc] = CALL_EXPR_ARG (exp, argc);
16280 op[argc] = expand_normal (arg[argc]);
16281 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16285 case NEON_ARG_COPY_TO_REG:
16286 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16287 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16288 (op[argc], mode[argc]))
16289 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16292 case NEON_ARG_CONSTANT:
16293 /* FIXME: This error message is somewhat unhelpful. */
16294 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16295 (op[argc], mode[argc]))
16296 error ("argument must be a constant");
16299 case NEON_ARG_STOP:
16300 gcc_unreachable ();
16313 pat = GEN_FCN (icode) (target, op[0]);
16317 pat = GEN_FCN (icode) (target, op[0], op[1]);
16321 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16325 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16329 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16333 gcc_unreachable ();
16339 pat = GEN_FCN (icode) (op[0]);
16343 pat = GEN_FCN (icode) (op[0], op[1]);
16347 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16351 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16355 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16359 gcc_unreachable ();
16370 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16371 constants defined per-instruction or per instruction-variant. Instead, the
16372 required info is looked up in the table neon_builtin_data. */
16374 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16377 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16384 return arm_expand_neon_args (target, icode, 1, exp,
16385 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16389 case NEON_SCALARMUL:
16390 case NEON_SCALARMULL:
16391 case NEON_SCALARMULH:
16392 case NEON_SHIFTINSERT:
16393 case NEON_LOGICBINOP:
16394 return arm_expand_neon_args (target, icode, 1, exp,
16395 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16399 return arm_expand_neon_args (target, icode, 1, exp,
16400 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16401 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16405 case NEON_SHIFTIMM:
16406 return arm_expand_neon_args (target, icode, 1, exp,
16407 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16411 return arm_expand_neon_args (target, icode, 1, exp,
16412 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16416 case NEON_REINTERP:
16417 return arm_expand_neon_args (target, icode, 1, exp,
16418 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16422 return arm_expand_neon_args (target, icode, 1, exp,
16423 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16425 case NEON_RESULTPAIR:
16426 return arm_expand_neon_args (target, icode, 0, exp,
16427 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16431 case NEON_LANEMULL:
16432 case NEON_LANEMULH:
16433 return arm_expand_neon_args (target, icode, 1, exp,
16434 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16435 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16438 return arm_expand_neon_args (target, icode, 1, exp,
16439 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16440 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16442 case NEON_SHIFTACC:
16443 return arm_expand_neon_args (target, icode, 1, exp,
16444 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16445 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16447 case NEON_SCALARMAC:
16448 return arm_expand_neon_args (target, icode, 1, exp,
16449 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16450 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16454 return arm_expand_neon_args (target, icode, 1, exp,
16455 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16459 case NEON_LOADSTRUCT:
16460 return arm_expand_neon_args (target, icode, 1, exp,
16461 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16463 case NEON_LOAD1LANE:
16464 case NEON_LOADSTRUCTLANE:
16465 return arm_expand_neon_args (target, icode, 1, exp,
16466 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16470 case NEON_STORESTRUCT:
16471 return arm_expand_neon_args (target, icode, 0, exp,
16472 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16474 case NEON_STORE1LANE:
16475 case NEON_STORESTRUCTLANE:
16476 return arm_expand_neon_args (target, icode, 0, exp,
16477 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16481 gcc_unreachable ();
16484 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16486 neon_reinterpret (rtx dest, rtx src)
16488 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16491 /* Emit code to place a Neon pair result in memory locations (with equal
16494 neon_emit_pair_result_insn (enum machine_mode mode,
16495 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16498 rtx mem = gen_rtx_MEM (mode, destaddr);
16499 rtx tmp1 = gen_reg_rtx (mode);
16500 rtx tmp2 = gen_reg_rtx (mode);
16502 emit_insn (intfn (tmp1, op1, tmp2, op2));
16504 emit_move_insn (mem, tmp1);
16505 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16506 emit_move_insn (mem, tmp2);
16509 /* Set up operands for a register copy from src to dest, taking care not to
16510 clobber registers in the process.
16511 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16512 be called with a large N, so that should be OK. */
16515 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16517 unsigned int copied = 0, opctr = 0;
16518 unsigned int done = (1 << count) - 1;
16521 while (copied != done)
16523 for (i = 0; i < count; i++)
16527 for (j = 0; good && j < count; j++)
16528 if (i != j && (copied & (1 << j)) == 0
16529 && reg_overlap_mentioned_p (src[j], dest[i]))
16534 operands[opctr++] = dest[i];
16535 operands[opctr++] = src[i];
16541 gcc_assert (opctr == count * 2);
16544 /* Expand an expression EXP that calls a built-in function,
16545 with result going to TARGET if that's convenient
16546 (and in mode MODE if that's convenient).
16547 SUBTARGET may be used as the target for computing one of EXP's operands.
16548 IGNORE is nonzero if the value is to be ignored. */
16551 arm_expand_builtin (tree exp,
16553 rtx subtarget ATTRIBUTE_UNUSED,
16554 enum machine_mode mode ATTRIBUTE_UNUSED,
16555 int ignore ATTRIBUTE_UNUSED)
16557 const struct builtin_description * d;
16558 enum insn_code icode;
16559 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16567 int fcode = DECL_FUNCTION_CODE (fndecl);
16569 enum machine_mode tmode;
16570 enum machine_mode mode0;
16571 enum machine_mode mode1;
16572 enum machine_mode mode2;
16574 if (fcode >= ARM_BUILTIN_NEON_BASE)
16575 return arm_expand_neon_builtin (fcode, exp, target);
16579 case ARM_BUILTIN_TEXTRMSB:
16580 case ARM_BUILTIN_TEXTRMUB:
16581 case ARM_BUILTIN_TEXTRMSH:
16582 case ARM_BUILTIN_TEXTRMUH:
16583 case ARM_BUILTIN_TEXTRMSW:
16584 case ARM_BUILTIN_TEXTRMUW:
16585 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16586 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16587 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16588 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16589 : CODE_FOR_iwmmxt_textrmw);
16591 arg0 = CALL_EXPR_ARG (exp, 0);
16592 arg1 = CALL_EXPR_ARG (exp, 1);
16593 op0 = expand_normal (arg0);
16594 op1 = expand_normal (arg1);
16595 tmode = insn_data[icode].operand[0].mode;
16596 mode0 = insn_data[icode].operand[1].mode;
16597 mode1 = insn_data[icode].operand[2].mode;
16599 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16600 op0 = copy_to_mode_reg (mode0, op0);
16601 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16603 /* @@@ better error message */
16604 error ("selector must be an immediate");
16605 return gen_reg_rtx (tmode);
16608 || GET_MODE (target) != tmode
16609 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16610 target = gen_reg_rtx (tmode);
16611 pat = GEN_FCN (icode) (target, op0, op1);
16617 case ARM_BUILTIN_TINSRB:
16618 case ARM_BUILTIN_TINSRH:
16619 case ARM_BUILTIN_TINSRW:
16620 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16621 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16622 : CODE_FOR_iwmmxt_tinsrw);
16623 arg0 = CALL_EXPR_ARG (exp, 0);
16624 arg1 = CALL_EXPR_ARG (exp, 1);
16625 arg2 = CALL_EXPR_ARG (exp, 2);
16626 op0 = expand_normal (arg0);
16627 op1 = expand_normal (arg1);
16628 op2 = expand_normal (arg2);
16629 tmode = insn_data[icode].operand[0].mode;
16630 mode0 = insn_data[icode].operand[1].mode;
16631 mode1 = insn_data[icode].operand[2].mode;
16632 mode2 = insn_data[icode].operand[3].mode;
16634 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16635 op0 = copy_to_mode_reg (mode0, op0);
16636 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16637 op1 = copy_to_mode_reg (mode1, op1);
16638 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16640 /* @@@ better error message */
16641 error ("selector must be an immediate");
16645 || GET_MODE (target) != tmode
16646 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16647 target = gen_reg_rtx (tmode);
16648 pat = GEN_FCN (icode) (target, op0, op1, op2);
16654 case ARM_BUILTIN_SETWCX:
16655 arg0 = CALL_EXPR_ARG (exp, 0);
16656 arg1 = CALL_EXPR_ARG (exp, 1);
16657 op0 = force_reg (SImode, expand_normal (arg0));
16658 op1 = expand_normal (arg1);
16659 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16662 case ARM_BUILTIN_GETWCX:
16663 arg0 = CALL_EXPR_ARG (exp, 0);
16664 op0 = expand_normal (arg0);
16665 target = gen_reg_rtx (SImode);
16666 emit_insn (gen_iwmmxt_tmrc (target, op0));
16669 case ARM_BUILTIN_WSHUFH:
16670 icode = CODE_FOR_iwmmxt_wshufh;
16671 arg0 = CALL_EXPR_ARG (exp, 0);
16672 arg1 = CALL_EXPR_ARG (exp, 1);
16673 op0 = expand_normal (arg0);
16674 op1 = expand_normal (arg1);
16675 tmode = insn_data[icode].operand[0].mode;
16676 mode1 = insn_data[icode].operand[1].mode;
16677 mode2 = insn_data[icode].operand[2].mode;
16679 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16680 op0 = copy_to_mode_reg (mode1, op0);
16681 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16683 /* @@@ better error message */
16684 error ("mask must be an immediate");
16688 || GET_MODE (target) != tmode
16689 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16690 target = gen_reg_rtx (tmode);
16691 pat = GEN_FCN (icode) (target, op0, op1);
16697 case ARM_BUILTIN_WSADB:
16698 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16699 case ARM_BUILTIN_WSADH:
16700 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16701 case ARM_BUILTIN_WSADBZ:
16702 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16703 case ARM_BUILTIN_WSADHZ:
16704 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16706 /* Several three-argument builtins. */
16707 case ARM_BUILTIN_WMACS:
16708 case ARM_BUILTIN_WMACU:
16709 case ARM_BUILTIN_WALIGN:
16710 case ARM_BUILTIN_TMIA:
16711 case ARM_BUILTIN_TMIAPH:
16712 case ARM_BUILTIN_TMIATT:
16713 case ARM_BUILTIN_TMIATB:
16714 case ARM_BUILTIN_TMIABT:
16715 case ARM_BUILTIN_TMIABB:
16716 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16717 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16718 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16719 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16720 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16721 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16722 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16723 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16724 : CODE_FOR_iwmmxt_walign);
16725 arg0 = CALL_EXPR_ARG (exp, 0);
16726 arg1 = CALL_EXPR_ARG (exp, 1);
16727 arg2 = CALL_EXPR_ARG (exp, 2);
16728 op0 = expand_normal (arg0);
16729 op1 = expand_normal (arg1);
16730 op2 = expand_normal (arg2);
16731 tmode = insn_data[icode].operand[0].mode;
16732 mode0 = insn_data[icode].operand[1].mode;
16733 mode1 = insn_data[icode].operand[2].mode;
16734 mode2 = insn_data[icode].operand[3].mode;
16736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16737 op0 = copy_to_mode_reg (mode0, op0);
16738 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16739 op1 = copy_to_mode_reg (mode1, op1);
16740 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16741 op2 = copy_to_mode_reg (mode2, op2);
16743 || GET_MODE (target) != tmode
16744 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16745 target = gen_reg_rtx (tmode);
16746 pat = GEN_FCN (icode) (target, op0, op1, op2);
16752 case ARM_BUILTIN_WZERO:
16753 target = gen_reg_rtx (DImode);
16754 emit_insn (gen_iwmmxt_clrdi (target));
16757 case ARM_BUILTIN_THREAD_POINTER:
16758 return arm_load_tp (target);
16764 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16765 if (d->code == (const enum arm_builtins) fcode)
16766 return arm_expand_binop_builtin (d->icode, exp, target);
16768 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16769 if (d->code == (const enum arm_builtins) fcode)
16770 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16772 /* @@@ Should really do something sensible here. */
16776 /* Return the number (counting from 0) of
16777 the least significant set bit in MASK. */
16780 number_of_first_bit_set (unsigned mask)
16785 (mask & (1 << bit)) == 0;
16792 /* Emit code to push or pop registers to or from the stack. F is the
16793 assembly file. MASK is the registers to push or pop. PUSH is
16794 nonzero if we should push, and zero if we should pop. For debugging
16795 output, if pushing, adjust CFA_OFFSET by the amount of space added
16796 to the stack. REAL_REGS should have the same number of bits set as
16797 MASK, and will be used instead (in the same order) to describe which
16798 registers were saved - this is used to mark the save slots when we
16799 push high registers after moving them to low registers. */
16801 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16802 unsigned long real_regs)
16805 int lo_mask = mask & 0xFF;
16806 int pushed_words = 0;
16810 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16812 /* Special case. Do not generate a POP PC statement here, do it in
16814 thumb_exit (f, -1);
16818 if (ARM_EABI_UNWIND_TABLES && push)
16820 fprintf (f, "\t.save\t{");
16821 for (regno = 0; regno < 15; regno++)
16823 if (real_regs & (1 << regno))
16825 if (real_regs & ((1 << regno) -1))
16827 asm_fprintf (f, "%r", regno);
16830 fprintf (f, "}\n");
16833 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16835 /* Look at the low registers first. */
16836 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16840 asm_fprintf (f, "%r", regno);
16842 if ((lo_mask & ~1) != 0)
16849 if (push && (mask & (1 << LR_REGNUM)))
16851 /* Catch pushing the LR. */
16855 asm_fprintf (f, "%r", LR_REGNUM);
16859 else if (!push && (mask & (1 << PC_REGNUM)))
16861 /* Catch popping the PC. */
16862 if (TARGET_INTERWORK || TARGET_BACKTRACE
16863 || crtl->calls_eh_return)
16865 /* The PC is never poped directly, instead
16866 it is popped into r3 and then BX is used. */
16867 fprintf (f, "}\n");
16869 thumb_exit (f, -1);
16878 asm_fprintf (f, "%r", PC_REGNUM);
16882 fprintf (f, "}\n");
16884 if (push && pushed_words && dwarf2out_do_frame ())
16886 char *l = dwarf2out_cfi_label ();
16887 int pushed_mask = real_regs;
16889 *cfa_offset += pushed_words * 4;
16890 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16893 pushed_mask = real_regs;
16894 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16896 if (pushed_mask & 1)
16897 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16902 /* Generate code to return from a thumb function.
16903 If 'reg_containing_return_addr' is -1, then the return address is
16904 actually on the stack, at the stack pointer. */
16906 thumb_exit (FILE *f, int reg_containing_return_addr)
16908 unsigned regs_available_for_popping;
16909 unsigned regs_to_pop;
16911 unsigned available;
16915 int restore_a4 = FALSE;
16917 /* Compute the registers we need to pop. */
16921 if (reg_containing_return_addr == -1)
16923 regs_to_pop |= 1 << LR_REGNUM;
16927 if (TARGET_BACKTRACE)
16929 /* Restore the (ARM) frame pointer and stack pointer. */
16930 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16934 /* If there is nothing to pop then just emit the BX instruction and
16936 if (pops_needed == 0)
16938 if (crtl->calls_eh_return)
16939 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16941 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16944 /* Otherwise if we are not supporting interworking and we have not created
16945 a backtrace structure and the function was not entered in ARM mode then
16946 just pop the return address straight into the PC. */
16947 else if (!TARGET_INTERWORK
16948 && !TARGET_BACKTRACE
16949 && !is_called_in_ARM_mode (current_function_decl)
16950 && !crtl->calls_eh_return)
16952 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16956 /* Find out how many of the (return) argument registers we can corrupt. */
16957 regs_available_for_popping = 0;
16959 /* If returning via __builtin_eh_return, the bottom three registers
16960 all contain information needed for the return. */
16961 if (crtl->calls_eh_return)
16965 /* If we can deduce the registers used from the function's
16966 return value. This is more reliable that examining
16967 df_regs_ever_live_p () because that will be set if the register is
16968 ever used in the function, not just if the register is used
16969 to hold a return value. */
16971 if (crtl->return_rtx != 0)
16972 mode = GET_MODE (crtl->return_rtx);
16974 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16976 size = GET_MODE_SIZE (mode);
16980 /* In a void function we can use any argument register.
16981 In a function that returns a structure on the stack
16982 we can use the second and third argument registers. */
16983 if (mode == VOIDmode)
16984 regs_available_for_popping =
16985 (1 << ARG_REGISTER (1))
16986 | (1 << ARG_REGISTER (2))
16987 | (1 << ARG_REGISTER (3));
16989 regs_available_for_popping =
16990 (1 << ARG_REGISTER (2))
16991 | (1 << ARG_REGISTER (3));
16993 else if (size <= 4)
16994 regs_available_for_popping =
16995 (1 << ARG_REGISTER (2))
16996 | (1 << ARG_REGISTER (3));
16997 else if (size <= 8)
16998 regs_available_for_popping =
16999 (1 << ARG_REGISTER (3));
17002 /* Match registers to be popped with registers into which we pop them. */
17003 for (available = regs_available_for_popping,
17004 required = regs_to_pop;
17005 required != 0 && available != 0;
17006 available &= ~(available & - available),
17007 required &= ~(required & - required))
17010 /* If we have any popping registers left over, remove them. */
17012 regs_available_for_popping &= ~available;
17014 /* Otherwise if we need another popping register we can use
17015 the fourth argument register. */
17016 else if (pops_needed)
17018 /* If we have not found any free argument registers and
17019 reg a4 contains the return address, we must move it. */
17020 if (regs_available_for_popping == 0
17021 && reg_containing_return_addr == LAST_ARG_REGNUM)
17023 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17024 reg_containing_return_addr = LR_REGNUM;
17026 else if (size > 12)
17028 /* Register a4 is being used to hold part of the return value,
17029 but we have dire need of a free, low register. */
17032 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
17035 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17037 /* The fourth argument register is available. */
17038 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17044 /* Pop as many registers as we can. */
17045 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17046 regs_available_for_popping);
17048 /* Process the registers we popped. */
17049 if (reg_containing_return_addr == -1)
17051 /* The return address was popped into the lowest numbered register. */
17052 regs_to_pop &= ~(1 << LR_REGNUM);
17054 reg_containing_return_addr =
17055 number_of_first_bit_set (regs_available_for_popping);
17057 /* Remove this register for the mask of available registers, so that
17058 the return address will not be corrupted by further pops. */
17059 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17062 /* If we popped other registers then handle them here. */
17063 if (regs_available_for_popping)
17067 /* Work out which register currently contains the frame pointer. */
17068 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17070 /* Move it into the correct place. */
17071 asm_fprintf (f, "\tmov\t%r, %r\n",
17072 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17074 /* (Temporarily) remove it from the mask of popped registers. */
17075 regs_available_for_popping &= ~(1 << frame_pointer);
17076 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17078 if (regs_available_for_popping)
17082 /* We popped the stack pointer as well,
17083 find the register that contains it. */
17084 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17086 /* Move it into the stack register. */
17087 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17089 /* At this point we have popped all necessary registers, so
17090 do not worry about restoring regs_available_for_popping
17091 to its correct value:
17093 assert (pops_needed == 0)
17094 assert (regs_available_for_popping == (1 << frame_pointer))
17095 assert (regs_to_pop == (1 << STACK_POINTER)) */
17099 /* Since we have just move the popped value into the frame
17100 pointer, the popping register is available for reuse, and
17101 we know that we still have the stack pointer left to pop. */
17102 regs_available_for_popping |= (1 << frame_pointer);
17106 /* If we still have registers left on the stack, but we no longer have
17107 any registers into which we can pop them, then we must move the return
17108 address into the link register and make available the register that
17110 if (regs_available_for_popping == 0 && pops_needed > 0)
17112 regs_available_for_popping |= 1 << reg_containing_return_addr;
17114 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17115 reg_containing_return_addr);
17117 reg_containing_return_addr = LR_REGNUM;
17120 /* If we have registers left on the stack then pop some more.
17121 We know that at most we will want to pop FP and SP. */
17122 if (pops_needed > 0)
17127 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17128 regs_available_for_popping);
17130 /* We have popped either FP or SP.
17131 Move whichever one it is into the correct register. */
17132 popped_into = number_of_first_bit_set (regs_available_for_popping);
17133 move_to = number_of_first_bit_set (regs_to_pop);
17135 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17137 regs_to_pop &= ~(1 << move_to);
17142 /* If we still have not popped everything then we must have only
17143 had one register available to us and we are now popping the SP. */
17144 if (pops_needed > 0)
17148 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17149 regs_available_for_popping);
17151 popped_into = number_of_first_bit_set (regs_available_for_popping);
17153 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17155 assert (regs_to_pop == (1 << STACK_POINTER))
17156 assert (pops_needed == 1)
17160 /* If necessary restore the a4 register. */
17163 if (reg_containing_return_addr != LR_REGNUM)
17165 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17166 reg_containing_return_addr = LR_REGNUM;
17169 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17172 if (crtl->calls_eh_return)
17173 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17175 /* Return to caller. */
17176 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17181 thumb1_final_prescan_insn (rtx insn)
17183 if (flag_print_asm_name)
17184 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17185 INSN_ADDRESSES (INSN_UID (insn)));
17189 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17191 unsigned HOST_WIDE_INT mask = 0xff;
17194 if (val == 0) /* XXX */
17197 for (i = 0; i < 25; i++)
17198 if ((val & (mask << i)) == val)
17204 /* Returns nonzero if the current function contains,
17205 or might contain a far jump. */
17207 thumb_far_jump_used_p (void)
17211 /* This test is only important for leaf functions. */
17212 /* assert (!leaf_function_p ()); */
17214 /* If we have already decided that far jumps may be used,
17215 do not bother checking again, and always return true even if
17216 it turns out that they are not being used. Once we have made
17217 the decision that far jumps are present (and that hence the link
17218 register will be pushed onto the stack) we cannot go back on it. */
17219 if (cfun->machine->far_jump_used)
17222 /* If this function is not being called from the prologue/epilogue
17223 generation code then it must be being called from the
17224 INITIAL_ELIMINATION_OFFSET macro. */
17225 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17227 /* In this case we know that we are being asked about the elimination
17228 of the arg pointer register. If that register is not being used,
17229 then there are no arguments on the stack, and we do not have to
17230 worry that a far jump might force the prologue to push the link
17231 register, changing the stack offsets. In this case we can just
17232 return false, since the presence of far jumps in the function will
17233 not affect stack offsets.
17235 If the arg pointer is live (or if it was live, but has now been
17236 eliminated and so set to dead) then we do have to test to see if
17237 the function might contain a far jump. This test can lead to some
17238 false negatives, since before reload is completed, then length of
17239 branch instructions is not known, so gcc defaults to returning their
17240 longest length, which in turn sets the far jump attribute to true.
17242 A false negative will not result in bad code being generated, but it
17243 will result in a needless push and pop of the link register. We
17244 hope that this does not occur too often.
17246 If we need doubleword stack alignment this could affect the other
17247 elimination offsets so we can't risk getting it wrong. */
17248 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17249 cfun->machine->arg_pointer_live = 1;
17250 else if (!cfun->machine->arg_pointer_live)
17254 /* Check to see if the function contains a branch
17255 insn with the far jump attribute set. */
17256 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17258 if (GET_CODE (insn) == JUMP_INSN
17259 /* Ignore tablejump patterns. */
17260 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17261 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17262 && get_attr_far_jump (insn) == FAR_JUMP_YES
17265 /* Record the fact that we have decided that
17266 the function does use far jumps. */
17267 cfun->machine->far_jump_used = 1;
17275 /* Return nonzero if FUNC must be entered in ARM mode. */
17277 is_called_in_ARM_mode (tree func)
17279 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17281 /* Ignore the problem about functions whose address is taken. */
17282 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17286 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17292 /* The bits which aren't usefully expanded as rtl. */
17294 thumb_unexpanded_epilogue (void)
17296 arm_stack_offsets *offsets;
17298 unsigned long live_regs_mask = 0;
17299 int high_regs_pushed = 0;
17300 int had_to_push_lr;
17303 if (return_used_this_function)
17306 if (IS_NAKED (arm_current_func_type ()))
17309 offsets = arm_get_frame_offsets ();
17310 live_regs_mask = offsets->saved_regs_mask;
17311 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17313 /* If we can deduce the registers used from the function's return value.
17314 This is more reliable that examining df_regs_ever_live_p () because that
17315 will be set if the register is ever used in the function, not just if
17316 the register is used to hold a return value. */
17317 size = arm_size_return_regs ();
17319 /* The prolog may have pushed some high registers to use as
17320 work registers. e.g. the testsuite file:
17321 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
17322 compiles to produce:
17323 push {r4, r5, r6, r7, lr}
17327 as part of the prolog. We have to undo that pushing here. */
17329 if (high_regs_pushed)
17331 unsigned long mask = live_regs_mask & 0xff;
17334 /* The available low registers depend on the size of the value we are
17342 /* Oh dear! We have no low registers into which we can pop
17345 ("no low registers available for popping high registers");
17347 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17348 if (live_regs_mask & (1 << next_hi_reg))
17351 while (high_regs_pushed)
17353 /* Find lo register(s) into which the high register(s) can
17355 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17357 if (mask & (1 << regno))
17358 high_regs_pushed--;
17359 if (high_regs_pushed == 0)
17363 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17365 /* Pop the values into the low register(s). */
17366 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17368 /* Move the value(s) into the high registers. */
17369 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17371 if (mask & (1 << regno))
17373 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17376 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17377 if (live_regs_mask & (1 << next_hi_reg))
17382 live_regs_mask &= ~0x0f00;
17385 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17386 live_regs_mask &= 0xff;
17388 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17390 /* Pop the return address into the PC. */
17391 if (had_to_push_lr)
17392 live_regs_mask |= 1 << PC_REGNUM;
17394 /* Either no argument registers were pushed or a backtrace
17395 structure was created which includes an adjusted stack
17396 pointer, so just pop everything. */
17397 if (live_regs_mask)
17398 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17401 /* We have either just popped the return address into the
17402 PC or it is was kept in LR for the entire function. */
17403 if (!had_to_push_lr)
17404 thumb_exit (asm_out_file, LR_REGNUM);
17408 /* Pop everything but the return address. */
17409 if (live_regs_mask)
17410 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17413 if (had_to_push_lr)
17417 /* We have no free low regs, so save one. */
17418 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17422 /* Get the return address into a temporary register. */
17423 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17424 1 << LAST_ARG_REGNUM);
17428 /* Move the return address to lr. */
17429 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17431 /* Restore the low register. */
17432 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17437 regno = LAST_ARG_REGNUM;
17442 /* Remove the argument registers that were pushed onto the stack. */
17443 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17444 SP_REGNUM, SP_REGNUM,
17445 crtl->args.pretend_args_size);
17447 thumb_exit (asm_out_file, regno);
17453 /* Functions to save and restore machine-specific function data. */
17454 static struct machine_function *
17455 arm_init_machine_status (void)
17457 struct machine_function *machine;
17458 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17460 #if ARM_FT_UNKNOWN != 0
17461 machine->func_type = ARM_FT_UNKNOWN;
17466 /* Return an RTX indicating where the return address to the
17467 calling function can be found. */
17469 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17474 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17477 /* Do anything needed before RTL is emitted for each function. */
17479 arm_init_expanders (void)
17481 /* Arrange to initialize and mark the machine per-function status. */
17482 init_machine_status = arm_init_machine_status;
17484 /* This is to stop the combine pass optimizing away the alignment
17485 adjustment of va_arg. */
17486 /* ??? It is claimed that this should not be necessary. */
17488 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17492 /* Like arm_compute_initial_elimination offset. Simpler because there
17493 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17494 to point at the base of the local variables after static stack
17495 space for a function has been allocated. */
17498 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17500 arm_stack_offsets *offsets;
17502 offsets = arm_get_frame_offsets ();
17506 case ARG_POINTER_REGNUM:
17509 case STACK_POINTER_REGNUM:
17510 return offsets->outgoing_args - offsets->saved_args;
17512 case FRAME_POINTER_REGNUM:
17513 return offsets->soft_frame - offsets->saved_args;
17515 case ARM_HARD_FRAME_POINTER_REGNUM:
17516 return offsets->saved_regs - offsets->saved_args;
17518 case THUMB_HARD_FRAME_POINTER_REGNUM:
17519 return offsets->locals_base - offsets->saved_args;
17522 gcc_unreachable ();
17526 case FRAME_POINTER_REGNUM:
17529 case STACK_POINTER_REGNUM:
17530 return offsets->outgoing_args - offsets->soft_frame;
17532 case ARM_HARD_FRAME_POINTER_REGNUM:
17533 return offsets->saved_regs - offsets->soft_frame;
17535 case THUMB_HARD_FRAME_POINTER_REGNUM:
17536 return offsets->locals_base - offsets->soft_frame;
17539 gcc_unreachable ();
17544 gcc_unreachable ();
17548 /* Generate the rest of a function's prologue. */
17550 thumb1_expand_prologue (void)
17554 HOST_WIDE_INT amount;
17555 arm_stack_offsets *offsets;
17556 unsigned long func_type;
17558 unsigned long live_regs_mask;
17560 func_type = arm_current_func_type ();
17562 /* Naked functions don't have prologues. */
17563 if (IS_NAKED (func_type))
17566 if (IS_INTERRUPT (func_type))
17568 error ("interrupt Service Routines cannot be coded in Thumb mode");
17572 offsets = arm_get_frame_offsets ();
17573 live_regs_mask = offsets->saved_regs_mask;
17574 /* Load the pic register before setting the frame pointer,
17575 so we can use r7 as a temporary work register. */
17576 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17577 arm_load_pic_register (live_regs_mask);
17579 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17580 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17581 stack_pointer_rtx);
17583 amount = offsets->outgoing_args - offsets->saved_regs;
17588 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17589 GEN_INT (- amount)));
17590 RTX_FRAME_RELATED_P (insn) = 1;
17596 /* The stack decrement is too big for an immediate value in a single
17597 insn. In theory we could issue multiple subtracts, but after
17598 three of them it becomes more space efficient to place the full
17599 value in the constant pool and load into a register. (Also the
17600 ARM debugger really likes to see only one stack decrement per
17601 function). So instead we look for a scratch register into which
17602 we can load the decrement, and then we subtract this from the
17603 stack pointer. Unfortunately on the thumb the only available
17604 scratch registers are the argument registers, and we cannot use
17605 these as they may hold arguments to the function. Instead we
17606 attempt to locate a call preserved register which is used by this
17607 function. If we can find one, then we know that it will have
17608 been pushed at the start of the prologue and so we can corrupt
17610 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17611 if (live_regs_mask & (1 << regno))
17614 gcc_assert(regno <= LAST_LO_REGNUM);
17616 reg = gen_rtx_REG (SImode, regno);
17618 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17620 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17621 stack_pointer_rtx, reg));
17622 RTX_FRAME_RELATED_P (insn) = 1;
17623 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17624 plus_constant (stack_pointer_rtx,
17626 RTX_FRAME_RELATED_P (dwarf) = 1;
17628 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
17633 if (frame_pointer_needed)
17634 thumb_set_frame_pointer (offsets);
17636 /* If we are profiling, make sure no instructions are scheduled before
17637 the call to mcount. Similarly if the user has requested no
17638 scheduling in the prolog. Similarly if we want non-call exceptions
17639 using the EABI unwinder, to prevent faulting instructions from being
17640 swapped with a stack adjustment. */
17641 if (crtl->profile || !TARGET_SCHED_PROLOG
17642 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17643 emit_insn (gen_blockage ());
17645 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17646 if (live_regs_mask & 0xff)
17647 cfun->machine->lr_save_eliminated = 0;
17652 thumb1_expand_epilogue (void)
17654 HOST_WIDE_INT amount;
17655 arm_stack_offsets *offsets;
17658 /* Naked functions don't have prologues. */
17659 if (IS_NAKED (arm_current_func_type ()))
17662 offsets = arm_get_frame_offsets ();
17663 amount = offsets->outgoing_args - offsets->saved_regs;
17665 if (frame_pointer_needed)
17667 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17668 amount = offsets->locals_base - offsets->saved_regs;
17671 gcc_assert (amount >= 0);
17675 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17676 GEN_INT (amount)));
17679 /* r3 is always free in the epilogue. */
17680 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17682 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17683 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17687 /* Emit a USE (stack_pointer_rtx), so that
17688 the stack adjustment will not be deleted. */
17689 emit_insn (gen_prologue_use (stack_pointer_rtx));
17691 if (crtl->profile || !TARGET_SCHED_PROLOG)
17692 emit_insn (gen_blockage ());
17694 /* Emit a clobber for each insn that will be restored in the epilogue,
17695 so that flow2 will get register lifetimes correct. */
17696 for (regno = 0; regno < 13; regno++)
17697 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17698 emit_clobber (gen_rtx_REG (SImode, regno));
17700 if (! df_regs_ever_live_p (LR_REGNUM))
17701 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
17705 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17707 arm_stack_offsets *offsets;
17708 unsigned long live_regs_mask = 0;
17709 unsigned long l_mask;
17710 unsigned high_regs_pushed = 0;
17711 int cfa_offset = 0;
17714 if (IS_NAKED (arm_current_func_type ()))
17717 if (is_called_in_ARM_mode (current_function_decl))
17721 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17722 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17724 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17726 /* Generate code sequence to switch us into Thumb mode. */
17727 /* The .code 32 directive has already been emitted by
17728 ASM_DECLARE_FUNCTION_NAME. */
17729 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17730 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17732 /* Generate a label, so that the debugger will notice the
17733 change in instruction sets. This label is also used by
17734 the assembler to bypass the ARM code when this function
17735 is called from a Thumb encoded function elsewhere in the
17736 same file. Hence the definition of STUB_NAME here must
17737 agree with the definition in gas/config/tc-arm.c. */
17739 #define STUB_NAME ".real_start_of"
17741 fprintf (f, "\t.code\t16\n");
17743 if (arm_dllexport_name_p (name))
17744 name = arm_strip_name_encoding (name);
17746 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17747 fprintf (f, "\t.thumb_func\n");
17748 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17751 if (crtl->args.pretend_args_size)
17753 /* Output unwind directive for the stack adjustment. */
17754 if (ARM_EABI_UNWIND_TABLES)
17755 fprintf (f, "\t.pad #%d\n",
17756 crtl->args.pretend_args_size);
17758 if (cfun->machine->uses_anonymous_args)
17762 fprintf (f, "\tpush\t{");
17764 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17766 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17767 regno <= LAST_ARG_REGNUM;
17769 asm_fprintf (f, "%r%s", regno,
17770 regno == LAST_ARG_REGNUM ? "" : ", ");
17772 fprintf (f, "}\n");
17775 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17776 SP_REGNUM, SP_REGNUM,
17777 crtl->args.pretend_args_size);
17779 /* We don't need to record the stores for unwinding (would it
17780 help the debugger any if we did?), but record the change in
17781 the stack pointer. */
17782 if (dwarf2out_do_frame ())
17784 char *l = dwarf2out_cfi_label ();
17786 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17787 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17791 /* Get the registers we are going to push. */
17792 offsets = arm_get_frame_offsets ();
17793 live_regs_mask = offsets->saved_regs_mask;
17794 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17795 l_mask = live_regs_mask & 0x40ff;
17796 /* Then count how many other high registers will need to be pushed. */
17797 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17799 if (TARGET_BACKTRACE)
17802 unsigned work_register;
17804 /* We have been asked to create a stack backtrace structure.
17805 The code looks like this:
17809 0 sub SP, #16 Reserve space for 4 registers.
17810 2 push {R7} Push low registers.
17811 4 add R7, SP, #20 Get the stack pointer before the push.
17812 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17813 8 mov R7, PC Get hold of the start of this code plus 12.
17814 10 str R7, [SP, #16] Store it.
17815 12 mov R7, FP Get hold of the current frame pointer.
17816 14 str R7, [SP, #4] Store it.
17817 16 mov R7, LR Get hold of the current return address.
17818 18 str R7, [SP, #12] Store it.
17819 20 add R7, SP, #16 Point at the start of the backtrace structure.
17820 22 mov FP, R7 Put this value into the frame pointer. */
17822 work_register = thumb_find_work_register (live_regs_mask);
17824 if (ARM_EABI_UNWIND_TABLES)
17825 asm_fprintf (f, "\t.pad #16\n");
17828 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17829 SP_REGNUM, SP_REGNUM);
17831 if (dwarf2out_do_frame ())
17833 char *l = dwarf2out_cfi_label ();
17835 cfa_offset = cfa_offset + 16;
17836 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17841 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17842 offset = bit_count (l_mask) * UNITS_PER_WORD;
17847 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17848 offset + 16 + crtl->args.pretend_args_size);
17850 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17853 /* Make sure that the instruction fetching the PC is in the right place
17854 to calculate "start of backtrace creation code + 12". */
17857 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17858 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17860 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17861 ARM_HARD_FRAME_POINTER_REGNUM);
17862 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17867 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17868 ARM_HARD_FRAME_POINTER_REGNUM);
17869 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17871 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17872 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17876 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17877 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17879 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17881 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17882 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17884 /* Optimization: If we are not pushing any low registers but we are going
17885 to push some high registers then delay our first push. This will just
17886 be a push of LR and we can combine it with the push of the first high
17888 else if ((l_mask & 0xff) != 0
17889 || (high_regs_pushed == 0 && l_mask))
17890 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17892 if (high_regs_pushed)
17894 unsigned pushable_regs;
17895 unsigned next_hi_reg;
17897 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17898 if (live_regs_mask & (1 << next_hi_reg))
17901 pushable_regs = l_mask & 0xff;
17903 if (pushable_regs == 0)
17904 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17906 while (high_regs_pushed > 0)
17908 unsigned long real_regs_mask = 0;
17910 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17912 if (pushable_regs & (1 << regno))
17914 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17916 high_regs_pushed --;
17917 real_regs_mask |= (1 << next_hi_reg);
17919 if (high_regs_pushed)
17921 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17923 if (live_regs_mask & (1 << next_hi_reg))
17928 pushable_regs &= ~((1 << regno) - 1);
17934 /* If we had to find a work register and we have not yet
17935 saved the LR then add it to the list of regs to push. */
17936 if (l_mask == (1 << LR_REGNUM))
17938 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17940 real_regs_mask | (1 << LR_REGNUM));
17944 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17949 /* Handle the case of a double word load into a low register from
17950 a computed memory address. The computed address may involve a
17951 register which is overwritten by the load. */
17953 thumb_load_double_from_address (rtx *operands)
17961 gcc_assert (GET_CODE (operands[0]) == REG);
17962 gcc_assert (GET_CODE (operands[1]) == MEM);
17964 /* Get the memory address. */
17965 addr = XEXP (operands[1], 0);
17967 /* Work out how the memory address is computed. */
17968 switch (GET_CODE (addr))
17971 operands[2] = adjust_address (operands[1], SImode, 4);
17973 if (REGNO (operands[0]) == REGNO (addr))
17975 output_asm_insn ("ldr\t%H0, %2", operands);
17976 output_asm_insn ("ldr\t%0, %1", operands);
17980 output_asm_insn ("ldr\t%0, %1", operands);
17981 output_asm_insn ("ldr\t%H0, %2", operands);
17986 /* Compute <address> + 4 for the high order load. */
17987 operands[2] = adjust_address (operands[1], SImode, 4);
17989 output_asm_insn ("ldr\t%0, %1", operands);
17990 output_asm_insn ("ldr\t%H0, %2", operands);
17994 arg1 = XEXP (addr, 0);
17995 arg2 = XEXP (addr, 1);
17997 if (CONSTANT_P (arg1))
17998 base = arg2, offset = arg1;
18000 base = arg1, offset = arg2;
18002 gcc_assert (GET_CODE (base) == REG);
18004 /* Catch the case of <address> = <reg> + <reg> */
18005 if (GET_CODE (offset) == REG)
18007 int reg_offset = REGNO (offset);
18008 int reg_base = REGNO (base);
18009 int reg_dest = REGNO (operands[0]);
18011 /* Add the base and offset registers together into the
18012 higher destination register. */
18013 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18014 reg_dest + 1, reg_base, reg_offset);
18016 /* Load the lower destination register from the address in
18017 the higher destination register. */
18018 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18019 reg_dest, reg_dest + 1);
18021 /* Load the higher destination register from its own address
18023 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18024 reg_dest + 1, reg_dest + 1);
18028 /* Compute <address> + 4 for the high order load. */
18029 operands[2] = adjust_address (operands[1], SImode, 4);
18031 /* If the computed address is held in the low order register
18032 then load the high order register first, otherwise always
18033 load the low order register first. */
18034 if (REGNO (operands[0]) == REGNO (base))
18036 output_asm_insn ("ldr\t%H0, %2", operands);
18037 output_asm_insn ("ldr\t%0, %1", operands);
18041 output_asm_insn ("ldr\t%0, %1", operands);
18042 output_asm_insn ("ldr\t%H0, %2", operands);
18048 /* With no registers to worry about we can just load the value
18050 operands[2] = adjust_address (operands[1], SImode, 4);
18052 output_asm_insn ("ldr\t%H0, %2", operands);
18053 output_asm_insn ("ldr\t%0, %1", operands);
18057 gcc_unreachable ();
18064 thumb_output_move_mem_multiple (int n, rtx *operands)
18071 if (REGNO (operands[4]) > REGNO (operands[5]))
18074 operands[4] = operands[5];
18077 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18078 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18082 if (REGNO (operands[4]) > REGNO (operands[5]))
18085 operands[4] = operands[5];
18088 if (REGNO (operands[5]) > REGNO (operands[6]))
18091 operands[5] = operands[6];
18094 if (REGNO (operands[4]) > REGNO (operands[5]))
18097 operands[4] = operands[5];
18101 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18102 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18106 gcc_unreachable ();
18112 /* Output a call-via instruction for thumb state. */
18114 thumb_call_via_reg (rtx reg)
18116 int regno = REGNO (reg);
18119 gcc_assert (regno < LR_REGNUM);
18121 /* If we are in the normal text section we can use a single instance
18122 per compilation unit. If we are doing function sections, then we need
18123 an entry per section, since we can't rely on reachability. */
18124 if (in_section == text_section)
18126 thumb_call_reg_needed = 1;
18128 if (thumb_call_via_label[regno] == NULL)
18129 thumb_call_via_label[regno] = gen_label_rtx ();
18130 labelp = thumb_call_via_label + regno;
18134 if (cfun->machine->call_via[regno] == NULL)
18135 cfun->machine->call_via[regno] = gen_label_rtx ();
18136 labelp = cfun->machine->call_via + regno;
18139 output_asm_insn ("bl\t%a0", labelp);
18143 /* Routines for generating rtl. */
18145 thumb_expand_movmemqi (rtx *operands)
18147 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18148 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18149 HOST_WIDE_INT len = INTVAL (operands[2]);
18150 HOST_WIDE_INT offset = 0;
18154 emit_insn (gen_movmem12b (out, in, out, in));
18160 emit_insn (gen_movmem8b (out, in, out, in));
18166 rtx reg = gen_reg_rtx (SImode);
18167 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18168 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18175 rtx reg = gen_reg_rtx (HImode);
18176 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18177 plus_constant (in, offset))));
18178 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18186 rtx reg = gen_reg_rtx (QImode);
18187 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18188 plus_constant (in, offset))));
18189 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
18195 thumb_reload_out_hi (rtx *operands)
18197 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18200 /* Handle reading a half-word from memory during reload. */
18202 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18204 gcc_unreachable ();
18207 /* Return the length of a function name prefix
18208 that starts with the character 'c'. */
18210 arm_get_strip_length (int c)
18214 ARM_NAME_ENCODING_LENGTHS
18219 /* Return a pointer to a function's name with any
18220 and all prefix encodings stripped from it. */
18222 arm_strip_name_encoding (const char *name)
18226 while ((skip = arm_get_strip_length (* name)))
18232 /* If there is a '*' anywhere in the name's prefix, then
18233 emit the stripped name verbatim, otherwise prepend an
18234 underscore if leading underscores are being used. */
18236 arm_asm_output_labelref (FILE *stream, const char *name)
18241 while ((skip = arm_get_strip_length (* name)))
18243 verbatim |= (*name == '*');
18248 fputs (name, stream);
18250 asm_fprintf (stream, "%U%s", name);
18254 arm_file_start (void)
18258 if (TARGET_UNIFIED_ASM)
18259 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18263 const char *fpu_name;
18264 if (arm_select[0].string)
18265 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18266 else if (arm_select[1].string)
18267 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18269 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18270 all_cores[arm_default_cpu].name);
18272 if (TARGET_SOFT_FLOAT)
18275 fpu_name = "softvfp";
18277 fpu_name = "softfpa";
18281 int set_float_abi_attributes = 0;
18282 switch (arm_fpu_arch)
18287 case FPUTYPE_FPA_EMU2:
18290 case FPUTYPE_FPA_EMU3:
18293 case FPUTYPE_MAVERICK:
18294 fpu_name = "maverick";
18298 set_float_abi_attributes = 1;
18300 case FPUTYPE_VFP3D16:
18301 fpu_name = "vfpv3-d16";
18302 set_float_abi_attributes = 1;
18305 fpu_name = "vfpv3";
18306 set_float_abi_attributes = 1;
18310 set_float_abi_attributes = 1;
18315 if (set_float_abi_attributes)
18317 if (TARGET_HARD_FLOAT)
18318 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18319 if (TARGET_HARD_FLOAT_ABI)
18320 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18323 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18325 /* Some of these attributes only apply when the corresponding features
18326 are used. However we don't have any easy way of figuring this out.
18327 Conservatively record the setting that would have been used. */
18329 /* Tag_ABI_FP_rounding. */
18330 if (flag_rounding_math)
18331 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18332 if (!flag_unsafe_math_optimizations)
18334 /* Tag_ABI_FP_denomal. */
18335 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18336 /* Tag_ABI_FP_exceptions. */
18337 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18339 /* Tag_ABI_FP_user_exceptions. */
18340 if (flag_signaling_nans)
18341 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18342 /* Tag_ABI_FP_number_model. */
18343 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18344 flag_finite_math_only ? 1 : 3);
18346 /* Tag_ABI_align8_needed. */
18347 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18348 /* Tag_ABI_align8_preserved. */
18349 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18350 /* Tag_ABI_enum_size. */
18351 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18352 flag_short_enums ? 1 : 2);
18354 /* Tag_ABI_optimization_goals. */
18357 else if (optimize >= 2)
18363 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18365 if (arm_lang_output_object_attributes_hook)
18366 arm_lang_output_object_attributes_hook();
18368 default_file_start();
18372 arm_file_end (void)
18376 if (NEED_INDICATE_EXEC_STACK)
18377 /* Add .note.GNU-stack. */
18378 file_end_indicate_exec_stack ();
18380 if (! thumb_call_reg_needed)
18383 switch_to_section (text_section);
18384 asm_fprintf (asm_out_file, "\t.code 16\n");
18385 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18387 for (regno = 0; regno < LR_REGNUM; regno++)
18389 rtx label = thumb_call_via_label[regno];
18393 targetm.asm_out.internal_label (asm_out_file, "L",
18394 CODE_LABEL_NUMBER (label));
18395 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18401 /* Symbols in the text segment can be accessed without indirecting via the
18402 constant pool; it may take an extra binary operation, but this is still
18403 faster than indirecting via memory. Don't do this when not optimizing,
18404 since we won't be calculating al of the offsets necessary to do this
18408 arm_encode_section_info (tree decl, rtx rtl, int first)
18410 if (optimize > 0 && TREE_CONSTANT (decl))
18411 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18413 default_encode_section_info (decl, rtl, first);
18415 #endif /* !ARM_PE */
18418 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18420 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18421 && !strcmp (prefix, "L"))
18423 arm_ccfsm_state = 0;
18424 arm_target_insn = NULL;
18426 default_internal_label (stream, prefix, labelno);
18429 /* Output code to add DELTA to the first argument, and then jump
18430 to FUNCTION. Used for C++ multiple inheritance. */
18432 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18433 HOST_WIDE_INT delta,
18434 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18437 static int thunk_label = 0;
18440 int mi_delta = delta;
18441 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18443 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18446 mi_delta = - mi_delta;
18450 int labelno = thunk_label++;
18451 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18452 /* Thunks are entered in arm mode when avaiable. */
18453 if (TARGET_THUMB1_ONLY)
18455 /* push r3 so we can use it as a temporary. */
18456 /* TODO: Omit this save if r3 is not used. */
18457 fputs ("\tpush {r3}\n", file);
18458 fputs ("\tldr\tr3, ", file);
18462 fputs ("\tldr\tr12, ", file);
18464 assemble_name (file, label);
18465 fputc ('\n', file);
18468 /* If we are generating PIC, the ldr instruction below loads
18469 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18470 the address of the add + 8, so we have:
18472 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18475 Note that we have "+ 1" because some versions of GNU ld
18476 don't set the low bit of the result for R_ARM_REL32
18477 relocations against thumb function symbols.
18478 On ARMv6M this is +4, not +8. */
18479 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18480 assemble_name (file, labelpc);
18481 fputs (":\n", file);
18482 if (TARGET_THUMB1_ONLY)
18484 /* This is 2 insns after the start of the thunk, so we know it
18485 is 4-byte aligned. */
18486 fputs ("\tadd\tr3, pc, r3\n", file);
18487 fputs ("\tmov r12, r3\n", file);
18490 fputs ("\tadd\tr12, pc, r12\n", file);
18492 else if (TARGET_THUMB1_ONLY)
18493 fputs ("\tmov r12, r3\n", file);
18495 if (TARGET_THUMB1_ONLY)
18497 if (mi_delta > 255)
18499 fputs ("\tldr\tr3, ", file);
18500 assemble_name (file, label);
18501 fputs ("+4\n", file);
18502 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18503 mi_op, this_regno, this_regno);
18505 else if (mi_delta != 0)
18507 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18508 mi_op, this_regno, this_regno,
18514 /* TODO: Use movw/movt for large constants when available. */
18515 while (mi_delta != 0)
18517 if ((mi_delta & (3 << shift)) == 0)
18521 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18522 mi_op, this_regno, this_regno,
18523 mi_delta & (0xff << shift));
18524 mi_delta &= ~(0xff << shift);
18531 if (TARGET_THUMB1_ONLY)
18532 fputs ("\tpop\t{r3}\n", file);
18534 fprintf (file, "\tbx\tr12\n");
18535 ASM_OUTPUT_ALIGN (file, 2);
18536 assemble_name (file, label);
18537 fputs (":\n", file);
18540 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18541 rtx tem = XEXP (DECL_RTL (function), 0);
18542 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18543 tem = gen_rtx_MINUS (GET_MODE (tem),
18545 gen_rtx_SYMBOL_REF (Pmode,
18546 ggc_strdup (labelpc)));
18547 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18550 /* Output ".word .LTHUNKn". */
18551 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18553 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18554 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18558 fputs ("\tb\t", file);
18559 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18560 if (NEED_PLT_RELOC)
18561 fputs ("(PLT)", file);
18562 fputc ('\n', file);
18567 arm_emit_vector_const (FILE *file, rtx x)
18570 const char * pattern;
18572 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18574 switch (GET_MODE (x))
18576 case V2SImode: pattern = "%08x"; break;
18577 case V4HImode: pattern = "%04x"; break;
18578 case V8QImode: pattern = "%02x"; break;
18579 default: gcc_unreachable ();
18582 fprintf (file, "0x");
18583 for (i = CONST_VECTOR_NUNITS (x); i--;)
18587 element = CONST_VECTOR_ELT (x, i);
18588 fprintf (file, pattern, INTVAL (element));
18595 arm_output_load_gr (rtx *operands)
18602 if (GET_CODE (operands [1]) != MEM
18603 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18604 || GET_CODE (reg = XEXP (sum, 0)) != REG
18605 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18606 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18607 return "wldrw%?\t%0, %1";
18609 /* Fix up an out-of-range load of a GR register. */
18610 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18611 wcgr = operands[0];
18613 output_asm_insn ("ldr%?\t%0, %1", operands);
18615 operands[0] = wcgr;
18617 output_asm_insn ("tmcr%?\t%0, %1", operands);
18618 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18623 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18625 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18626 named arg and all anonymous args onto the stack.
18627 XXX I know the prologue shouldn't be pushing registers, but it is faster
18631 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18632 enum machine_mode mode,
18635 int second_time ATTRIBUTE_UNUSED)
18637 int nregs = cum->nregs;
18639 && ARM_DOUBLEWORD_ALIGN
18640 && arm_needs_doubleword_align (mode, type))
18643 cfun->machine->uses_anonymous_args = 1;
18644 if (nregs < NUM_ARG_REGS)
18645 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
18648 /* Return nonzero if the CONSUMER instruction (a store) does not need
18649 PRODUCER's value to calculate the address. */
18652 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18654 rtx value = PATTERN (producer);
18655 rtx addr = PATTERN (consumer);
18657 if (GET_CODE (value) == COND_EXEC)
18658 value = COND_EXEC_CODE (value);
18659 if (GET_CODE (value) == PARALLEL)
18660 value = XVECEXP (value, 0, 0);
18661 value = XEXP (value, 0);
18662 if (GET_CODE (addr) == COND_EXEC)
18663 addr = COND_EXEC_CODE (addr);
18664 if (GET_CODE (addr) == PARALLEL)
18665 addr = XVECEXP (addr, 0, 0);
18666 addr = XEXP (addr, 0);
18668 return !reg_overlap_mentioned_p (value, addr);
18671 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18672 have an early register shift value or amount dependency on the
18673 result of PRODUCER. */
18676 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18678 rtx value = PATTERN (producer);
18679 rtx op = PATTERN (consumer);
18682 if (GET_CODE (value) == COND_EXEC)
18683 value = COND_EXEC_CODE (value);
18684 if (GET_CODE (value) == PARALLEL)
18685 value = XVECEXP (value, 0, 0);
18686 value = XEXP (value, 0);
18687 if (GET_CODE (op) == COND_EXEC)
18688 op = COND_EXEC_CODE (op);
18689 if (GET_CODE (op) == PARALLEL)
18690 op = XVECEXP (op, 0, 0);
18693 early_op = XEXP (op, 0);
18694 /* This is either an actual independent shift, or a shift applied to
18695 the first operand of another operation. We want the whole shift
18697 if (GET_CODE (early_op) == REG)
18700 return !reg_overlap_mentioned_p (value, early_op);
18703 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18704 have an early register shift value dependency on the result of
18708 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18710 rtx value = PATTERN (producer);
18711 rtx op = PATTERN (consumer);
18714 if (GET_CODE (value) == COND_EXEC)
18715 value = COND_EXEC_CODE (value);
18716 if (GET_CODE (value) == PARALLEL)
18717 value = XVECEXP (value, 0, 0);
18718 value = XEXP (value, 0);
18719 if (GET_CODE (op) == COND_EXEC)
18720 op = COND_EXEC_CODE (op);
18721 if (GET_CODE (op) == PARALLEL)
18722 op = XVECEXP (op, 0, 0);
18725 early_op = XEXP (op, 0);
18727 /* This is either an actual independent shift, or a shift applied to
18728 the first operand of another operation. We want the value being
18729 shifted, in either case. */
18730 if (GET_CODE (early_op) != REG)
18731 early_op = XEXP (early_op, 0);
18733 return !reg_overlap_mentioned_p (value, early_op);
18736 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18737 have an early register mult dependency on the result of
18741 arm_no_early_mul_dep (rtx producer, rtx consumer)
18743 rtx value = PATTERN (producer);
18744 rtx op = PATTERN (consumer);
18746 if (GET_CODE (value) == COND_EXEC)
18747 value = COND_EXEC_CODE (value);
18748 if (GET_CODE (value) == PARALLEL)
18749 value = XVECEXP (value, 0, 0);
18750 value = XEXP (value, 0);
18751 if (GET_CODE (op) == COND_EXEC)
18752 op = COND_EXEC_CODE (op);
18753 if (GET_CODE (op) == PARALLEL)
18754 op = XVECEXP (op, 0, 0);
18757 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
18759 if (GET_CODE (XEXP (op, 0)) == MULT)
18760 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
18762 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
18768 /* We can't rely on the caller doing the proper promotion when
18769 using APCS or ATPCS. */
18772 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18774 return !TARGET_AAPCS_BASED;
18778 /* AAPCS based ABIs use short enums by default. */
18781 arm_default_short_enums (void)
18783 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18787 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18790 arm_align_anon_bitfield (void)
18792 return TARGET_AAPCS_BASED;
18796 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18799 arm_cxx_guard_type (void)
18801 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18804 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18805 has an accumulator dependency on the result of the producer (a
18806 multiplication instruction) and no other dependency on that result. */
18808 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18810 rtx mul = PATTERN (producer);
18811 rtx mac = PATTERN (consumer);
18813 rtx mac_op0, mac_op1, mac_acc;
18815 if (GET_CODE (mul) == COND_EXEC)
18816 mul = COND_EXEC_CODE (mul);
18817 if (GET_CODE (mac) == COND_EXEC)
18818 mac = COND_EXEC_CODE (mac);
18820 /* Check that mul is of the form (set (...) (mult ...))
18821 and mla is of the form (set (...) (plus (mult ...) (...))). */
18822 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18823 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18824 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18827 mul_result = XEXP (mul, 0);
18828 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18829 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18830 mac_acc = XEXP (XEXP (mac, 1), 1);
18832 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18833 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18834 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18838 /* The EABI says test the least significant bit of a guard variable. */
18841 arm_cxx_guard_mask_bit (void)
18843 return TARGET_AAPCS_BASED;
18847 /* The EABI specifies that all array cookies are 8 bytes long. */
18850 arm_get_cookie_size (tree type)
18854 if (!TARGET_AAPCS_BASED)
18855 return default_cxx_get_cookie_size (type);
18857 size = build_int_cst (sizetype, 8);
18862 /* The EABI says that array cookies should also contain the element size. */
18865 arm_cookie_has_size (void)
18867 return TARGET_AAPCS_BASED;
18871 /* The EABI says constructors and destructors should return a pointer to
18872 the object constructed/destroyed. */
18875 arm_cxx_cdtor_returns_this (void)
18877 return TARGET_AAPCS_BASED;
18880 /* The EABI says that an inline function may never be the key
18884 arm_cxx_key_method_may_be_inline (void)
18886 return !TARGET_AAPCS_BASED;
18890 arm_cxx_determine_class_data_visibility (tree decl)
18892 if (!TARGET_AAPCS_BASED
18893 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
18896 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18897 is exported. However, on systems without dynamic vague linkage,
18898 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18899 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18900 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18902 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18903 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18907 arm_cxx_class_data_always_comdat (void)
18909 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18910 vague linkage if the class has no key function. */
18911 return !TARGET_AAPCS_BASED;
18915 /* The EABI says __aeabi_atexit should be used to register static
18919 arm_cxx_use_aeabi_atexit (void)
18921 return TARGET_AAPCS_BASED;
18926 arm_set_return_address (rtx source, rtx scratch)
18928 arm_stack_offsets *offsets;
18929 HOST_WIDE_INT delta;
18931 unsigned long saved_regs;
18933 offsets = arm_get_frame_offsets ();
18934 saved_regs = offsets->saved_regs_mask;
18936 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18937 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18940 if (frame_pointer_needed)
18941 addr = plus_constant(hard_frame_pointer_rtx, -4);
18944 /* LR will be the first saved register. */
18945 delta = offsets->outgoing_args - (offsets->frame + 4);
18950 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18951 GEN_INT (delta & ~4095)));
18956 addr = stack_pointer_rtx;
18958 addr = plus_constant (addr, delta);
18960 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18966 thumb_set_return_address (rtx source, rtx scratch)
18968 arm_stack_offsets *offsets;
18969 HOST_WIDE_INT delta;
18970 HOST_WIDE_INT limit;
18973 unsigned long mask;
18977 offsets = arm_get_frame_offsets ();
18978 mask = offsets->saved_regs_mask;
18979 if (mask & (1 << LR_REGNUM))
18982 /* Find the saved regs. */
18983 if (frame_pointer_needed)
18985 delta = offsets->soft_frame - offsets->saved_args;
18986 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18992 delta = offsets->outgoing_args - offsets->saved_args;
18995 /* Allow for the stack frame. */
18996 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18998 /* The link register is always the first saved register. */
19001 /* Construct the address. */
19002 addr = gen_rtx_REG (SImode, reg);
19005 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19006 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19010 addr = plus_constant (addr, delta);
19012 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19015 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19018 /* Implements target hook vector_mode_supported_p. */
19020 arm_vector_mode_supported_p (enum machine_mode mode)
19022 /* Neon also supports V2SImode, etc. listed in the clause below. */
19023 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19024 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19027 if ((mode == V2SImode)
19028 || (mode == V4HImode)
19029 || (mode == V8QImode))
19035 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19036 ARM insns and therefore guarantee that the shift count is modulo 256.
19037 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19038 guarantee no particular behavior for out-of-range counts. */
19040 static unsigned HOST_WIDE_INT
19041 arm_shift_truncation_mask (enum machine_mode mode)
19043 return mode == SImode ? 255 : 0;
19047 /* Map internal gcc register numbers to DWARF2 register numbers. */
19050 arm_dbx_register_number (unsigned int regno)
19055 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19056 compatibility. The EABI defines them as registers 96-103. */
19057 if (IS_FPA_REGNUM (regno))
19058 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19060 /* FIXME: VFPv3 register numbering. */
19061 if (IS_VFP_REGNUM (regno))
19062 return 64 + regno - FIRST_VFP_REGNUM;
19064 if (IS_IWMMXT_GR_REGNUM (regno))
19065 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19067 if (IS_IWMMXT_REGNUM (regno))
19068 return 112 + regno - FIRST_IWMMXT_REGNUM;
19070 gcc_unreachable ();
19074 #ifdef TARGET_UNWIND_INFO
19075 /* Emit unwind directives for a store-multiple instruction or stack pointer
19076 push during alignment.
19077 These should only ever be generated by the function prologue code, so
19078 expect them to have a particular form. */
19081 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19084 HOST_WIDE_INT offset;
19085 HOST_WIDE_INT nregs;
19091 e = XVECEXP (p, 0, 0);
19092 if (GET_CODE (e) != SET)
19095 /* First insn will adjust the stack pointer. */
19096 if (GET_CODE (e) != SET
19097 || GET_CODE (XEXP (e, 0)) != REG
19098 || REGNO (XEXP (e, 0)) != SP_REGNUM
19099 || GET_CODE (XEXP (e, 1)) != PLUS)
19102 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19103 nregs = XVECLEN (p, 0) - 1;
19105 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19108 /* The function prologue may also push pc, but not annotate it as it is
19109 never restored. We turn this into a stack pointer adjustment. */
19110 if (nregs * 4 == offset - 4)
19112 fprintf (asm_out_file, "\t.pad #4\n");
19116 fprintf (asm_out_file, "\t.save {");
19118 else if (IS_VFP_REGNUM (reg))
19121 fprintf (asm_out_file, "\t.vsave {");
19123 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19125 /* FPA registers are done differently. */
19126 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19130 /* Unknown register type. */
19133 /* If the stack increment doesn't match the size of the saved registers,
19134 something has gone horribly wrong. */
19135 if (offset != nregs * reg_size)
19140 /* The remaining insns will describe the stores. */
19141 for (i = 1; i <= nregs; i++)
19143 /* Expect (set (mem <addr>) (reg)).
19144 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19145 e = XVECEXP (p, 0, i);
19146 if (GET_CODE (e) != SET
19147 || GET_CODE (XEXP (e, 0)) != MEM
19148 || GET_CODE (XEXP (e, 1)) != REG)
19151 reg = REGNO (XEXP (e, 1));
19156 fprintf (asm_out_file, ", ");
19157 /* We can't use %r for vfp because we need to use the
19158 double precision register names. */
19159 if (IS_VFP_REGNUM (reg))
19160 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19162 asm_fprintf (asm_out_file, "%r", reg);
19164 #ifdef ENABLE_CHECKING
19165 /* Check that the addresses are consecutive. */
19166 e = XEXP (XEXP (e, 0), 0);
19167 if (GET_CODE (e) == PLUS)
19169 offset += reg_size;
19170 if (GET_CODE (XEXP (e, 0)) != REG
19171 || REGNO (XEXP (e, 0)) != SP_REGNUM
19172 || GET_CODE (XEXP (e, 1)) != CONST_INT
19173 || offset != INTVAL (XEXP (e, 1)))
19177 || GET_CODE (e) != REG
19178 || REGNO (e) != SP_REGNUM)
19182 fprintf (asm_out_file, "}\n");
19185 /* Emit unwind directives for a SET. */
19188 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
19196 switch (GET_CODE (e0))
19199 /* Pushing a single register. */
19200 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
19201 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
19202 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
19205 asm_fprintf (asm_out_file, "\t.save ");
19206 if (IS_VFP_REGNUM (REGNO (e1)))
19207 asm_fprintf(asm_out_file, "{d%d}\n",
19208 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
19210 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
19214 if (REGNO (e0) == SP_REGNUM)
19216 /* A stack increment. */
19217 if (GET_CODE (e1) != PLUS
19218 || GET_CODE (XEXP (e1, 0)) != REG
19219 || REGNO (XEXP (e1, 0)) != SP_REGNUM
19220 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19223 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
19224 -INTVAL (XEXP (e1, 1)));
19226 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
19228 HOST_WIDE_INT offset;
19230 if (GET_CODE (e1) == PLUS)
19232 if (GET_CODE (XEXP (e1, 0)) != REG
19233 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19235 reg = REGNO (XEXP (e1, 0));
19236 offset = INTVAL (XEXP (e1, 1));
19237 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
19238 HARD_FRAME_POINTER_REGNUM, reg,
19239 INTVAL (XEXP (e1, 1)));
19241 else if (GET_CODE (e1) == REG)
19244 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
19245 HARD_FRAME_POINTER_REGNUM, reg);
19250 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
19252 /* Move from sp to reg. */
19253 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
19255 else if (GET_CODE (e1) == PLUS
19256 && GET_CODE (XEXP (e1, 0)) == REG
19257 && REGNO (XEXP (e1, 0)) == SP_REGNUM
19258 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
19260 /* Set reg to offset from sp. */
19261 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
19262 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
19264 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
19266 /* Stack pointer save before alignment. */
19268 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
19281 /* Emit unwind directives for the given insn. */
19284 arm_unwind_emit (FILE * asm_out_file, rtx insn)
19288 if (!ARM_EABI_UNWIND_TABLES)
19291 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19292 && (TREE_NOTHROW (current_function_decl)
19293 || crtl->all_throwers_are_sibcalls))
19296 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
19299 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
19301 pat = XEXP (pat, 0);
19303 pat = PATTERN (insn);
19305 switch (GET_CODE (pat))
19308 arm_unwind_emit_set (asm_out_file, pat);
19312 /* Store multiple. */
19313 arm_unwind_emit_sequence (asm_out_file, pat);
19322 /* Output a reference from a function exception table to the type_info
19323 object X. The EABI specifies that the symbol should be relocated by
19324 an R_ARM_TARGET2 relocation. */
19327 arm_output_ttype (rtx x)
19329 fputs ("\t.word\t", asm_out_file);
19330 output_addr_const (asm_out_file, x);
19331 /* Use special relocations for symbol references. */
19332 if (GET_CODE (x) != CONST_INT)
19333 fputs ("(TARGET2)", asm_out_file);
19334 fputc ('\n', asm_out_file);
19338 #endif /* TARGET_UNWIND_INFO */
19341 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
19342 stack alignment. */
19345 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
19347 rtx unspec = SET_SRC (pattern);
19348 gcc_assert (GET_CODE (unspec) == UNSPEC);
19352 case UNSPEC_STACK_ALIGN:
19353 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
19354 put anything on the stack, so hopefully it won't matter.
19355 CFA = SP will be correct after alignment. */
19356 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
19357 SET_DEST (pattern));
19360 gcc_unreachable ();
19365 /* Output unwind directives for the start/end of a function. */
19368 arm_output_fn_unwind (FILE * f, bool prologue)
19370 if (!ARM_EABI_UNWIND_TABLES)
19374 fputs ("\t.fnstart\n", f);
19377 /* If this function will never be unwound, then mark it as such.
19378 The came condition is used in arm_unwind_emit to suppress
19379 the frame annotations. */
19380 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19381 && (TREE_NOTHROW (current_function_decl)
19382 || crtl->all_throwers_are_sibcalls))
19383 fputs("\t.cantunwind\n", f);
19385 fputs ("\t.fnend\n", f);
19390 arm_emit_tls_decoration (FILE *fp, rtx x)
19392 enum tls_reloc reloc;
19395 val = XVECEXP (x, 0, 0);
19396 reloc = INTVAL (XVECEXP (x, 0, 1));
19398 output_addr_const (fp, val);
19403 fputs ("(tlsgd)", fp);
19406 fputs ("(tlsldm)", fp);
19409 fputs ("(tlsldo)", fp);
19412 fputs ("(gottpoff)", fp);
19415 fputs ("(tpoff)", fp);
19418 gcc_unreachable ();
19426 fputs (" + (. - ", fp);
19427 output_addr_const (fp, XVECEXP (x, 0, 2));
19429 output_addr_const (fp, XVECEXP (x, 0, 3));
19439 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
19442 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
19444 gcc_assert (size == 4);
19445 fputs ("\t.word\t", file);
19446 output_addr_const (file, x);
19447 fputs ("(tlsldo)", file);
19451 arm_output_addr_const_extra (FILE *fp, rtx x)
19453 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
19454 return arm_emit_tls_decoration (fp, x);
19455 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
19458 int labelno = INTVAL (XVECEXP (x, 0, 0));
19460 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
19461 assemble_name_raw (fp, label);
19465 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
19467 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
19471 output_addr_const (fp, XVECEXP (x, 0, 0));
19475 else if (GET_CODE (x) == CONST_VECTOR)
19476 return arm_emit_vector_const (fp, x);
19481 /* Output assembly for a shift instruction.
19482 SET_FLAGS determines how the instruction modifies the condition codes.
19483 0 - Do not set condition codes.
19484 1 - Set condition codes.
19485 2 - Use smallest instruction. */
19487 arm_output_shift(rtx * operands, int set_flags)
19490 static const char flag_chars[3] = {'?', '.', '!'};
19495 c = flag_chars[set_flags];
19496 if (TARGET_UNIFIED_ASM)
19498 shift = shift_op(operands[3], &val);
19502 operands[2] = GEN_INT(val);
19503 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
19506 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
19509 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
19510 output_asm_insn (pattern, operands);
19514 /* Output a Thumb-2 casesi instruction. */
19516 thumb2_output_casesi (rtx *operands)
19518 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
19520 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
19522 output_asm_insn ("cmp\t%0, %1", operands);
19523 output_asm_insn ("bhi\t%l3", operands);
19524 switch (GET_MODE(diff_vec))
19527 return "tbb\t[%|pc, %0]";
19529 return "tbh\t[%|pc, %0, lsl #1]";
19533 output_asm_insn ("adr\t%4, %l2", operands);
19534 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
19535 output_asm_insn ("add\t%4, %4, %5", operands);
19540 output_asm_insn ("adr\t%4, %l2", operands);
19541 return "ldr\t%|pc, [%4, %0, lsl #2]";
19544 gcc_unreachable ();
19548 /* Most ARM cores are single issue, but some newer ones can dual issue.
19549 The scheduler descriptions rely on this being correct. */
19551 arm_issue_rate (void)
19566 /* A table and a function to perform ARM-specific name mangling for
19567 NEON vector types in order to conform to the AAPCS (see "Procedure
19568 Call Standard for the ARM Architecture", Appendix A). To qualify
19569 for emission with the mangled names defined in that document, a
19570 vector type must not only be of the correct mode but also be
19571 composed of NEON vector element types (e.g. __builtin_neon_qi). */
19574 enum machine_mode mode;
19575 const char *element_type_name;
19576 const char *aapcs_name;
19577 } arm_mangle_map_entry;
19579 static arm_mangle_map_entry arm_mangle_map[] = {
19580 /* 64-bit containerized types. */
19581 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
19582 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
19583 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
19584 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
19585 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
19586 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
19587 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
19588 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
19589 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
19590 /* 128-bit containerized types. */
19591 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
19592 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
19593 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
19594 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
19595 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
19596 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
19597 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
19598 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
19599 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
19600 { VOIDmode, NULL, NULL }
19604 arm_mangle_type (const_tree type)
19606 arm_mangle_map_entry *pos = arm_mangle_map;
19608 /* The ARM ABI documents (10th October 2008) say that "__va_list"
19609 has to be managled as if it is in the "std" namespace. */
19610 if (TARGET_AAPCS_BASED
19611 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
19613 static bool warned;
19614 if (!warned && warn_psabi)
19617 inform (input_location,
19618 "the mangling of %<va_list%> has changed in GCC 4.4");
19620 return "St9__va_list";
19623 if (TREE_CODE (type) != VECTOR_TYPE)
19626 /* Check the mode of the vector type, and the name of the vector
19627 element type, against the table. */
19628 while (pos->mode != VOIDmode)
19630 tree elt_type = TREE_TYPE (type);
19632 if (pos->mode == TYPE_MODE (type)
19633 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
19634 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
19635 pos->element_type_name))
19636 return pos->aapcs_name;
19641 /* Use the default mangling for unrecognized (possibly user-defined)
19646 /* Order of allocation of core registers for Thumb: this allocation is
19647 written over the corresponding initial entries of the array
19648 initialized with REG_ALLOC_ORDER. We allocate all low registers
19649 first. Saving and restoring a low register is usually cheaper than
19650 using a call-clobbered high register. */
19652 static const int thumb_core_reg_alloc_order[] =
19654 3, 2, 1, 0, 4, 5, 6, 7,
19655 14, 12, 8, 9, 10, 11, 13, 15
19658 /* Adjust register allocation order when compiling for Thumb. */
19661 arm_order_regs_for_local_alloc (void)
19663 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
19664 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
19666 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
19667 sizeof (thumb_core_reg_alloc_order));
19670 /* Set default optimization options. */
19672 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
19674 /* Enable section anchors by default at -O1 or higher.
19675 Use 2 to distinguish from an explicit -fsection-anchors
19676 given on the command line. */
19678 flag_section_anchors = 2;
19681 #include "gt-arm.h"