/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

/* This will be overridden in arm_override_options.  */
#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
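
/* For illustration: the FL_FOR_ARCHn masks compose by ORing one new
   capability bit into the mask of the architecture they extend, so
   membership tests are plain bitwise ANDs.  For example, ARMv5TE
   implies Thumb but not Thumb-2:

     ((FL_FOR_ARCH5TE & FL_THUMB) != 0)    evaluates to true
     ((FL_FOR_ARCH5TE & FL_THUMB2) != 0)   evaluates to false

   arm_override_options below uses exactly this style of test when it
   derives the arm_arch* booleans from insn_flags.  */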
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output, this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
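
/* For illustration: the arm_condition_codes table above follows the ARM
   condition-field encoding, in which each condition at an even index is
   immediately followed by its inverse ("eq"/"ne", "cs"/"cc", ...).
   Inverting a condition therefore just toggles the low bit of its
   arm_cond_code value:

     arm_condition_codes[ARM_EQ]        is "eq"
     arm_condition_codes[ARM_EQ ^ 1]    is "ne"

   (the backend wraps this trick in the ARM_INVERSE_CONDITION_CODE
   macro in arm.h).  */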
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the CPU implementing the architecture.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
  {NULL, arm_none, NULL, 0 , NULL}
};
struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string	  name            processors  */
  { NULL,	"-mcpu=",	all_cores  },
  { NULL,	"-march=",	all_architectures },
  { NULL,	"-mtune=",	all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",	FPUTYPE_FPA},
  {"fpe2",	FPUTYPE_FPA_EMU2},
  {"fpe3",	FPUTYPE_FPA_EMU3},
  {"maverick",	FPUTYPE_MAVERICK},
  {"vfp",	FPUTYPE_VFP},
  {"vfp3",	FPUTYPE_VFP3},
  {"vfpv3",	FPUTYPE_VFP3},
  {"vfpv3-d16",	FPUTYPE_VFP3D16},
  {"neon",	FPUTYPE_NEON}
};
/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum fputype fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,		/* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,	/* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,		/* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,		/* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,		/* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP		/* FPUTYPE_NEON  */
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};

struct abi_name
{
  const char * name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",	  ARM_ABI_APCS},
  {"atpcs",	  ARM_ABI_ATPCS},
  {"aapcs",	  ARM_ABI_AAPCS},
  {"iwmmxt",	  ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
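
/* Worked example: for VALUE == 0xf0 the loop above iterates four times
   (0xf0 -> 0xe0 -> 0xc0 -> 0x80 -> 0) and returns 4; each iteration of
   the classic v &= v - 1 step clears exactly one set bit.  */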
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}
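
/* Illustrative sketch (an assumption about the generated calls, not part
   of this file): with the libfuncs above registered, plain C division

     int quot (int a, int b) { return a / b; }

   is lowered to a call to __aeabi_idiv, while code needing both results,

     int both (int a, int b) { return a / b + a % b; }

   can be covered by a single __aeabi_idivmod call, which returns the
   quotient in r0 and the remainder in r1.  */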
/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int        sought;
      enum processor_type cpu;

      cpu = TARGET_CPU_DEFAULT;
      if (cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          cpu = SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (cpu == arm_none)
            cpu = arm6;
        }
      sel = &all_cores[cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (optimize_size)
    targetm.rtx_costs = arm_size_rtx_costs;
  else
    targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
        {
          if (streq (all_fpus[i].name, target_fpu_name))
            {
              arm_fpu_arch = all_fpus[i].fpu;
              arm_fpu_tune = arm_fpu_arch;
              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
              break;
            }
        }
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
        arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
        arm_fpu_arch = FPUTYPE_MAVERICK;
      else
        arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
        arm_fpu_tune = FPUTYPE_FPA;
      else
        arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
         reduce that to 1.  */
      if (arm_ld_sched)
        arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
         constants that can be done in two insns.  */
      if (arm_tune_xscale)
        arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const  arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
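
/* Usage sketch (user-level code, shown for illustration): the strings in
   isr_attribute_args are matched against the argument of the "isr" or
   "interrupt" function attribute, so handlers are declared like

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     void irq_handler (void) __attribute__ ((interrupt));   defaults to IRQ

   and the attribute value selects the ARM_FT_* type that arm_isr_value
   returns.  */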
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ...  */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
1775 /* Return TRUE if int I is a valid immediate ARM constant. */
1778 const_ok_for_arm (HOST_WIDE_INT i)
1782 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1783 be all zero, or all one. */
1784 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1785 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1786 != ((~(unsigned HOST_WIDE_INT) 0)
1787 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1790 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1792 /* Fast return for 0 and small values. We must do this for zero, since
1793 the code below can't handle that one case. */
1794 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1797 /* Get the number of trailing zeros. */
1798 lowbit = ffs((int) i) - 1;
1800 /* Only even shifts are allowed in ARM mode so round down to the
1801 nearest even number. */
1805 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1810 /* Allow rotated constants in ARM mode. */
1812 && ((i & ~0xc000003f) == 0
1813 || (i & ~0xf000000f) == 0
1814 || (i & ~0xfc000003) == 0))
1821 /* Allow repeated pattern. */
1824 if (i == v || i == (v | (v << 8)))
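/* Worked example (illustrative, not part of the original source): an ARM
data-processing immediate is an 8-bit value rotated right by an even
amount, so:
0x000000ff  valid (0xff, no rotation)
0x00ff0000  valid (0xff rotated right by 16)
0xff000000  valid (0xff rotated right by 8)
0x000001fe  invalid in ARM mode (0xff shifted by an odd amount)
0x00012300  invalid (more than 8 significant bits)
The repeated-pattern check above additionally accepts Thumb-2 replicated
constants such as 0x00ab00ab and 0xabababab. */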
1831 /* Return true if I is a valid constant for the operation CODE. */
1833 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1835 if (const_ok_for_arm (i))
1841 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1843 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1849 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1856 /* Emit a sequence of insns to handle a large constant.
1857 CODE is the code of the operation required, it can be any of SET, PLUS,
1858 IOR, AND, XOR, MINUS;
1859 MODE is the mode in which the operation is being performed;
1860 VAL is the integer to operate on;
1861 SOURCE is the other operand (a register, or a null-pointer for SET);
1862 SUBTARGETS means it is safe to create scratch registers if that will
1863 either produce a simpler sequence, or we will want to cse the values.
1864 Return value is the number of insns emitted. */
1866 /* ??? Tweak this for thumb2. */
1868 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1869 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1873 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1874 cond = COND_EXEC_TEST (PATTERN (insn));
1878 if (subtargets || code == SET
1879 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1880 && REGNO (target) != REGNO (source)))
1882 /* After arm_reorg has been called, we can't fix up expensive
1883 constants by pushing them into memory so we must synthesize
1884 them in-line, regardless of the cost. This is only likely to
1885 be more costly on chips that have load delay slots and we are
1886 compiling without running the scheduler (so no splitting
1887 occurred before the final instruction emission).
1889 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1891 if (!after_arm_reorg
1893 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1895 > arm_constant_limit + (code != SET)))
1899 /* Currently SET is the only monadic value for CODE; all
1900 the rest are dyadic. */
1901 emit_set_insn (target, GEN_INT (val));
1906 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1908 emit_set_insn (temp, GEN_INT (val));
1909 /* For MINUS, the constant is the value subtracted from: we emit
1910 VAL - SOURCE, since we never have subtraction of a constant. */
1912 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1914 emit_set_insn (target,
1915 gen_rtx_fmt_ee (code, mode, source, temp));
1921 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
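/* As an illustration (not necessarily the exact sequence chosen), a SET
of 0x12345678, which is not a valid immediate, can be synthesized as:
mov rD, #0x12000000
orr rD, rD, #0x00340000
orr rD, rD, #0x00005600
orr rD, rD, #0x00000078
four insns, each using one 8-bit rotated immediate. arm_gen_constant
weighs such in-line sequences against arm_constant_limit above. */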
1925 /* Return the number of ARM instructions required to synthesize the given
1928 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1930 HOST_WIDE_INT temp1;
1938 if (remainder & (3 << (i - 2)))
1943 temp1 = remainder & ((0x0ff << end)
1944 | ((i < end) ? (0xff >> (32 - end)) : 0));
1945 remainder &= ~temp1;
1950 } while (remainder);
1954 /* Emit an instruction with the indicated PATTERN. If COND is
1955 non-NULL, conditionalize the execution of the instruction on COND
1959 emit_constant_insn (rtx cond, rtx pattern)
1962 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1963 emit_insn (pattern);
1966 /* As above, but extra parameter GENERATE which, if clear, suppresses
1968 /* ??? This needs more work for thumb2. */
1971 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1972 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1977 int can_negate_initial = 0;
1980 int num_bits_set = 0;
1981 int set_sign_bit_copies = 0;
1982 int clear_sign_bit_copies = 0;
1983 int clear_zero_bit_copies = 0;
1984 int set_zero_bit_copies = 0;
1986 unsigned HOST_WIDE_INT temp1, temp2;
1987 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1989 /* Find out which operations are safe for a given CODE. Also do a quick
1990 check for degenerate cases; these can occur when DImode operations
2002 can_negate_initial = 1;
2006 if (remainder == 0xffffffff)
2009 emit_constant_insn (cond,
2010 gen_rtx_SET (VOIDmode, target,
2011 GEN_INT (ARM_SIGN_EXTEND (val))));
2016 if (reload_completed && rtx_equal_p (target, source))
2019 emit_constant_insn (cond,
2020 gen_rtx_SET (VOIDmode, target, source));
2029 emit_constant_insn (cond,
2030 gen_rtx_SET (VOIDmode, target, const0_rtx));
2033 if (remainder == 0xffffffff)
2035 if (reload_completed && rtx_equal_p (target, source))
2038 emit_constant_insn (cond,
2039 gen_rtx_SET (VOIDmode, target, source));
2048 if (reload_completed && rtx_equal_p (target, source))
2051 emit_constant_insn (cond,
2052 gen_rtx_SET (VOIDmode, target, source));
2056 /* We don't know how to handle other cases yet. */
2057 gcc_assert (remainder == 0xffffffff);
2060 emit_constant_insn (cond,
2061 gen_rtx_SET (VOIDmode, target,
2062 gen_rtx_NOT (mode, source)));
2066 /* We treat MINUS as (val - source), since (source - val) is always
2067 passed as (source + (-val)). */
2071 emit_constant_insn (cond,
2072 gen_rtx_SET (VOIDmode, target,
2073 gen_rtx_NEG (mode, source)));
2076 if (const_ok_for_arm (val))
2079 emit_constant_insn (cond,
2080 gen_rtx_SET (VOIDmode, target,
2081 gen_rtx_MINUS (mode, GEN_INT (val),
2093 /* If we can do it in one insn, get out quickly. */
2094 if (const_ok_for_arm (val)
2095 || (can_negate_initial && const_ok_for_arm (-val))
2096 || (can_invert && const_ok_for_arm (~val)))
2099 emit_constant_insn (cond,
2100 gen_rtx_SET (VOIDmode, target,
2102 ? gen_rtx_fmt_ee (code, mode, source,
2108 /* Calculate a few attributes that may be useful for specific machines. */
2110 for (i = 31; i >= 0; i--)
2112 if ((remainder & (1 << i)) == 0)
2113 clear_sign_bit_copies++;
2118 for (i = 31; i >= 0; i--)
2120 if ((remainder & (1 << i)) != 0)
2121 set_sign_bit_copies++;
2126 for (i = 0; i <= 31; i++)
2128 if ((remainder & (1 << i)) == 0)
2129 clear_zero_bit_copies++;
2134 for (i = 0; i <= 31; i++)
2136 if ((remainder & (1 << i)) != 0)
2137 set_zero_bit_copies++;
2145 /* See if we can use movw. */
2146 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2149 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2154 /* See if we can do this by sign_extending a constant that is known
2155 to be negative. This is a good way of doing it, since the shift
2156 may well merge into a subsequent insn. */
2157 if (set_sign_bit_copies > 1)
2159 if (const_ok_for_arm
2160 (temp1 = ARM_SIGN_EXTEND (remainder
2161 << (set_sign_bit_copies - 1))))
2165 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2166 emit_constant_insn (cond,
2167 gen_rtx_SET (VOIDmode, new_src,
2169 emit_constant_insn (cond,
2170 gen_ashrsi3 (target, new_src,
2171 GEN_INT (set_sign_bit_copies - 1)));
2175 /* For an inverted constant, we will need to set the low bits,
2176 these will be shifted out of harm's way. */
2177 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2178 if (const_ok_for_arm (~temp1))
2182 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2183 emit_constant_insn (cond,
2184 gen_rtx_SET (VOIDmode, new_src,
2186 emit_constant_insn (cond,
2187 gen_ashrsi3 (target, new_src,
2188 GEN_INT (set_sign_bit_copies - 1)));
2194 /* See if we can calculate the value as the difference between two
2195 valid immediates. */
2196 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2198 int topshift = clear_sign_bit_copies & ~1;
2200 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2201 & (0xff000000 >> topshift));
2203 /* If temp1 is zero, then that means the 9 most significant
2204 bits of remainder were 1 and we've caused it to overflow.
2205 When topshift is 0 we don't need to do anything since we
2206 can borrow from 'bit 32'. */
2207 if (temp1 == 0 && topshift != 0)
2208 temp1 = 0x80000000 >> (topshift - 1);
2210 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2212 if (const_ok_for_arm (temp2))
2216 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2217 emit_constant_insn (cond,
2218 gen_rtx_SET (VOIDmode, new_src,
2220 emit_constant_insn (cond,
2221 gen_addsi3 (target, new_src,
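/* Worked example (illustrative): for a SET of 0x00ffffff, which is not
a valid immediate, topshift is 8 and the rounding above overflows, so
temp1 becomes 0x01000000 and temp2 = temp1 - remainder = 1, giving:
mov rD, #0x01000000
sub rD, rD, #1
i.e. the add above is emitted with the negated constant -temp2. */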
2229 /* See if we can generate this by setting the bottom (or the top)
2230 16 bits, and then shifting these into the other half of the
2231 word. We only look for the simplest cases, to do more would cost
2232 too much. Be careful, however, not to generate this when the
2233 alternative would take fewer insns. */
2234 if (val & 0xffff0000)
2236 temp1 = remainder & 0xffff0000;
2237 temp2 = remainder & 0x0000ffff;
2239 /* Overlaps outside this range are best done using other methods. */
2240 for (i = 9; i < 24; i++)
2242 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2243 && !const_ok_for_arm (temp2))
2245 rtx new_src = (subtargets
2246 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2248 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2249 source, subtargets, generate);
2257 gen_rtx_ASHIFT (mode, source,
2264 /* Don't duplicate cases already considered. */
2265 for (i = 17; i < 24; i++)
2267 if (((temp1 | (temp1 >> i)) == remainder)
2268 && !const_ok_for_arm (temp1))
2270 rtx new_src = (subtargets
2271 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2273 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2274 source, subtargets, generate);
2279 gen_rtx_SET (VOIDmode, target,
2282 gen_rtx_LSHIFTRT (mode, source,
2293 /* If we have IOR or XOR, and the constant can be loaded in a
2294 single instruction, and we can find a temporary to put it in,
2295 then this can be done in two instructions instead of 3-4. */
2297 /* TARGET can't be NULL if SUBTARGETS is 0. */
2298 || (reload_completed && !reg_mentioned_p (target, source)))
2300 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2304 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2306 emit_constant_insn (cond,
2307 gen_rtx_SET (VOIDmode, sub,
2309 emit_constant_insn (cond,
2310 gen_rtx_SET (VOIDmode, target,
2311 gen_rtx_fmt_ee (code, mode,
2321 if (set_sign_bit_copies > 8
2322 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2326 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2327 rtx shift = GEN_INT (set_sign_bit_copies);
2331 gen_rtx_SET (VOIDmode, sub,
2333 gen_rtx_ASHIFT (mode,
2338 gen_rtx_SET (VOIDmode, target,
2340 gen_rtx_LSHIFTRT (mode, sub,
2346 if (set_zero_bit_copies > 8
2347 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2351 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2352 rtx shift = GEN_INT (set_zero_bit_copies);
2356 gen_rtx_SET (VOIDmode, sub,
2358 gen_rtx_LSHIFTRT (mode,
2363 gen_rtx_SET (VOIDmode, target,
2365 gen_rtx_ASHIFT (mode, sub,
2371 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2375 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2376 emit_constant_insn (cond,
2377 gen_rtx_SET (VOIDmode, sub,
2378 gen_rtx_NOT (mode, source)));
2381 sub = gen_reg_rtx (mode);
2382 emit_constant_insn (cond,
2383 gen_rtx_SET (VOIDmode, sub,
2384 gen_rtx_AND (mode, source,
2386 emit_constant_insn (cond,
2387 gen_rtx_SET (VOIDmode, target,
2388 gen_rtx_NOT (mode, sub)));
2395 /* See if two shifts will do 2 or more insn's worth of work. */
2396 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2398 HOST_WIDE_INT shift_mask = ((0xffffffff
2399 << (32 - clear_sign_bit_copies))
2402 if ((remainder | shift_mask) != 0xffffffff)
2406 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2407 insns = arm_gen_constant (AND, mode, cond,
2408 remainder | shift_mask,
2409 new_src, source, subtargets, 1);
2414 rtx targ = subtargets ? NULL_RTX : target;
2415 insns = arm_gen_constant (AND, mode, cond,
2416 remainder | shift_mask,
2417 targ, source, subtargets, 0);
2423 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2424 rtx shift = GEN_INT (clear_sign_bit_copies);
2426 emit_insn (gen_ashlsi3 (new_src, source, shift));
2427 emit_insn (gen_lshrsi3 (target, new_src, shift));
2433 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2435 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2437 if ((remainder | shift_mask) != 0xffffffff)
2441 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2443 insns = arm_gen_constant (AND, mode, cond,
2444 remainder | shift_mask,
2445 new_src, source, subtargets, 1);
2450 rtx targ = subtargets ? NULL_RTX : target;
2452 insns = arm_gen_constant (AND, mode, cond,
2453 remainder | shift_mask,
2454 targ, source, subtargets, 0);
2460 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2461 rtx shift = GEN_INT (clear_zero_bit_copies);
2463 emit_insn (gen_lshrsi3 (new_src, source, shift));
2464 emit_insn (gen_ashlsi3 (target, new_src, shift));
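/* Worked example (illustrative): an AND with the mask 0x0000ffff has
clear_sign_bit_copies == 16, and since the mask is exactly the low
half-word no residual AND is needed, so only the two shifts are emitted:
mov rD, rS, lsl #16
mov rD, rD, lsr #16
clearing the top 16 bits in two insns. */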
2476 for (i = 0; i < 32; i++)
2477 if (remainder & (1 << i))
2480 if (code == AND || (can_invert && num_bits_set > 16))
2481 remainder = (~remainder) & 0xffffffff;
2482 else if (code == PLUS && num_bits_set > 16)
2483 remainder = (-remainder) & 0xffffffff;
2490 /* Now try to find a way of doing the job in either two or three instructions.
2492 We start by looking for the largest block of zeros that are aligned on
2493 a 2-bit boundary, we then fill up the temps, wrapping around to the
2494 top of the word when we drop off the bottom.
2495 In the worst case this code should produce no more than four insns.
2496 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2497 best place to start. */
2499 /* ??? Use thumb2 replicated constants when the high and low halfwords are the same. */
2505 int best_consecutive_zeros = 0;
2507 for (i = 0; i < 32; i += 2)
2509 int consecutive_zeros = 0;
2511 if (!(remainder & (3 << i)))
2513 while ((i < 32) && !(remainder & (3 << i)))
2515 consecutive_zeros += 2;
2518 if (consecutive_zeros > best_consecutive_zeros)
2520 best_consecutive_zeros = consecutive_zeros;
2521 best_start = i - consecutive_zeros;
2527 /* So long as it won't require any more insns to do so, it's
2528 desirable to emit a small constant (in bits 0...9) in the last
2529 insn. This way there is more chance that it can be combined with
2530 a later addressing insn to form a pre-indexed load or store
2531 operation. Consider:
2533 *((volatile int *)0xe0000100) = 1;
2534 *((volatile int *)0xe0000110) = 2;
2536 We want this to wind up as:
2538 mov rA, #0xe0000000
2539 mov rB, #1
2540 str rB, [rA, #0x100]
2541 mov rB, #2
2542 str rB, [rA, #0x110]
2544 rather than having to synthesize both large constants from scratch.
2546 Therefore, we calculate how many insns would be required to emit
2547 the constant starting from `best_start', and also starting from
2548 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2549 yield a shorter sequence, we may as well use zero. */
2551 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2552 && (count_insns_for_constant (remainder, 0) <=
2553 count_insns_for_constant (remainder, best_start)))
2557 /* Now start emitting the insns. */
2565 if (remainder & (3 << (i - 2)))
2570 temp1 = remainder & ((0x0ff << end)
2571 | ((i < end) ? (0xff >> (32 - end)) : 0));
2572 remainder &= ~temp1;
2576 rtx new_src, temp1_rtx;
2578 if (code == SET || code == MINUS)
2580 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2581 if (can_invert && code != MINUS)
2586 if (remainder && subtargets)
2587 new_src = gen_reg_rtx (mode);
2592 else if (can_negate)
2596 temp1 = trunc_int_for_mode (temp1, mode);
2597 temp1_rtx = GEN_INT (temp1);
2601 else if (code == MINUS)
2602 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2604 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2606 emit_constant_insn (cond,
2607 gen_rtx_SET (VOIDmode, new_src,
2617 else if (code == MINUS)
2626 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary shifts. */
2639 /* Canonicalize a comparison so that we are more likely to recognize it.
2640 This can be done for a few constant compares, where we can make the
2641 immediate value easier to load. */
2644 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2647 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2648 unsigned HOST_WIDE_INT maxval;
2649 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2660 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2662 *op1 = GEN_INT (i + 1);
2663 return code == GT ? GE : LT;
2670 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2672 *op1 = GEN_INT (i - 1);
2673 return code == GE ? GT : LE;
2679 if (i != ~((unsigned HOST_WIDE_INT) 0)
2680 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2682 *op1 = GEN_INT (i + 1);
2683 return code == GTU ? GEU : LTU;
2690 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2692 *op1 = GEN_INT (i - 1);
2693 return code == GEU ? GTU : LEU;
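/* For example (illustrative): 0xfff is not a valid immediate but 0x1000
is, so (GT reg 0xfff) is rewritten above as (GE reg 0x1000), saving the
insns that loading 0xfff into a register would otherwise cost. */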
2705 /* Define how to find the value returned by a function. */
2708 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2710 enum machine_mode mode;
2711 int unsignedp ATTRIBUTE_UNUSED;
2712 rtx r ATTRIBUTE_UNUSED;
2714 mode = TYPE_MODE (type);
2715 /* Promote integer types. */
2716 if (INTEGRAL_TYPE_P (type))
2717 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2719 /* Promote small structs returned in a register to full-word size
2720 for big-endian AAPCS. */
2721 if (arm_return_in_msb (type))
2723 HOST_WIDE_INT size = int_size_in_bytes (type);
2724 if (size % UNITS_PER_WORD != 0)
2726 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2727 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2731 return LIBCALL_VALUE(mode);
2734 /* Determine the amount of memory needed to store the possible return
2735 registers of an untyped call. */
2737 arm_apply_result_size (void)
2743 if (TARGET_HARD_FLOAT_ABI)
2747 if (TARGET_MAVERICK)
2750 if (TARGET_IWMMXT_ABI)
2757 /* Decide whether a type should be returned in memory (true)
2758 or in a register (false). This is called as the target hook
2759 TARGET_RETURN_IN_MEMORY. */
2761 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2765 size = int_size_in_bytes (type);
2767 /* Vector values should be returned using ARM registers, not memory (unless
2768 they're over 16 bytes, which will break since we only have four
2769 call-clobbered registers to play with). */
2770 if (TREE_CODE (type) == VECTOR_TYPE)
2771 return (size < 0 || size > (4 * UNITS_PER_WORD));
2773 if (!AGGREGATE_TYPE_P (type) &&
2774 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2775 /* All simple types are returned in registers.
2776 For AAPCS, complex types are treated the same as aggregates. */
2779 if (arm_abi != ARM_ABI_APCS)
2781 /* ATPCS and later return aggregate types in memory only if they are
2782 larger than a word (or are variable size). */
2783 return (size < 0 || size > UNITS_PER_WORD);
2786 /* For the arm-wince targets we choose to be compatible with Microsoft's
2787 ARM and Thumb compilers, which always return aggregates in memory. */
2789 /* All structures/unions bigger than one word are returned in memory.
2790 Also catch the case where int_size_in_bytes returns -1. In this case
2791 the aggregate is either huge or of variable size, and in either case
2792 we will want to return it via memory and not in a register. */
2793 if (size < 0 || size > UNITS_PER_WORD)
2796 if (TREE_CODE (type) == RECORD_TYPE)
2800 /* For a struct the APCS says that we only return in a register
2801 if the type is 'integer like' and every addressable element
2802 has an offset of zero. For practical purposes this means
2803 that the structure can have at most one non bit-field element
2804 and that this element must be the first one in the structure. */
2806 /* Find the first field, ignoring non-FIELD_DECL things, which will
2807 have been created by C++. */
2808 for (field = TYPE_FIELDS (type);
2809 field && TREE_CODE (field) != FIELD_DECL;
2810 field = TREE_CHAIN (field))
2814 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2816 /* Check that the first field is valid for returning in a register. */
2818 /* ... Floats are not allowed */
2819 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2822 /* ... Aggregates that are not themselves valid for returning in
2823 a register are not allowed. */
2824 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2827 /* Now check the remaining fields, if any. Only bitfields are allowed,
2828 since they are not addressable. */
2829 for (field = TREE_CHAIN (field);
2831 field = TREE_CHAIN (field))
2833 if (TREE_CODE (field) != FIELD_DECL)
2836 if (!DECL_BIT_FIELD_TYPE (field))
2843 if (TREE_CODE (type) == UNION_TYPE)
2847 /* Unions can be returned in registers if every element is
2848 integral, or can be returned in an integer register. */
2849 for (field = TYPE_FIELDS (type);
2851 field = TREE_CHAIN (field))
2853 if (TREE_CODE (field) != FIELD_DECL)
2856 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2859 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2865 #endif /* not ARM_WINCE */
2867 /* Return all other types in memory. */
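/* Illustrative examples of the APCS rules above (assuming 32-bit words):
struct { int i; }          returned in a register (integer like)
struct { float f; }        returned in memory (float first field)
struct { int i; int j; }   returned in memory (wider than one word) */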
2871 /* Indicate whether or not words of a double are in big-endian order. */
2874 arm_float_words_big_endian (void)
2876 if (TARGET_MAVERICK)
2879 /* For FPA, float words are always big-endian. For VFP, float words
2880 follow the memory system mode. */
2888 return (TARGET_BIG_END ? 1 : 0);
2893 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2894 for a call to a function whose data type is FNTYPE.
2895 For a library call, FNTYPE is NULL. */
2897 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2898 rtx libname ATTRIBUTE_UNUSED,
2899 tree fndecl ATTRIBUTE_UNUSED)
2901 /* On the ARM, the offset starts at 0. */
2903 pcum->iwmmxt_nregs = 0;
2904 pcum->can_split = true;
2906 /* Varargs vectors are treated the same as long long.
2907 named_count avoids having to change the way arm handles 'named' */
2908 pcum->named_count = 0;
2911 if (TARGET_REALLY_IWMMXT && fntype)
2915 for (fn_arg = TYPE_ARG_TYPES (fntype);
2917 fn_arg = TREE_CHAIN (fn_arg))
2918 pcum->named_count += 1;
2920 if (! pcum->named_count)
2921 pcum->named_count = INT_MAX;
2926 /* Return true if mode/type need doubleword alignment. */
2928 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2930 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2931 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2935 /* Determine where to put an argument to a function.
2936 Value is zero to push the argument on the stack,
2937 or a hard register in which to store the argument.
2939 MODE is the argument's machine mode.
2940 TYPE is the data type of the argument (as a tree).
2941 This is null for libcalls where that information may not be available.
2943 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2944 the preceding args and about the function being called.
2945 NAMED is nonzero if this argument is a named parameter
2946 (otherwise it is an extra parameter matching an ellipsis). */
2949 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2950 tree type, int named)
2954 /* Varargs vectors are treated the same as long long.
2955 named_count avoids having to change the way arm handles 'named' */
2956 if (TARGET_IWMMXT_ABI
2957 && arm_vector_mode_supported_p (mode)
2958 && pcum->named_count > pcum->nargs + 1)
2960 if (pcum->iwmmxt_nregs <= 9)
2961 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2964 pcum->can_split = false;
2969 /* Put doubleword aligned quantities in even register pairs. */
2971 && ARM_DOUBLEWORD_ALIGN
2972 && arm_needs_doubleword_align (mode, type))
2975 if (mode == VOIDmode)
2976 /* Pick an arbitrary value for operand 2 of the call insn. */
2979 /* Only allow splitting an arg between regs and memory if all preceding
2980 args were allocated to regs. For args passed by reference we only count
2981 the reference pointer. */
2982 if (pcum->can_split)
2985 nregs = ARM_NUM_REGS2 (mode, type);
2987 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2990 return gen_rtx_REG (mode, pcum->nregs);
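/* For example (illustrative), under the AAPCS a call to
void f (int a, long long b);
passes A in r0 and then, because DImode needs doubleword alignment,
the adjustment above skips r1 so that B occupies the even pair r2/r3. */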
2994 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2995 tree type, bool named ATTRIBUTE_UNUSED)
2997 int nregs = pcum->nregs;
2999 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3002 if (NUM_ARG_REGS > nregs
3003 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3005 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3010 /* Variable sized types are passed by reference. This is a GCC
3011 extension to the ARM ABI. */
3014 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3015 enum machine_mode mode ATTRIBUTE_UNUSED,
3016 const_tree type, bool named ATTRIBUTE_UNUSED)
3018 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3021 /* Encode the current state of the #pragma [no_]long_calls. */
3024 OFF, /* No #pragma [no_]long_calls is in effect. */
3025 LONG, /* #pragma long_calls is in effect. */
3026 SHORT /* #pragma no_long_calls is in effect. */
3029 static arm_pragma_enum arm_pragma_long_calls = OFF;
3032 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3034 arm_pragma_long_calls = LONG;
3038 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3040 arm_pragma_long_calls = SHORT;
3044 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3046 arm_pragma_long_calls = OFF;
3049 /* Table of machine attributes. */
3050 const struct attribute_spec arm_attribute_table[] =
3052 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3053 /* Function calls made to this symbol must be done indirectly, because
3054 it may lie outside of the 26-bit addressing range of a normal function call. */
3056 { "long_call", 0, 0, false, true, true, NULL },
3057 /* Whereas these functions are always known to reside within the 26-bit
3058 addressing range. */
3059 { "short_call", 0, 0, false, true, true, NULL },
3060 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3061 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3062 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3063 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3065 /* ARM/PE has three new attributes:
3067 dllexport - for exporting a function/variable that will live in a dll
3068 dllimport - for importing a function/variable from a dll
3070 Microsoft allows multiple declspecs in one __declspec, separating
3071 them with spaces. We do NOT support this. Instead, use __declspec multiple times.
3074 { "dllimport", 0, 0, true, false, false, NULL },
3075 { "dllexport", 0, 0, true, false, false, NULL },
3076 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3077 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3078 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3079 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3080 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3082 { NULL, 0, 0, false, false, false, NULL }
3085 /* Handle an attribute requiring a FUNCTION_DECL;
3086 arguments as in struct attribute_spec.handler. */
3088 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3089 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3091 if (TREE_CODE (*node) != FUNCTION_DECL)
3093 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3094 IDENTIFIER_POINTER (name));
3095 *no_add_attrs = true;
3101 /* Handle an "interrupt" or "isr" attribute;
3102 arguments as in struct attribute_spec.handler. */
3104 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3109 if (TREE_CODE (*node) != FUNCTION_DECL)
3111 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3112 IDENTIFIER_POINTER (name));
3113 *no_add_attrs = true;
3115 /* FIXME: the argument, if any, is checked for type attributes;
3116 should it be checked for decl ones? */
3120 if (TREE_CODE (*node) == FUNCTION_TYPE
3121 || TREE_CODE (*node) == METHOD_TYPE)
3123 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3125 warning (OPT_Wattributes, "%qs attribute ignored",
3126 IDENTIFIER_POINTER (name));
3127 *no_add_attrs = true;
3130 else if (TREE_CODE (*node) == POINTER_TYPE
3131 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3132 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3133 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3135 *node = build_variant_type_copy (*node);
3136 TREE_TYPE (*node) = build_type_attribute_variant
3138 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3139 *no_add_attrs = true;
3143 /* Possibly pass this attribute on from the type to a decl. */
3144 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3145 | (int) ATTR_FLAG_FUNCTION_NEXT
3146 | (int) ATTR_FLAG_ARRAY_NEXT))
3148 *no_add_attrs = true;
3149 return tree_cons (name, args, NULL_TREE);
3153 warning (OPT_Wattributes, "%qs attribute ignored",
3154 IDENTIFIER_POINTER (name));
3162 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3163 /* Handle the "notshared" attribute. This attribute is another way of
3164 requesting hidden visibility. ARM's compiler supports
3165 "__declspec(notshared)"; we support the same thing via an
3169 arm_handle_notshared_attribute (tree *node,
3170 tree name ATTRIBUTE_UNUSED,
3171 tree args ATTRIBUTE_UNUSED,
3172 int flags ATTRIBUTE_UNUSED,
3175 tree decl = TYPE_NAME (*node);
3179 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3180 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3181 *no_add_attrs = false;
3187 /* Return 0 if the attributes for two types are incompatible, 1 if they
3188 are compatible, and 2 if they are nearly compatible (which causes a
3189 warning to be generated). */
3191 arm_comp_type_attributes (const_tree type1, const_tree type2)
3195 /* Check for mismatch of non-default calling convention. */
3196 if (TREE_CODE (type1) != FUNCTION_TYPE)
3199 /* Check for mismatched call attributes. */
3200 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3201 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3202 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3203 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3205 /* Only bother to check if an attribute is defined. */
3206 if (l1 | l2 | s1 | s2)
3208 /* If one type has an attribute, the other must have the same attribute. */
3209 if ((l1 != l2) || (s1 != s2))
3212 /* Disallow mixed attributes. */
3213 if ((l1 & s2) || (l2 & s1))
3217 /* Check for mismatched ISR attribute. */
3218 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3220 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3221 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3223 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3230 /* Assign default attributes to a newly defined type. This is used to
3231 set short_call/long_call attributes for function types of
3232 functions defined inside corresponding #pragma scopes. */
3234 arm_set_default_type_attributes (tree type)
3236 /* Add __attribute__ ((long_call)) to all functions when inside
3237 #pragma long_calls, or __attribute__ ((short_call)) when inside
3238 #pragma no_long_calls. */
3239 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3241 tree type_attr_list, attr_name;
3242 type_attr_list = TYPE_ATTRIBUTES (type);
3244 if (arm_pragma_long_calls == LONG)
3245 attr_name = get_identifier ("long_call");
3246 else if (arm_pragma_long_calls == SHORT)
3247 attr_name = get_identifier ("short_call");
3251 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3252 TYPE_ATTRIBUTES (type) = type_attr_list;
3256 /* Return true if DECL is known to be linked into section SECTION. */
3259 arm_function_in_section_p (tree decl, section *section)
3261 /* We can only be certain about functions defined in the same
3262 compilation unit. */
3263 if (!TREE_STATIC (decl))
3266 /* Make sure that SYMBOL always binds to the definition in this
3267 compilation unit. */
3268 if (!targetm.binds_local_p (decl))
3271 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3272 if (!DECL_SECTION_NAME (decl))
3274 /* Make sure that we will not create a unique section for DECL. */
3275 if (flag_function_sections || DECL_ONE_ONLY (decl))
3279 return function_section (decl) == section;
3282 /* Return nonzero if a 32-bit "long_call" should be generated for
3283 a call from the current function to DECL. We generate a long_call
3286 a. has an __attribute__ ((long_call))
3287 or b. is within the scope of a #pragma long_calls
3288 or c. the -mlong-calls command line switch has been specified
3290 However we do not generate a long call if the function:
3292 d. has an __attribute__ ((short_call))
3293 or e. is inside the scope of a #pragma no_long_calls
3294 or f. is defined in the same section as the current function. */
3297 arm_is_long_call_p (tree decl)
3302 return TARGET_LONG_CALLS;
3304 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3305 if (lookup_attribute ("short_call", attrs))
3308 /* For "f", be conservative, and only cater for cases in which the
3309 whole of the current function is placed in the same section. */
3310 if (!flag_reorder_blocks_and_partition
3311 && arm_function_in_section_p (decl, current_function_section ()))
3314 if (lookup_attribute ("long_call", attrs))
3317 return TARGET_LONG_CALLS;
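/* Usage example (illustrative):
extern void far_away (void) __attribute__ ((long_call));
calls to far_away then load the full address into a register and branch
indirectly, instead of using a "bl" whose 26-bit byte-offset range is
limited to +/-32MB. */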
3320 /* Return nonzero if it is ok to make a tail-call to DECL. */
3322 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3324 unsigned long func_type;
3326 if (cfun->machine->sibcall_blocked)
3329 /* Never tailcall something for which we have no decl, or if we
3330 are in Thumb mode. */
3331 if (decl == NULL || TARGET_THUMB)
3334 /* The PIC register is live on entry to VxWorks PLT entries, so we
3335 must make the call before restoring the PIC register. */
3336 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3339 /* Cannot tail-call to long calls, since these are out of range of
3340 a branch instruction. */
3341 if (arm_is_long_call_p (decl))
3344 /* If we are interworking and the function is not declared static
3345 then we can't tail-call it unless we know that it exists in this
3346 compilation unit (since it might be a Thumb routine). */
3347 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3350 func_type = arm_current_func_type ();
3351 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3352 if (IS_INTERRUPT (func_type))
3355 /* Never tailcall if function may be called with a misaligned SP. */
3356 if (IS_STACKALIGN (func_type))
3359 /* Everything else is ok. */
3364 /* Addressing mode support functions. */
3366 /* Return nonzero if X is a legitimate immediate operand when compiling
3367 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3369 legitimate_pic_operand_p (rtx x)
3371 if (GET_CODE (x) == SYMBOL_REF
3372 || (GET_CODE (x) == CONST
3373 && GET_CODE (XEXP (x, 0)) == PLUS
3374 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3380 /* Record that the current function needs a PIC register. Initialize
3381 cfun->machine->pic_reg if we have not already done so. */
3384 require_pic_register (void)
3386 /* A lot of the logic here is made obscure by the fact that this
3387 routine gets called as part of the rtx cost estimation process.
3388 We don't want those calls to affect any assumptions about the real
3389 function; and further, we can't call entry_of_function() until we
3390 start the real expansion process. */
3391 if (!crtl->uses_pic_offset_table)
3393 gcc_assert (can_create_pseudo_p ());
3394 if (arm_pic_register != INVALID_REGNUM)
3396 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3398 /* Play games to avoid marking the function as needing pic
3399 if we are being called as part of the cost-estimation process. */
3401 if (current_ir_type () != IR_GIMPLE)
3402 crtl->uses_pic_offset_table = 1;
3408 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3410 /* Play games to avoid marking the function as needing pic
3411 if we are being called as part of the cost-estimation process. */
3413 if (current_ir_type () != IR_GIMPLE)
3415 crtl->uses_pic_offset_table = 1;
3418 arm_load_pic_register (0UL);
3422 emit_insn_after (seq, entry_of_function ());
3429 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3431 if (GET_CODE (orig) == SYMBOL_REF
3432 || GET_CODE (orig) == LABEL_REF)
3434 rtx pic_ref, address;
3438 /* If this function doesn't have a pic register, create one now. */
3439 require_pic_register ();
3443 gcc_assert (can_create_pseudo_p ());
3444 reg = gen_reg_rtx (Pmode);
3450 address = gen_reg_rtx (Pmode);
3455 emit_insn (gen_pic_load_addr_arm (address, orig));
3456 else if (TARGET_THUMB2)
3457 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3458 else /* TARGET_THUMB1 */
3459 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3461 /* VxWorks does not impose a fixed gap between segments; the run-time
3462 gap can be different from the object-file gap. We therefore can't
3463 use GOTOFF unless we are absolutely sure that the symbol is in the
3464 same segment as the GOT. Unfortunately, the flexibility of linker
3465 scripts means that we can't be sure of that in general, so assume
3466 that GOTOFF is never valid on VxWorks. */
3467 if ((GET_CODE (orig) == LABEL_REF
3468 || (GET_CODE (orig) == SYMBOL_REF &&
3469 SYMBOL_REF_LOCAL_P (orig)))
3471 && !TARGET_VXWORKS_RTP)
3472 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3475 pic_ref = gen_const_mem (Pmode,
3476 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3480 insn = emit_move_insn (reg, pic_ref);
3482 /* Put a REG_EQUAL note on this insn, so that it can be optimized by loop. */
3484 set_unique_reg_note (insn, REG_EQUAL, orig);
3488 else if (GET_CODE (orig) == CONST)
3492 if (GET_CODE (XEXP (orig, 0)) == PLUS
3493 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3496 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3497 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3502 gcc_assert (can_create_pseudo_p ());
3503 reg = gen_reg_rtx (Pmode);
3506 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3508 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3509 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3510 base == reg ? 0 : reg);
3512 if (GET_CODE (offset) == CONST_INT)
3514 /* The base register doesn't really matter; we only want to
3515 test the index for the appropriate mode. */
3516 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3518 gcc_assert (can_create_pseudo_p ());
3519 offset = force_reg (Pmode, offset);
3522 if (GET_CODE (offset) == CONST_INT)
3523 return plus_constant (base, INTVAL (offset));
3526 if (GET_MODE_SIZE (mode) > 4
3527 && (GET_MODE_CLASS (mode) == MODE_INT
3528 || TARGET_SOFT_FLOAT))
3530 emit_insn (gen_addsi3 (reg, base, offset));
3534 return gen_rtx_PLUS (Pmode, base, offset);
3541 /* Find a spare register to use during the prologue of a function. */
3544 thumb_find_work_register (unsigned long pushed_regs_mask)
3548 /* Check the argument registers first as these are call-used. The
3549 register allocation order means that sometimes r3 might be used
3550 but earlier argument registers might not, so check them all. */
3551 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3552 if (!df_regs_ever_live_p (reg))
3555 /* Before going on to check the call-saved registers we can try a couple
3556 more ways of deducing that r3 is available. The first is when we are
3557 pushing anonymous arguments onto the stack and we have less than 4
3558 registers worth of fixed arguments (*). In this case r3 will be part of
3559 the variable argument list and so we can be sure that it will be
3560 pushed right at the start of the function. Hence it will be available
3561 for the rest of the prologue.
3562 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3563 if (cfun->machine->uses_anonymous_args
3564 && crtl->args.pretend_args_size > 0)
3565 return LAST_ARG_REGNUM;
3567 /* The other case is when we have fixed arguments but less than 4 registers
3568 worth. In this case r3 might be used in the body of the function, but
3569 it is not being used to convey an argument into the function. In theory
3570 we could just check crtl->args.size to see how many bytes are
3571 being passed in argument registers, but it seems that it is unreliable.
3572 Sometimes it will have the value 0 when in fact arguments are being
3573 passed. (See testcase execute/20021111-1.c for an example). So we also
3574 check the args_info.nregs field as well. The problem with this field is
3575 that it makes no allowances for arguments that are passed to the
3576 function but which are not used. Hence we could miss an opportunity
3577 when a function has an unused argument in r3. But it is better to be
3578 safe than to be sorry. */
3579 if (! cfun->machine->uses_anonymous_args
3580 && crtl->args.size >= 0
3581 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3582 && crtl->args.info.nregs < 4)
3583 return LAST_ARG_REGNUM;
3585 /* Otherwise look for a call-saved register that is going to be pushed. */
3586 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3587 if (pushed_regs_mask & (1 << reg))
3592 /* Thumb-2 can use high regs. */
3593 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3594 if (pushed_regs_mask & (1 << reg))
3597 /* Something went wrong - thumb_compute_save_reg_mask()
3598 should have arranged for a suitable register to be pushed. */
3602 static GTY(()) int pic_labelno;
3604 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
3608 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3610 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3611 rtx global_offset_table;
3613 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3616 gcc_assert (flag_pic);
3618 pic_reg = cfun->machine->pic_reg;
3619 if (TARGET_VXWORKS_RTP)
3621 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3622 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3623 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3625 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3627 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3628 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3632 /* We use an UNSPEC rather than a LABEL_REF because this label
3633 never appears in the code stream. */
3635 labelno = GEN_INT (pic_labelno++);
3636 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3637 l1 = gen_rtx_CONST (VOIDmode, l1);
3640 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3641 /* On the ARM the PC register contains 'dot + 8' at the time of the
3642 addition; on the Thumb it is 'dot + 4'. */
3643 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3646 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3647 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3650 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3652 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3653 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3657 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3658 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
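/* Sketch of the TARGET_ARM sequence emitted above (register and label
names illustrative):
ldr     rPIC, .Loffset
.Lpic:
add     rPIC, pc, rPIC          @ pic_add_dot_plus_eight
...
.Loffset:
.word   _GLOBAL_OFFSET_TABLE_ - (.Lpic + 8)
The '+ 8' cancels the ARM-state PC read-ahead described above. */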
3660 else if (TARGET_THUMB2)
3662 /* Thumb-2 only allows very limited access to the PC. Calculate the
3663 address in a temporary register. */
3664 if (arm_pic_register != INVALID_REGNUM)
3666 pic_tmp = gen_rtx_REG (SImode,
3667 thumb_find_work_register (saved_regs));
3671 gcc_assert (can_create_pseudo_p ());
3672 pic_tmp = gen_reg_rtx (Pmode);
3675 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3676 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3677 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3679 else /* TARGET_THUMB1 */
3681 if (arm_pic_register != INVALID_REGNUM
3682 && REGNO (pic_reg) > LAST_LO_REGNUM)
3684 /* We will have pushed the pic register, so we should always be
3685 able to find a work register. */
3686 pic_tmp = gen_rtx_REG (SImode,
3687 thumb_find_work_register (saved_regs));
3688 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3689 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3692 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3693 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3697 /* Need to emit this whether or not we obey regdecls,
3698 since setjmp/longjmp can cause life info to screw up. */
3703 /* Return nonzero if X is valid as an ARM state addressing register. */
3705 arm_address_register_rtx_p (rtx x, int strict_p)
3709 if (GET_CODE (x) != REG)
3715 return ARM_REGNO_OK_FOR_BASE_P (regno);
3717 return (regno <= LAST_ARM_REGNUM
3718 || regno >= FIRST_PSEUDO_REGISTER
3719 || regno == FRAME_POINTER_REGNUM
3720 || regno == ARG_POINTER_REGNUM);
3723 /* Return TRUE if this rtx is the difference of a symbol and a label,
3724 and will reduce to a PC-relative relocation in the object file.
3725 Expressions like this can be left alone when generating PIC, rather
3726 than forced through the GOT. */
3728 pcrel_constant_p (rtx x)
3730 if (GET_CODE (x) == MINUS)
3731 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3736 /* Return nonzero if X is a valid ARM state address operand. */
3738 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3742 enum rtx_code code = GET_CODE (x);
3744 if (arm_address_register_rtx_p (x, strict_p))
3747 use_ldrd = (TARGET_LDRD
3749 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3751 if (code == POST_INC || code == PRE_DEC
3752 || ((code == PRE_INC || code == POST_DEC)
3753 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3754 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3756 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3757 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3758 && GET_CODE (XEXP (x, 1)) == PLUS
3759 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3761 rtx addend = XEXP (XEXP (x, 1), 1);
3763 /* Don't allow ldrd post increment by register because it's hard
3764 to fix up invalid register choices. */
3766 && GET_CODE (x) == POST_MODIFY
3767 && GET_CODE (addend) == REG)
3770 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3771 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3774 /* After reload, constants split into minipools will have addresses
3775 from a LABEL_REF. */
3776 else if (reload_completed
3777 && (code == LABEL_REF
3779 && GET_CODE (XEXP (x, 0)) == PLUS
3780 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3781 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3784 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3787 else if (code == PLUS)
3789 rtx xop0 = XEXP (x, 0);
3790 rtx xop1 = XEXP (x, 1);
3792 return ((arm_address_register_rtx_p (xop0, strict_p)
3793 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3794 || (arm_address_register_rtx_p (xop1, strict_p)
3795 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3799 /* Reload currently can't handle MINUS, so disable this for now. */
3800 else if (GET_CODE (x) == MINUS)
3802 rtx xop0 = XEXP (x, 0);
3803 rtx xop1 = XEXP (x, 1);
3805 return (arm_address_register_rtx_p (xop0, strict_p)
3806 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3810 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3811 && code == SYMBOL_REF
3812 && CONSTANT_POOL_ADDRESS_P (x)
3814 && symbol_mentioned_p (get_pool_constant (x))
3815 && ! pcrel_constant_p (get_pool_constant (x))))
3821 /* Return nonzero if X is a valid Thumb-2 address operand. */
3823 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3826 enum rtx_code code = GET_CODE (x);
3828 if (arm_address_register_rtx_p (x, strict_p))
3831 use_ldrd = (TARGET_LDRD
3833 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3835 if (code == POST_INC || code == PRE_DEC
3836 || ((code == PRE_INC || code == POST_DEC)
3837 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3838 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3840 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3841 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3842 && GET_CODE (XEXP (x, 1)) == PLUS
3843 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3845 /* Thumb-2 only has autoincrement by constant. */
3846 rtx addend = XEXP (XEXP (x, 1), 1);
3847 HOST_WIDE_INT offset;
3849 if (GET_CODE (addend) != CONST_INT)
3852 offset = INTVAL(addend);
3853 if (GET_MODE_SIZE (mode) <= 4)
3854 return (offset > -256 && offset < 256);
3856 return (use_ldrd && offset > -1024 && offset < 1024
3857 && (offset & 3) == 0);
3860 /* After reload, constants split into minipools will have addresses
3861 from a LABEL_REF. */
3862 else if (reload_completed
3863 && (code == LABEL_REF
3865 && GET_CODE (XEXP (x, 0)) == PLUS
3866 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3867 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3870 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3873 else if (code == PLUS)
3875 rtx xop0 = XEXP (x, 0);
3876 rtx xop1 = XEXP (x, 1);
3878 return ((arm_address_register_rtx_p (xop0, strict_p)
3879 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3880 || (arm_address_register_rtx_p (xop1, strict_p)
3881 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3884 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3885 && code == SYMBOL_REF
3886 && CONSTANT_POOL_ADDRESS_P (x)
3888 && symbol_mentioned_p (get_pool_constant (x))
3889 && ! pcrel_constant_p (get_pool_constant (x))))
3895 /* Return nonzero if INDEX is valid for an address index operand in ARM state. */
3898 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3901 HOST_WIDE_INT range;
3902 enum rtx_code code = GET_CODE (index);
3904 /* Standard coprocessor addressing modes. */
3905 if (TARGET_HARD_FLOAT
3906 && (TARGET_FPA || TARGET_MAVERICK)
3907 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3908 || (TARGET_MAVERICK && mode == DImode)))
3909 return (code == CONST_INT && INTVAL (index) < 1024
3910 && INTVAL (index) > -1024
3911 && (INTVAL (index) & 3) == 0);
3914 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3915 return (code == CONST_INT
3916 && INTVAL (index) < 1016
3917 && INTVAL (index) > -1024
3918 && (INTVAL (index) & 3) == 0);
3920 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3921 return (code == CONST_INT
3922 && INTVAL (index) < 1024
3923 && INTVAL (index) > -1024
3924 && (INTVAL (index) & 3) == 0);
3926 if (arm_address_register_rtx_p (index, strict_p)
3927 && (GET_MODE_SIZE (mode) <= 4))
3930 if (mode == DImode || mode == DFmode)
3932 if (code == CONST_INT)
3934 HOST_WIDE_INT val = INTVAL (index);
3937 return val > -256 && val < 256;
3939 return val > -4096 && val < 4092;
3942 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3945 if (GET_MODE_SIZE (mode) <= 4
3948 || (mode == QImode && outer == SIGN_EXTEND))))
3952 rtx xiop0 = XEXP (index, 0);
3953 rtx xiop1 = XEXP (index, 1);
3955 return ((arm_address_register_rtx_p (xiop0, strict_p)
3956 && power_of_two_operand (xiop1, SImode))
3957 || (arm_address_register_rtx_p (xiop1, strict_p)
3958 && power_of_two_operand (xiop0, SImode)));
3960 else if (code == LSHIFTRT || code == ASHIFTRT
3961 || code == ASHIFT || code == ROTATERT)
3963 rtx op = XEXP (index, 1);
3965 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3966 && GET_CODE (op) == CONST_INT
3968 && INTVAL (op) <= 31);
3972 /* For ARM v4 we may be doing a sign-extend operation during the load. */
3976 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3982 range = (mode == HImode) ? 4095 : 4096;
3984 return (code == CONST_INT
3985 && INTVAL (index) < range
3986 && INTVAL (index) > -range);
3989 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3990 index operand, i.e. 1, 2, 4 or 8. */
3992 thumb2_index_mul_operand (rtx op)
3996 if (GET_CODE(op) != CONST_INT)
4000 return (val == 1 || val == 2 || val == 4 || val == 8);
4003 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4005 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4007 enum rtx_code code = GET_CODE (index);
4009 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4010 /* Standard coprocessor addressing modes. */
4011 if (TARGET_HARD_FLOAT
4012 && (TARGET_FPA || TARGET_MAVERICK)
4013 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4014 || (TARGET_MAVERICK && mode == DImode)))
4015 return (code == CONST_INT && INTVAL (index) < 1024
4016 && INTVAL (index) > -1024
4017 && (INTVAL (index) & 3) == 0);
4019 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4021 /* For DImode assume values will usually live in core regs
4022 and only allow LDRD addressing modes. */
4023 if (!TARGET_LDRD || mode != DImode)
4024 return (code == CONST_INT
4025 && INTVAL (index) < 1024
4026 && INTVAL (index) > -1024
4027 && (INTVAL (index) & 3) == 0);
4031 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4032 return (code == CONST_INT
4033 && INTVAL (index) < 1016
4034 && INTVAL (index) > -1024
4035 && (INTVAL (index) & 3) == 0);
4037 if (arm_address_register_rtx_p (index, strict_p)
4038 && (GET_MODE_SIZE (mode) <= 4))
4041 if (mode == DImode || mode == DFmode)
4043 HOST_WIDE_INT val = INTVAL (index);
4044 /* ??? Can we assume ldrd for thumb2? */
4045 /* Thumb-2 ldrd only has reg+const addressing modes. */
4046 if (code != CONST_INT)
4049 /* ldrd supports offsets of +-1020.
4050 However the ldr fallback does not. */
4051 return val > -256 && val < 256 && (val & 3) == 0;
4056 rtx xiop0 = XEXP (index, 0);
4057 rtx xiop1 = XEXP (index, 1);
4059 return ((arm_address_register_rtx_p (xiop0, strict_p)
4060 && thumb2_index_mul_operand (xiop1))
4061 || (arm_address_register_rtx_p (xiop1, strict_p)
4062 && thumb2_index_mul_operand (xiop0)));
4064 else if (code == ASHIFT)
4066 rtx op = XEXP (index, 1);
4068 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4069 && GET_CODE (op) == CONST_INT
4071 && INTVAL (op) <= 3);
4074 return (code == CONST_INT
4075 && INTVAL (index) < 4096
4076 && INTVAL (index) > -256);
4079 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4081 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4085 if (GET_CODE (x) != REG)
4091 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4093 return (regno <= LAST_LO_REGNUM
4094 || regno > LAST_VIRTUAL_REGISTER
4095 || regno == FRAME_POINTER_REGNUM
4096 || (GET_MODE_SIZE (mode) >= 4
4097 && (regno == STACK_POINTER_REGNUM
4098 || regno >= FIRST_PSEUDO_REGISTER
4099 || x == hard_frame_pointer_rtx
4100 || x == arg_pointer_rtx)));
4103 /* Return nonzero if X is a legitimate index register. This is the case
4104 for any base register that can access a QImode object. */
4106 thumb1_index_register_rtx_p (rtx x, int strict_p)
4108 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4111 /* Return nonzero if X is a legitimate 16-bit Thumb-state address.
4113 The AP may be eliminated to either the SP or the FP, so we use the
4114 least common denominator, e.g. SImode, and offsets from 0 to 64.
4116 ??? Verify whether the above is the right approach.
4118 ??? Also, the FP may be eliminated to the SP, so perhaps that
4119 needs special handling also.
4121 ??? Look at how the mips16 port solves this problem. It probably uses
4122 better ways to solve some of these problems.
4124 Although it is not incorrect, we don't accept QImode and HImode
4125 addresses based on the frame pointer or arg pointer until the
4126 reload pass starts. This is so that eliminating such addresses
4127 into stack based ones won't produce impossible code. */
4129 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4131 /* ??? Not clear if this is right. Experiment. */
4132 if (GET_MODE_SIZE (mode) < 4
4133 && !(reload_in_progress || reload_completed)
4134 && (reg_mentioned_p (frame_pointer_rtx, x)
4135 || reg_mentioned_p (arg_pointer_rtx, x)
4136 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4137 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4138 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4139 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4142 /* Accept any base register. SP only in SImode or larger. */
4143 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4146 /* This is PC relative data before arm_reorg runs. */
4147 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4148 && GET_CODE (x) == SYMBOL_REF
4149 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4152 /* This is PC relative data after arm_reorg runs. */
4153 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4154 && (GET_CODE (x) == LABEL_REF
4155 || (GET_CODE (x) == CONST
4156 && GET_CODE (XEXP (x, 0)) == PLUS
4157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4158 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4161 /* Post-inc indexing only supported for SImode and larger. */
4162 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4163 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4166 else if (GET_CODE (x) == PLUS)
4168 /* REG+REG address can be any two index registers. */
4169 /* We disallow FRAME+REG addressing since we know that FRAME
4170 will be replaced with STACK, and SP relative addressing only
4171 permits SP+OFFSET. */
4172 if (GET_MODE_SIZE (mode) <= 4
4173 && XEXP (x, 0) != frame_pointer_rtx
4174 && XEXP (x, 1) != frame_pointer_rtx
4175 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4176 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4179 /* REG+const has 5-7 bit offset for non-SP registers. */
4180 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4181 || XEXP (x, 0) == arg_pointer_rtx)
4182 && GET_CODE (XEXP (x, 1)) == CONST_INT
4183 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4186 /* REG+const has 10-bit offset for SP, but only SImode and
4187 larger are supported. */
4188 /* ??? Should probably check for DI/DFmode overflow here
4189 just like GO_IF_LEGITIMATE_OFFSET does. */
4190 else if (GET_CODE (XEXP (x, 0)) == REG
4191 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4192 && GET_MODE_SIZE (mode) >= 4
4193 && GET_CODE (XEXP (x, 1)) == CONST_INT
4194 && INTVAL (XEXP (x, 1)) >= 0
4195 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4196 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4199 else if (GET_CODE (XEXP (x, 0)) == REG
4200 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4201 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4202 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4203 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4204 && GET_MODE_SIZE (mode) >= 4
4205 && GET_CODE (XEXP (x, 1)) == CONST_INT
4206 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4210 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4211 && GET_MODE_SIZE (mode) == 4
4212 && GET_CODE (x) == SYMBOL_REF
4213 && CONSTANT_POOL_ADDRESS_P (x)
4215 && symbol_mentioned_p (get_pool_constant (x))
4216 && ! pcrel_constant_p (get_pool_constant (x))))
4222 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4223 instruction of mode MODE. */
4225 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4227 switch (GET_MODE_SIZE (mode))
4230 return val >= 0 && val < 32;
4233 return val >= 0 && val < 64 && (val & 1) == 0;
4237 && (val + GET_MODE_SIZE (mode)) <= 128
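/* Hedged standalone restatement (illustrative only) of the Thumb-1
   offset ranges checked above.  MODE_SIZE stands in for
   GET_MODE_SIZE (mode) and is assumed to be 1, 2 or 4.  */
static inline int
thumb1_offset_ok_sketch (int mode_size, long val)
{
  switch (mode_size)
    {
    case 1:
      return val >= 0 && val < 32;			/* ldrb/strb */
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;	/* ldrh/strh */
    default:
      /* Word accesses: scaled 5-bit field, and the whole access must
	 stay within 128 bytes of the base register.  */
      return val >= 0 && (val & 3) == 0 && val + mode_size <= 128;
    }
}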
4242 /* Build the SYMBOL_REF for __tls_get_addr. */
4244 static GTY(()) rtx tls_get_addr_libfunc;
4247 get_tls_get_addr (void)
4249 if (!tls_get_addr_libfunc)
4250 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4251 return tls_get_addr_libfunc;
4255 arm_load_tp (rtx target)
4258 target = gen_reg_rtx (SImode);
4262 /* Can return in any reg. */
4263 emit_insn (gen_load_tp_hard (target));
4267 /* Always returned in r0. Immediately copy the result into a pseudo,
4268 otherwise other uses of r0 (e.g. setting up function arguments) may
4269 clobber the value. */
4273 emit_insn (gen_load_tp_soft ());
4275 tmp = gen_rtx_REG (SImode, 0);
4276 emit_move_insn (target, tmp);
4282 load_tls_operand (rtx x, rtx reg)
4286 if (reg == NULL_RTX)
4287 reg = gen_reg_rtx (SImode);
4289 tmp = gen_rtx_CONST (SImode, x);
4291 emit_move_insn (reg, tmp);
4297 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4299 rtx insns, label, labelno, sum;
4303 labelno = GEN_INT (pic_labelno++);
4304 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4305 label = gen_rtx_CONST (VOIDmode, label);
4307 sum = gen_rtx_UNSPEC (Pmode,
4308 gen_rtvec (4, x, GEN_INT (reloc), label,
4309 GEN_INT (TARGET_ARM ? 8 : 4)),
4311 reg = load_tls_operand (sum, reg);
4314 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4315 else if (TARGET_THUMB2)
4318 /* Thumb-2 only allows very limited access to the PC. Calculate
4319 the address in a temporary register. */
4320 tmp = gen_reg_rtx (SImode);
4321 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4322 emit_insn (gen_addsi3 (reg, reg, tmp));
4324 else /* TARGET_THUMB1 */
4325 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4327 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4328 Pmode, 1, reg, Pmode);
4330 insns = get_insns ();
4337 legitimize_tls_address (rtx x, rtx reg)
4339 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4340 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4344 case TLS_MODEL_GLOBAL_DYNAMIC:
4345 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4346 dest = gen_reg_rtx (Pmode);
4347 emit_libcall_block (insns, dest, ret, x);
4350 case TLS_MODEL_LOCAL_DYNAMIC:
4351 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4353 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4354 share the LDM result with other LD model accesses. */
4355 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4357 dest = gen_reg_rtx (Pmode);
4358 emit_libcall_block (insns, dest, ret, eqv);
4360 /* Load the addend. */
4361 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4363 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4364 return gen_rtx_PLUS (Pmode, dest, addend);
4366 case TLS_MODEL_INITIAL_EXEC:
4367 labelno = GEN_INT (pic_labelno++);
4368 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4369 label = gen_rtx_CONST (VOIDmode, label);
4370 sum = gen_rtx_UNSPEC (Pmode,
4371 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4372 GEN_INT (TARGET_ARM ? 8 : 4)),
4374 reg = load_tls_operand (sum, reg);
4377 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4378 else if (TARGET_THUMB2)
4381 /* Thumb-2 only allows very limited access to the PC. Calculate
4382 the address in a temporary register. */
4383 tmp = gen_reg_rtx (SImode);
4384 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4385 emit_insn (gen_addsi3 (reg, reg, tmp));
4386 emit_move_insn (reg, gen_const_mem (SImode, reg));
4390 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4391 emit_move_insn (reg, gen_const_mem (SImode, reg));
4394 tp = arm_load_tp (NULL_RTX);
4396 return gen_rtx_PLUS (Pmode, tp, reg);
4398 case TLS_MODEL_LOCAL_EXEC:
4399 tp = arm_load_tp (NULL_RTX);
4401 reg = gen_rtx_UNSPEC (Pmode,
4402 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4404 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4406 return gen_rtx_PLUS (Pmode, tp, reg);
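/* For illustration only (a hypothetical helper, not part of GCC): under
   the local-exec TLS model the final address is simply the thread
   pointer plus a link-time constant offset.  */
static inline char *
tls_le_addr_sketch (char *tp, long tpoff)
{
  return tp + tpoff;
}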
4413 /* Try machine-dependent ways of modifying an illegitimate address
4414 to be legitimate. If we find one, return the new, valid address. */
4416 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4418 if (arm_tls_symbol_p (x))
4419 return legitimize_tls_address (x, NULL_RTX);
4421 if (GET_CODE (x) == PLUS)
4423 rtx xop0 = XEXP (x, 0);
4424 rtx xop1 = XEXP (x, 1);
4426 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4427 xop0 = force_reg (SImode, xop0);
4429 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4430 xop1 = force_reg (SImode, xop1);
4432 if (ARM_BASE_REGISTER_RTX_P (xop0)
4433 && GET_CODE (xop1) == CONST_INT)
4435 HOST_WIDE_INT n, low_n;
4439 /* VFP addressing modes actually allow greater offsets, but for
4440 now we just stick with the lowest common denominator. */
4442 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4454 low_n = ((mode) == TImode ? 0
4455 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4459 base_reg = gen_reg_rtx (SImode);
4460 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4461 emit_move_insn (base_reg, val);
4462 x = plus_constant (base_reg, low_n);
4464 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4465 x = gen_rtx_PLUS (SImode, xop0, xop1);
4468 /* XXX We don't allow MINUS any more -- see comment in
4469 arm_legitimate_address_p (). */
4470 else if (GET_CODE (x) == MINUS)
4472 rtx xop0 = XEXP (x, 0);
4473 rtx xop1 = XEXP (x, 1);
4475 if (CONSTANT_P (xop0))
4476 xop0 = force_reg (SImode, xop0);
4478 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4479 xop1 = force_reg (SImode, xop1);
4481 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4482 x = gen_rtx_MINUS (SImode, xop0, xop1);
4485 /* Make sure to take full advantage of the pre-indexed addressing mode
4486 with absolute addresses which often allows for the base register to
4487 be factorized for multiple adjacent memory references, and it might
4488 even allow for the mini pool to be avoided entirely. */
4489 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4492 HOST_WIDE_INT mask, base, index;
4495 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4496 use an 8-bit index. So let's use a 12-bit index for SImode only and
4497 hope that arm_gen_constant will enable ldrb to use more bits. */
4498 bits = (mode == SImode) ? 12 : 8;
4499 mask = (1 << bits) - 1;
4500 base = INTVAL (x) & ~mask;
4501 index = INTVAL (x) & mask;
4502 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4504 /* It'll most probably be more efficient to generate the base
4505 with more bits set and use a negative index instead. */
4509 base_reg = force_reg (SImode, GEN_INT (base));
4510 x = plus_constant (base_reg, index);
4515 /* We need to find and carefully transform any SYMBOL and LABEL
4516 references; so go back to the original address expression. */
4517 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4519 if (new_x != orig_x)
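/* Standalone sketch (illustrative only; names are hypothetical) of the
   absolute-address split performed above for SImode: keep a 12-bit
   index and move the remaining bits into a base register that adjacent
   references can then share.  */
static inline void
split_const_addr_sketch (long addr, long *base, long *index)
{
  long mask = (1L << 12) - 1;	/* ldr/str SImode: 12-bit index field.  */
  *base = addr & ~mask;
  *index = addr & mask;
}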
4527 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4528 to be legitimate. If we find one, return the new, valid address. */
4530 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4532 if (arm_tls_symbol_p (x))
4533 return legitimize_tls_address (x, NULL_RTX);
4535 if (GET_CODE (x) == PLUS
4536 && GET_CODE (XEXP (x, 1)) == CONST_INT
4537 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4538 || INTVAL (XEXP (x, 1)) < 0))
4540 rtx xop0 = XEXP (x, 0);
4541 rtx xop1 = XEXP (x, 1);
4542 HOST_WIDE_INT offset = INTVAL (xop1);
4544 /* Try and fold the offset into a biasing of the base register and
4545 then offsetting that. Don't do this when optimizing for space
4546 since it can cause too many CSEs. */
4547 if (optimize_size && offset >= 0
4548 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4550 HOST_WIDE_INT delta;
4553 delta = offset - (256 - GET_MODE_SIZE (mode));
4554 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4555 delta = 31 * GET_MODE_SIZE (mode);
4557 delta = offset & (~31 * GET_MODE_SIZE (mode));
4559 xop0 = force_operand (plus_constant (xop0, offset - delta),
4561 x = plus_constant (xop0, delta);
4563 else if (offset < 0 && offset > -256)
4564 /* Small negative offsets are best done with a subtract before the
4565 dereference; forcing these into a register normally takes two
4566 instructions. */
4567 x = force_operand (x, NULL_RTX);
4570 /* For the remaining cases, force the constant into a register. */
4571 xop1 = force_reg (SImode, xop1);
4572 x = gen_rtx_PLUS (SImode, xop0, xop1);
4575 else if (GET_CODE (x) == PLUS
4576 && s_register_operand (XEXP (x, 1), SImode)
4577 && !s_register_operand (XEXP (x, 0), SImode))
4579 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4581 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4586 /* We need to find and carefully transform any SYMBOL and LABEL
4587 references; so go back to the original address expression. */
4588 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4590 if (new_x != orig_x)
4598 thumb_legitimize_reload_address (rtx *x_p,
4599 enum machine_mode mode,
4600 int opnum, int type,
4601 int ind_levels ATTRIBUTE_UNUSED)
4605 if (GET_CODE (x) == PLUS
4606 && GET_MODE_SIZE (mode) < 4
4607 && REG_P (XEXP (x, 0))
4608 && XEXP (x, 0) == stack_pointer_rtx
4609 && GET_CODE (XEXP (x, 1)) == CONST_INT
4610 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4615 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4616 Pmode, VOIDmode, 0, 0, opnum, type);
4620 /* If both registers are hi-regs, then it's better to reload the
4621 entire expression rather than each register individually. That
4622 only requires one reload register rather than two. */
4623 if (GET_CODE (x) == PLUS
4624 && REG_P (XEXP (x, 0))
4625 && REG_P (XEXP (x, 1))
4626 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4627 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4632 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4633 Pmode, VOIDmode, 0, 0, opnum, type);
4640 /* Test for various thread-local symbols. */
4642 /* Return TRUE if X is a thread-local symbol. */
4645 arm_tls_symbol_p (rtx x)
4647 if (! TARGET_HAVE_TLS)
4650 if (GET_CODE (x) != SYMBOL_REF)
4653 return SYMBOL_REF_TLS_MODEL (x) != 0;
4656 /* Helper for arm_tls_referenced_p. */
4659 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4661 if (GET_CODE (*x) == SYMBOL_REF)
4662 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4664 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4665 TLS offsets, not real symbol references. */
4666 if (GET_CODE (*x) == UNSPEC
4667 && XINT (*x, 1) == UNSPEC_TLS)
4673 /* Return TRUE if X contains any TLS symbol references. */
4676 arm_tls_referenced_p (rtx x)
4678 if (! TARGET_HAVE_TLS)
4681 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4687 arm_cannot_force_const_mem (rtx x)
4691 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4693 split_const (x, &base, &offset);
4694 if (GET_CODE (base) == SYMBOL_REF
4695 && !offset_within_block_p (base, INTVAL (offset)))
4698 return arm_tls_referenced_p (x);
4701 #define REG_OR_SUBREG_REG(X) \
4702 (GET_CODE (X) == REG \
4703 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4705 #define REG_OR_SUBREG_RTX(X) \
4706 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4708 #ifndef COSTS_N_INSNS
4709 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4712 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4714 enum machine_mode mode = GET_MODE (x);
4727 return COSTS_N_INSNS (1);
4730 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4733 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4740 return COSTS_N_INSNS (2) + cycles;
4742 return COSTS_N_INSNS (1) + 16;
4745 return (COSTS_N_INSNS (1)
4746 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4747 + (GET_CODE (SET_DEST (x)) == MEM)));
4752 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4754 if (thumb_shiftable_const (INTVAL (x)))
4755 return COSTS_N_INSNS (2);
4756 return COSTS_N_INSNS (3);
4758 else if ((outer == PLUS || outer == COMPARE)
4759 && INTVAL (x) < 256 && INTVAL (x) > -256)
4761 else if (outer == AND
4762 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4763 return COSTS_N_INSNS (1);
4764 else if (outer == ASHIFT || outer == ASHIFTRT
4765 || outer == LSHIFTRT)
4767 return COSTS_N_INSNS (2);
4773 return COSTS_N_INSNS (3);
4791 /* XXX another guess. */
4792 /* Memory costs quite a lot for the first word, but subsequent words
4793 load at the equivalent of a single insn each. */
4794 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4795 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4800 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4805 /* XXX still guessing. */
4806 switch (GET_MODE (XEXP (x, 0)))
4809 return (1 + (mode == DImode ? 4 : 0)
4810 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4813 return (4 + (mode == DImode ? 4 : 0)
4814 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4817 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
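/* Illustrative restatement (not used by the compiler) of the memory
   cost heuristic above, which also reappears in arm_rtx_costs_1 below:
   the first word pays the full load latency, each further word the
   equivalent of one insn.  */
static inline int
mem_cost_sketch (int mode_size, int units_per_word)
{
  return 10 + 4 * ((mode_size - 1) / units_per_word);
}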
4829 /* Worker routine for arm_rtx_costs. */
4830 /* ??? This needs updating for thumb2. */
4832 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4834 enum machine_mode mode = GET_MODE (x);
4835 enum rtx_code subcode;
4841 /* Memory costs quite a lot for the first word, but subsequent words
4842 load at the equivalent of a single insn each. */
4843 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4844 + (GET_CODE (x) == SYMBOL_REF
4845 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4851 return optimize_size ? COSTS_N_INSNS (2) : 100;
4854 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4861 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4863 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4864 + ((GET_CODE (XEXP (x, 0)) == REG
4865 || (GET_CODE (XEXP (x, 0)) == SUBREG
4866 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4868 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4869 || (GET_CODE (XEXP (x, 0)) == SUBREG
4870 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4872 + ((GET_CODE (XEXP (x, 1)) == REG
4873 || (GET_CODE (XEXP (x, 1)) == SUBREG
4874 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4875 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4879 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4881 extra_cost = rtx_cost (XEXP (x, 1), code);
4882 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4883 extra_cost += 4 * ARM_NUM_REGS (mode);
4888 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4889 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4890 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4891 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4894 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4895 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4896 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4897 && arm_const_double_rtx (XEXP (x, 1))))
4899 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4900 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4901 && arm_const_double_rtx (XEXP (x, 0))))
4904 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4905 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4906 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4907 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4908 || subcode == ASHIFTRT || subcode == LSHIFTRT
4909 || subcode == ROTATE || subcode == ROTATERT
4911 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4912 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4913 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4914 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4915 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4916 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4917 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4922 if (arm_arch6 && mode == SImode
4923 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4924 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4925 return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
4926 + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
4928 if (GET_CODE (XEXP (x, 0)) == MULT)
4930 extra_cost = rtx_cost (XEXP (x, 0), code);
4931 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4932 extra_cost += 4 * ARM_NUM_REGS (mode);
4936 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4937 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4938 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4939 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4940 && arm_const_double_rtx (XEXP (x, 1))))
4944 case AND: case XOR: case IOR:
4947 /* Normally the frame registers will be spilt into reg+const during
4948 reload, so it is a bad idea to combine them with other instructions,
4949 since then they might not be moved outside of loops. As a compromise
4950 we allow integration with ops that have a constant as their second
4951 operand. */
4952 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4953 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4954 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4955 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4956 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4960 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4961 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4962 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4963 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4966 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4967 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4968 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4969 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4970 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4973 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4974 return (1 + extra_cost
4975 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4976 || subcode == LSHIFTRT || subcode == ASHIFTRT
4977 || subcode == ROTATE || subcode == ROTATERT
4979 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4980 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4981 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4982 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4983 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4984 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4990 /* This should have been handled by the CPU specific routines. */
4994 if (arm_arch3m && mode == SImode
4995 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4996 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4997 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4998 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4999 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5000 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5005 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5006 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5010 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5012 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5015 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5023 return 4 + (mode == DImode ? 4 : 0);
5026 if (arm_arch_thumb2 && mode == SImode)
5027 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5029 if (GET_MODE (XEXP (x, 0)) == QImode)
5030 return (4 + (mode == DImode ? 4 : 0)
5031 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5034 if (arm_arch6 && mode == SImode)
5035 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5037 switch (GET_MODE (XEXP (x, 0)))
5040 return (1 + (mode == DImode ? 4 : 0)
5041 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5044 return (4 + (mode == DImode ? 4 : 0)
5045 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5048 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5063 if (const_ok_for_arm (INTVAL (x)))
5064 return outer == SET ? 2 : -1;
5065 else if (outer == AND
5066 && const_ok_for_arm (~INTVAL (x)))
5068 else if ((outer == COMPARE
5069 || outer == PLUS || outer == MINUS)
5070 && const_ok_for_arm (-INTVAL (x)))
5081 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5082 return outer == SET ? 2 : -1;
5083 else if ((outer == COMPARE || outer == PLUS)
5084 && neg_const_double_rtx_ok_for_fpa (x))
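/* Background sketch (illustrative only): the const_ok_for_arm tests
   used above accept exactly the ARM data-processing immediates, i.e.
   an 8-bit value rotated right by an even amount.  A standalone
   equivalent check:  */
static inline int
arm_dp_imm_ok_sketch (unsigned long i)
{
  int r;

  i &= 0xffffffffUL;
  for (r = 0; r < 32; r += 2)
    {
      /* Rotate I left by R bits (undoing a rotate-right) and test for
	 a value that fits in 8 bits.  */
      unsigned long rot = ((i << r) | (i >> ((32 - r) & 31))) & 0xffffffffUL;
      if (rot < 256)
	return 1;
    }
  return 0;
}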
5093 /* RTX costs when optimizing for size. */
5095 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5097 enum machine_mode mode = GET_MODE (x);
5101 /* XXX TBD. For now, use the standard costs. */
5102 *total = thumb1_rtx_costs (x, code, outer_code);
5106 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5110 /* A memory access costs 1 insn if the mode is small, or the address is
5111 a single register, otherwise it costs one insn per word. */
5112 if (REG_P (XEXP (x, 0)))
5113 *total = COSTS_N_INSNS (1);
5115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5122 /* Needs a libcall, so it costs about this. */
5123 *total = COSTS_N_INSNS (2);
5127 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5129 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5137 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5139 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5142 else if (mode == SImode)
5144 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5145 /* Slightly disparage register shifts, but not by much. */
5146 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5147 *total += 1 + rtx_cost (XEXP (x, 1), code);
5151 /* Needs a libcall. */
5152 *total = COSTS_N_INSNS (2);
5156 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5158 *total = COSTS_N_INSNS (1);
5164 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5165 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5167 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5168 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5169 || subcode1 == ROTATE || subcode1 == ROTATERT
5170 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5171 || subcode1 == ASHIFTRT)
5173 /* It's just the cost of the two operands. */
5178 *total = COSTS_N_INSNS (1);
5182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5186 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5188 *total = COSTS_N_INSNS (1);
5193 case AND: case XOR: case IOR:
5196 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5198 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5199 || subcode == LSHIFTRT || subcode == ASHIFTRT
5200 || (code == AND && subcode == NOT))
5202 /* It's just the cost of the two operands. */
5208 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5212 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5216 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5217 *total = COSTS_N_INSNS (1);
5220 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5229 if (cc_register (XEXP (x, 0), VOIDmode))
5232 *total = COSTS_N_INSNS (1);
5236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5237 *total = COSTS_N_INSNS (1);
5239 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5244 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5246 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5247 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5250 *total += COSTS_N_INSNS (1);
5255 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5257 switch (GET_MODE (XEXP (x, 0)))
5260 *total += COSTS_N_INSNS (1);
5264 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5270 *total += COSTS_N_INSNS (2);
5275 *total += COSTS_N_INSNS (1);
5280 if (const_ok_for_arm (INTVAL (x)))
5281 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5282 else if (const_ok_for_arm (~INTVAL (x)))
5283 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5284 else if (const_ok_for_arm (-INTVAL (x)))
5286 if (outer_code == COMPARE || outer_code == PLUS
5287 || outer_code == MINUS)
5290 *total = COSTS_N_INSNS (1);
5293 *total = COSTS_N_INSNS (2);
5299 *total = COSTS_N_INSNS (2);
5303 *total = COSTS_N_INSNS (4);
5307 if (mode != VOIDmode)
5308 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5310 *total = COSTS_N_INSNS (4); /* Who knows? */
5315 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5316 supported on any "slowmul" cores, so it can be ignored. */
5319 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5321 enum machine_mode mode = GET_MODE (x);
5325 *total = thumb1_rtx_costs (x, code, outer_code);
5332 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5339 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5341 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5342 & (unsigned HOST_WIDE_INT) 0xffffffff);
5343 int cost, const_ok = const_ok_for_arm (i);
5344 int j, booth_unit_size;
5346 /* Tune as appropriate. */
5347 cost = const_ok ? 4 : 8;
5348 booth_unit_size = 2;
5349 for (j = 0; i && j < 32; j += booth_unit_size)
5351 i >>= booth_unit_size;
5359 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5360 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5364 *total = arm_rtx_costs_1 (x, code, outer_code);
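/* Illustrative standalone model of the Booth-step scan above: the
   multiplier is consumed BOOTH_UNIT_SIZE bits per step, and each step
   is assumed to add STEP_COST to the base cost.  The slowmul loop
   corresponds to a unit size of 2; the fastmul and XScale variants
   below use 8.  */
static inline int
booth_mul_cost_sketch (unsigned long multiplier, int booth_unit_size,
		       int base_cost, int step_cost)
{
  int j, cost = base_cost;

  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cost += step_cost;
    }
  return cost;
}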
5370 /* RTX cost for cores with a fast multiply unit (M variants). */
5373 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5375 enum machine_mode mode = GET_MODE (x);
5379 *total = thumb1_rtx_costs (x, code, outer_code);
5383 /* ??? Should Thumb-2 use different costs? */
5387 /* There is no point basing this on the tuning, since it is always the
5388 fast variant if it exists at all. */
5390 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5391 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5392 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5399 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5406 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5408 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5409 & (unsigned HOST_WIDE_INT) 0xffffffff);
5410 int cost, const_ok = const_ok_for_arm (i);
5411 int j, booth_unit_size;
5413 /* Tune as appropriate. */
5414 cost = const_ok ? 4 : 8;
5415 booth_unit_size = 8;
5416 for (j = 0; i && j < 32; j += booth_unit_size)
5418 i >>= booth_unit_size;
5426 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5427 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5431 *total = arm_rtx_costs_1 (x, code, outer_code);
5437 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5438 so it can be ignored. */
5441 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5443 enum machine_mode mode = GET_MODE (x);
5447 *total = thumb1_rtx_costs (x, code, outer_code);
5454 /* There is no point basing this on the tuning, since it is always the
5455 fast variant if it exists at all. */
5457 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5458 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5459 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5466 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5473 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5475 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5476 & (unsigned HOST_WIDE_INT) 0xffffffff);
5477 int cost, const_ok = const_ok_for_arm (i);
5478 unsigned HOST_WIDE_INT masked_const;
5480 /* The cost will be related to two insns.
5481 First a load of the constant (MOV or LDR), then a multiply. */
5484 cost += 1; /* LDR is probably more expensive because
5485 of longer result latency. */
5486 masked_const = i & 0xffff8000;
5487 if (masked_const != 0 && masked_const != 0xffff8000)
5489 masked_const = i & 0xf8000000;
5490 if (masked_const == 0 || masked_const == 0xf8000000)
5499 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5500 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5504 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5505 will stall until the multiplication is complete. */
5506 if (GET_CODE (XEXP (x, 0)) == MULT)
5507 *total = 4 + rtx_cost (XEXP (x, 0), code);
5509 *total = arm_rtx_costs_1 (x, code, outer_code);
5513 *total = arm_rtx_costs_1 (x, code, outer_code);
5519 /* RTX costs for 9e (and later) cores. */
5522 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5524 enum machine_mode mode = GET_MODE (x);
5533 *total = COSTS_N_INSNS (3);
5537 *total = thumb1_rtx_costs (x, code, outer_code);
5545 /* There is no point basing this on the tuning, since it is always the
5546 fast variant if it exists at all. */
5548 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5549 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5550 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5575 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5579 *total = arm_rtx_costs_1 (x, code, outer_code);
5583 /* All address computations that can be done are free, but rtx cost returns
5584 the same for practically all of them. So we weight the different types
5585 of address here in the order (most preferred first):
5586 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5588 arm_arm_address_cost (rtx x)
5590 enum rtx_code c = GET_CODE (x);
5592 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5594 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5597 if (c == PLUS || c == MINUS)
5599 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5602 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5612 arm_thumb_address_cost (rtx x)
5614 enum rtx_code c = GET_CODE (x);
5619 && GET_CODE (XEXP (x, 0)) == REG
5620 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5627 arm_address_cost (rtx x)
5629 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5633 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5637 /* Some true dependencies can have a higher cost depending
5638 on precisely how certain input operands are used. */
5640 && REG_NOTE_KIND (link) == 0
5641 && recog_memoized (insn) >= 0
5642 && recog_memoized (dep) >= 0)
5644 int shift_opnum = get_attr_shift (insn);
5645 enum attr_type attr_type = get_attr_type (dep);
5647 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5648 operand for INSN. If we have a shifted input operand and the
5649 instruction we depend on is another ALU instruction, then we may
5650 have to account for an additional stall. */
5651 if (shift_opnum != 0
5652 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5654 rtx shifted_operand;
5657 /* Get the shifted operand. */
5658 extract_insn (insn);
5659 shifted_operand = recog_data.operand[shift_opnum];
5661 /* Iterate over all the operands in DEP. If we write an operand
5662 that overlaps with SHIFTED_OPERAND, then we have to increase the
5663 cost of this dependency. */
5665 preprocess_constraints ();
5666 for (opno = 0; opno < recog_data.n_operands; opno++)
5668 /* We can ignore strict inputs. */
5669 if (recog_data.operand_type[opno] == OP_IN)
5672 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5679 /* XXX This is not strictly true for the FPA. */
5680 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5681 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5684 /* Call insns don't incur a stall, even if they follow a load. */
5685 if (REG_NOTE_KIND (link) == 0
5686 && GET_CODE (insn) == CALL_INSN)
5689 if ((i_pat = single_set (insn)) != NULL
5690 && GET_CODE (SET_SRC (i_pat)) == MEM
5691 && (d_pat = single_set (dep)) != NULL
5692 && GET_CODE (SET_DEST (d_pat)) == MEM)
5694 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5695 /* This is a load after a store; there is no conflict if the load reads
5696 from a cached area. Assume that loads from the stack, and from the
5697 constant pool are cached, and that others will miss. This is a
5698 hack. */
5700 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5701 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5702 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5703 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5710 static int fp_consts_inited = 0;
5712 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5713 static const char * const strings_fp[8] =
5714 {
5715   "0",   "1",   "2",   "3",
5716   "4",   "5",   "0.5", "10"
5717 };
5719 static REAL_VALUE_TYPE values_fp[8];
5722 init_fp_table (void)
5727 if (TARGET_VFP)
5728 fp_consts_inited = 1;
5729 else
5730 fp_consts_inited = 8;
5732 for (i = 0; i < fp_consts_inited; i++)
5734 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5739 /* Return TRUE if rtx X is a valid immediate FP constant. */
5741 arm_const_double_rtx (rtx x)
5746 if (!fp_consts_inited)
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5750 if (REAL_VALUE_MINUS_ZERO (r))
5753 for (i = 0; i < fp_consts_inited; i++)
5754 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5760 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5762 neg_const_double_rtx_ok_for_fpa (rtx x)
5767 if (!fp_consts_inited)
5770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5771 r = REAL_VALUE_NEGATE (r);
5772 if (REAL_VALUE_MINUS_ZERO (r))
5775 for (i = 0; i < 8; i++)
5776 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5783 /* VFPv3 has a fairly wide range of representable immediates, formed from
5784 "quarter-precision" floating-point values. These can be evaluated using this
5785 formula (with ^ for exponentiation):
5787 -1^s * n * 2^-r
5789 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5790 16 <= n <= 31 and 0 <= r <= 7.
5792 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5794 - A (most-significant) is the sign bit.
5795 - BCD are the exponent (encoded as r XOR 3).
5796 - EFGH are the mantissa (encoded as n - 16).
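/* Hypothetical helper (illustrative only) packing the fields described
   above, assuming the caller has already validated 16 <= n <= 31 and
   0 <= r <= 7.  For example, 1.0 = 16 * 2^-4 packs to 0x70.  */
static inline unsigned
vfp3_imm_pack_sketch (unsigned s, unsigned n, unsigned r)
{
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}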
5799 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5800 fconst[sd] instruction, or -1 if X isn't suitable. */
5802 vfp3_const_double_index (rtx x)
5804 REAL_VALUE_TYPE r, m;
5806 unsigned HOST_WIDE_INT mantissa, mant_hi;
5807 unsigned HOST_WIDE_INT mask;
5808 HOST_WIDE_INT m1, m2;
5809 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5811 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5814 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5816 /* We can't represent these things, so detect them first. */
5817 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5820 /* Extract sign, exponent and mantissa. */
5821 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5822 r = REAL_VALUE_ABS (r);
5823 exponent = REAL_EXP (&r);
5824 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5825 highest (sign) bit, with a fixed binary point at bit point_pos.
5826 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5827 bits for the mantissa, this may fail (low bits would be lost). */
5828 real_ldexp (&m, &r, point_pos - exponent);
5829 REAL_VALUE_TO_INT (&m1, &m2, m);
5833 /* If there are bits set in the low part of the mantissa, we can't
5834 represent this value. */
5838 /* Now make it so that mantissa contains the most-significant bits, and move
5839 the point_pos to indicate that the least-significant bits have been
5840 discarded. */
5841 point_pos -= HOST_BITS_PER_WIDE_INT;
5844 /* We can permit four significant bits of mantissa only, plus a high bit
5845 which is always 1. */
5846 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5847 if ((mantissa & mask) != 0)
5850 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5851 mantissa >>= point_pos - 5;
5853 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5854 floating-point immediate zero with Neon using an integer-zero load, but
5855 that case is handled elsewhere.) */
5859 gcc_assert (mantissa >= 16 && mantissa <= 31);
5861 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5862 normalized significands are in the range [1, 2). (Our mantissa is shifted
5863 left 4 places at this point relative to normalized IEEE754 values). GCC
5864 internally uses [0.5, 1) (see real.c), so the exponent returned from
5865 REAL_EXP must be altered. */
5866 exponent = 5 - exponent;
5868 if (exponent < 0 || exponent > 7)
5871 /* Sign, mantissa and exponent are now in the correct form to plug into the
5872 formula described in the comment above. */
5873 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5876 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5878 vfp3_const_double_rtx (rtx x)
5883 return vfp3_const_double_index (x) != -1;
5886 /* Recognize immediates which can be used in various Neon instructions. Legal
5887 immediates are described by the following table (for VMVN variants, the
5888 bitwise inverse of the constant shown is recognized. In either case, VMOV
5889 is output and the correct instruction to use for a given constant is chosen
5890 by the assembler). The constant shown is replicated across all elements of
5891 the destination vector.
5893 insn elems variant constant (binary)
5894 ---- ----- ------- -----------------
5895 vmov i32 0 00000000 00000000 00000000 abcdefgh
5896 vmov i32 1 00000000 00000000 abcdefgh 00000000
5897 vmov i32 2 00000000 abcdefgh 00000000 00000000
5898 vmov i32 3 abcdefgh 00000000 00000000 00000000
5899 vmov i16 4 00000000 abcdefgh
5900 vmov i16 5 abcdefgh 00000000
5901 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5902 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5903 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5904 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5905 vmvn i16 10 00000000 abcdefgh
5906 vmvn i16 11 abcdefgh 00000000
5907 vmov i32 12 00000000 00000000 abcdefgh 11111111
5908 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5909 vmov i32 14 00000000 abcdefgh 11111111 11111111
5910 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5911 vmov i8 16 abcdefgh
5912 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5913 eeeeeeee ffffffff gggggggg hhhhhhhh
5914 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5916 For case 18, B = !b. Representable values are exactly those accepted by
5917 vfp3_const_double_index, but are output as floating-point numbers rather
5918 than indices.
5920 Variants 0-5 (inclusive) may also be used as immediates for the second
5921 operand of VORR/VBIC instructions.
5923 The INVERSE argument causes the bitwise inverse of the given operand to be
5924 recognized instead (used for recognizing legal immediates for the VAND/VORN
5925 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5926 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5927 output, rather than the real insns vbic/vorr).
5929 INVERSE makes no difference to the recognition of float vectors.
5931 The return value is the variant of immediate as shown in the above table, or
5932 -1 if the given value doesn't match any of the listed patterns.
5935 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5936 rtx *modconst, int *elementwidth)
5938 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5940 for (i = 0; i < idx; i += (STRIDE)) \
5945 immtype = (CLASS); \
5946 elsize = (ELSIZE); \
5950 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5951 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5952 unsigned char bytes[16];
5953 int immtype = -1, matches;
5954 unsigned int invmask = inverse ? 0xff : 0;
5956 /* Vectors of float constants. */
5957 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5959 rtx el0 = CONST_VECTOR_ELT (op, 0);
5962 if (!vfp3_const_double_rtx (el0))
5965 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5967 for (i = 1; i < n_elts; i++)
5969 rtx elt = CONST_VECTOR_ELT (op, i);
5972 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5974 if (!REAL_VALUES_EQUAL (r0, re))
5979 *modconst = CONST_VECTOR_ELT (op, 0);
5987 /* Splat vector constant out into a byte vector. */
5988 for (i = 0; i < n_elts; i++)
5990 rtx el = CONST_VECTOR_ELT (op, i);
5991 unsigned HOST_WIDE_INT elpart;
5992 unsigned int part, parts;
5994 if (GET_CODE (el) == CONST_INT)
5996 elpart = INTVAL (el);
5999 else if (GET_CODE (el) == CONST_DOUBLE)
6001 elpart = CONST_DOUBLE_LOW (el);
6007 for (part = 0; part < parts; part++)
6010 for (byte = 0; byte < innersize; byte++)
6012 bytes[idx++] = (elpart & 0xff) ^ invmask;
6013 elpart >>= BITS_PER_UNIT;
6015 if (GET_CODE (el) == CONST_DOUBLE)
6016 elpart = CONST_DOUBLE_HIGH (el);
6021 gcc_assert (idx == GET_MODE_SIZE (mode));
6025 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6026 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6028 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6029 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6031 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6034 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6035 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6037 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6039 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6041 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6042 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6044 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6045 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6047 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6048 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6050 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6051 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6053 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6055 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6057 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6058 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6060 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6061 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6063 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6064 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6066 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6069 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6071 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6072 && bytes[i] == bytes[(i + 8) % idx]);
6080 *elementwidth = elsize;
6084 unsigned HOST_WIDE_INT imm = 0;
6086 /* Un-invert bytes of recognized vector, if necessary. */
6088 for (i = 0; i < idx; i++)
6089 bytes[i] ^= invmask;
6093 /* FIXME: Broken on 32-bit H_W_I hosts. */
6094 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6096 for (i = 0; i < 8; i++)
6097 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6098 << (i * BITS_PER_UNIT);
6100 *modconst = GEN_INT (imm);
6104 unsigned HOST_WIDE_INT imm = 0;
6106 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6107 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6109 *modconst = GEN_INT (imm);
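/* Illustrative standalone check (names hypothetical) for the simplest
   entry in the table above, variant 16: a vmov.i8 splat in which every
   byte of the vector equals the 8-bit immediate.  */
static inline int
neon_i8_splat_ok_sketch (const unsigned char *bytes, unsigned int nbytes)
{
  unsigned int i;

  for (i = 0; i < nbytes; i++)
    if (bytes[i] != bytes[0])
      return 0;
  return 1;
}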
6117 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6118 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6119 float elements), and a modified constant (whatever should be output for a
6120 VMOV) in *MODCONST. */
6123 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6124 rtx *modconst, int *elementwidth)
6128 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6134 *modconst = tmpconst;
6137 *elementwidth = tmpwidth;
6142 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6143 the immediate is valid, write a constant suitable for using as an operand
6144 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6145 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6148 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6149 rtx *modconst, int *elementwidth)
6153 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6155 if (retval < 0 || retval > 5)
6159 *modconst = tmpconst;
6162 *elementwidth = tmpwidth;
6167 /* Return a string suitable for output of Neon immediate logic operation
6168 MNEM. */
6171 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6172 int inverse, int quad)
6174 int width, is_valid;
6175 static char templ[40];
6177 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6179 gcc_assert (is_valid != 0);
6182 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6184 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6189 /* Output a sequence of pairwise operations to implement a reduction.
6190 NOTE: We do "too much work" here, because pairwise operations work on two
6191 registers-worth of operands in one go. Unfortunately, we don't think we
6192 can exploit those extra calculations to do the full operation in fewer steps.
6193 Although all vector elements of the result but the first are ignored, we
6194 actually calculate the same result in each of the elements. An alternative
6195 such as initially loading a vector with zero to use as each of the second
6196 operands would use up an additional register and take an extra instruction,
6197 for no particular gain. */
6200 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6201 rtx (*reduc) (rtx, rtx, rtx))
6203 enum machine_mode inner = GET_MODE_INNER (mode);
6204 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6207 for (i = parts / 2; i >= 1; i /= 2)
6209 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6210 emit_insn (reduc (dest, tmpsum, tmpsum));
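/* For illustration: a PARTS-element vector is reduced by the loop above
   in log2(PARTS) pairwise steps.  A hypothetical standalone count:  */
static inline int
pairwise_reduce_steps_sketch (unsigned int parts)
{
  int steps = 0;

  while (parts > 1)
    {
      parts /= 2;
      steps++;
    }
  return steps;
}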
6215 /* Initialize a vector with non-constant elements. FIXME: We can do better
6216 than the current implementation (building a vector on the stack and then
6217 loading it) in many cases. See rs6000.c. */
6220 neon_expand_vector_init (rtx target, rtx vals)
6222 enum machine_mode mode = GET_MODE (target);
6223 enum machine_mode inner = GET_MODE_INNER (mode);
6224 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6227 gcc_assert (VECTOR_MODE_P (mode));
6229 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6230 for (i = 0; i < n_elts; i++)
6231 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6232 XVECEXP (vals, 0, i));
6234 emit_move_insn (target, mem);
6237 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6238 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6239 reported source locations are bogus. */
6242 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6247 gcc_assert (GET_CODE (operand) == CONST_INT);
6249 lane = INTVAL (operand);
6251 if (lane < low || lane >= high)
6255 /* Bounds-check lanes. */
6258 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6260 bounds_check (operand, low, high, "lane out of range");
6263 /* Bounds-check constants. */
6266 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6268 bounds_check (operand, low, high, "constant out of range");
6272 neon_element_bits (enum machine_mode mode)
6275 return GET_MODE_BITSIZE (mode);
6277 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6281 /* Predicates for `match_operand' and `match_operator'. */
6283 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6285 cirrus_memory_offset (rtx op)
6287 /* Reject eliminable registers. */
6288 if (! (reload_in_progress || reload_completed)
6289 && ( reg_mentioned_p (frame_pointer_rtx, op)
6290 || reg_mentioned_p (arg_pointer_rtx, op)
6291 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6292 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6293 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6294 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6297 if (GET_CODE (op) == MEM)
6303 /* Match: (mem (reg)). */
6304 if (GET_CODE (ind) == REG)
6310 if (GET_CODE (ind) == PLUS
6311 && GET_CODE (XEXP (ind, 0)) == REG
6312 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6313 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6320 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6321 WB is true if full writeback address modes are allowed and is false
6322 if limited writeback address modes (POST_INC and PRE_DEC) are
6323 allowed. */
6326 arm_coproc_mem_operand (rtx op, bool wb)
6330 /* Reject eliminable registers. */
6331 if (! (reload_in_progress || reload_completed)
6332 && ( reg_mentioned_p (frame_pointer_rtx, op)
6333 || reg_mentioned_p (arg_pointer_rtx, op)
6334 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6335 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6336 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6337 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6340 /* Constants are converted into offsets from labels. */
6341 if (GET_CODE (op) != MEM)
6346 if (reload_completed
6347 && (GET_CODE (ind) == LABEL_REF
6348 || (GET_CODE (ind) == CONST
6349 && GET_CODE (XEXP (ind, 0)) == PLUS
6350 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6351 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6354 /* Match: (mem (reg)). */
6355 if (GET_CODE (ind) == REG)
6356 return arm_address_register_rtx_p (ind, 0);
6358 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
6359 acceptable in any case (subject to verification by
6360 arm_address_register_rtx_p). We need WB to be true to accept
6361 PRE_INC and POST_DEC. */
6362 if (GET_CODE (ind) == POST_INC
6363 || GET_CODE (ind) == PRE_DEC
6365 && (GET_CODE (ind) == PRE_INC
6366 || GET_CODE (ind) == POST_DEC)))
6367 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6370 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6371 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6372 && GET_CODE (XEXP (ind, 1)) == PLUS
6373 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6374 ind = XEXP (ind, 1);
6379 if (GET_CODE (ind) == PLUS
6380 && GET_CODE (XEXP (ind, 0)) == REG
6381 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6382 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6383 && INTVAL (XEXP (ind, 1)) > -1024
6384 && INTVAL (XEXP (ind, 1)) < 1024
6385 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
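/* Standalone restatement (illustrative only) of the coprocessor
   reg+const rule checked just above: a word-aligned offset of
   magnitude below 1024.  */
static inline int
coproc_offset_ok_sketch (long off)
{
  return off > -1024 && off < 1024 && (off & 3) == 0;
}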
6391 /* Return TRUE if OP is a memory operand which we can load or store a vector
6392 to/from. If CORE is true, we're moving from ARM registers, not Neon
6393 registers. */
6395 neon_vector_mem_operand (rtx op, bool core)
6399 /* Reject eliminable registers. */
6400 if (! (reload_in_progress || reload_completed)
6401 && ( reg_mentioned_p (frame_pointer_rtx, op)
6402 || reg_mentioned_p (arg_pointer_rtx, op)
6403 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6404 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6405 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6406 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6409 /* Constants are converted into offsets from labels. */
6410 if (GET_CODE (op) != MEM)
6415 if (reload_completed
6416 && (GET_CODE (ind) == LABEL_REF
6417 || (GET_CODE (ind) == CONST
6418 && GET_CODE (XEXP (ind, 0)) == PLUS
6419 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6420 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6423 /* Match: (mem (reg)). */
6424 if (GET_CODE (ind) == REG)
6425 return arm_address_register_rtx_p (ind, 0);
6427 /* Allow post-increment with Neon registers. */
6428 if (!core && GET_CODE (ind) == POST_INC)
6429 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6432 /* FIXME: We can support this too if we use VLD1/VST1. */
6434 && GET_CODE (ind) == POST_MODIFY
6435 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6436 && GET_CODE (XEXP (ind, 1)) == PLUS
6437 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6438 ind = XEXP (ind, 1);
6445 && GET_CODE (ind) == PLUS
6446 && GET_CODE (XEXP (ind, 0)) == REG
6447 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6448 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6449 && INTVAL (XEXP (ind, 1)) > -1024
6450 && INTVAL (XEXP (ind, 1)) < 1016
6451 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6457 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6458 type. */
6460 neon_struct_mem_operand (rtx op)
6464 /* Reject eliminable registers. */
6465 if (! (reload_in_progress || reload_completed)
6466 && ( reg_mentioned_p (frame_pointer_rtx, op)
6467 || reg_mentioned_p (arg_pointer_rtx, op)
6468 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6469 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6470 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6471 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6474 /* Constants are converted into offsets from labels. */
6475 if (GET_CODE (op) != MEM)
6480 if (reload_completed
6481 && (GET_CODE (ind) == LABEL_REF
6482 || (GET_CODE (ind) == CONST
6483 && GET_CODE (XEXP (ind, 0)) == PLUS
6484 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6485 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6488 /* Match: (mem (reg)). */
6489 if (GET_CODE (ind) == REG)
6490 return arm_address_register_rtx_p (ind, 0);
6495 /* Return true if X is a register that will be eliminated later on. */
6497 arm_eliminable_register (rtx x)
6499 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6500 || REGNO (x) == ARG_POINTER_REGNUM
6501 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6502 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6505 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
6506 coprocessor registers. Otherwise return NO_REGS. */
6509 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6512 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6513 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6514 && neon_vector_mem_operand (x, FALSE))
6517 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6520 return GENERAL_REGS;
6523 /* Values which must be returned in the most-significant end of the return
6524 register. */
6527 arm_return_in_msb (const_tree valtype)
6529 return (TARGET_AAPCS_BASED
6531 && (AGGREGATE_TYPE_P (valtype)
6532 || TREE_CODE (valtype) == COMPLEX_TYPE));
6535 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6536 Used by the Cirrus Maverick code, which has to work around
6537 a hardware bug triggered by such instructions. */
6539 arm_memory_load_p (rtx insn)
6541 rtx body, lhs, rhs;
6543 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6546 body = PATTERN (insn);
6548 if (GET_CODE (body) != SET)
6551 lhs = XEXP (body, 0);
6552 rhs = XEXP (body, 1);
6554 lhs = REG_OR_SUBREG_RTX (lhs);
6556 /* If the destination is not a general purpose
6557 register we do not have to worry. */
6558 if (GET_CODE (lhs) != REG
6559 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6562 /* As well as loads from memory we also have to react
6563 to loads of invalid constants which will be turned
6564 into loads from the minipool. */
6565 return (GET_CODE (rhs) == MEM
6566 || GET_CODE (rhs) == SYMBOL_REF
6567 || note_invalid_constants (insn, -1, false));
6570 /* Return TRUE if INSN is a Cirrus instruction. */
6572 arm_cirrus_insn_p (rtx insn)
6574 enum attr_cirrus attr;
6576 /* get_attr cannot accept USE or CLOBBER. */
6578 || GET_CODE (insn) != INSN
6579 || GET_CODE (PATTERN (insn)) == USE
6580 || GET_CODE (PATTERN (insn)) == CLOBBER)
6583 attr = get_attr_cirrus (insn);
6585 return attr != CIRRUS_NOT;
6588 /* Cirrus reorg for invalid instruction combinations. */
6590 cirrus_reorg (rtx first)
6592 enum attr_cirrus attr;
6593 rtx body = PATTERN (first);
6597 /* Any branch must be followed by 2 non Cirrus instructions. */
6598 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6601 t = next_nonnote_insn (first);
6603 if (arm_cirrus_insn_p (t))
6606 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6610 emit_insn_after (gen_nop (), first);
6615 /* (float (blah)) is in parallel with a clobber. */
6616 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6617 body = XVECEXP (body, 0, 0);
6619 if (GET_CODE (body) == SET)
6621 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6623 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6624 be followed by a non-Cirrus insn. */
6625 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6627 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6628 emit_insn_after (gen_nop (), first);
6632 else if (arm_memory_load_p (first))
6634 unsigned int arm_regno;
6636 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6637 ldr/cfmv64hr combination where the Rd field is the same
6638 in both instructions must be split with a non-Cirrus
6645 /* Get the ARM register number for the ldr insn. */
6646 if (GET_CODE (lhs) == REG)
6647 arm_regno = REGNO (lhs);
6650 gcc_assert (GET_CODE (rhs) == REG);
6651 arm_regno = REGNO (rhs);
6655 first = next_nonnote_insn (first);
6657 if (! arm_cirrus_insn_p (first))
6660 body = PATTERN (first);
6662 /* (float (blah)) is in parallel with a clobber. */
6663 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6664 body = XVECEXP (body, 0, 0);
6666 if (GET_CODE (body) == FLOAT)
6667 body = XEXP (body, 0);
6669 if (get_attr_cirrus (first) == CIRRUS_MOVE
6670 && GET_CODE (XEXP (body, 1)) == REG
6671 && arm_regno == REGNO (XEXP (body, 1)))
6672 emit_insn_after (gen_nop (), first);
6678 /* get_attr cannot accept USE or CLOBBER. */
6680 || GET_CODE (first) != INSN
6681 || GET_CODE (PATTERN (first)) == USE
6682 || GET_CODE (PATTERN (first)) == CLOBBER)
6685 attr = get_attr_cirrus (first);
6687 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6688 must be followed by a non-coprocessor instruction. */
6689 if (attr == CIRRUS_COMPARE)
6693 t = next_nonnote_insn (first);
6695 if (arm_cirrus_insn_p (t))
6698 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6702 emit_insn_after (gen_nop (), first);
6708 /* Return TRUE if X references a SYMBOL_REF. */
6710 symbol_mentioned_p (rtx x)
6715 if (GET_CODE (x) == SYMBOL_REF)
6718 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6719 are constant offsets, not symbols. */
6720 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6723 fmt = GET_RTX_FORMAT (GET_CODE (x));
6725 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6731 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6732 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6735 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6742 /* Return TRUE if X references a LABEL_REF. */
6744 label_mentioned_p (rtx x)
6749 if (GET_CODE (x) == LABEL_REF)
6752 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6753 instruction, but they are constant offsets, not symbols. */
6754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6757 fmt = GET_RTX_FORMAT (GET_CODE (x));
6758 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6764 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6765 if (label_mentioned_p (XVECEXP (x, i, j)))
6768 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6776 tls_mentioned_p (rtx x)
6778 switch (GET_CODE (x))
6781 return tls_mentioned_p (XEXP (x, 0));
6784 if (XINT (x, 1) == UNSPEC_TLS)
6792 /* Must not copy a SET whose source operand is PC-relative. */
6795 arm_cannot_copy_insn_p (rtx insn)
6797 rtx pat = PATTERN (insn);
6799 if (GET_CODE (pat) == SET)
6801 rtx rhs = SET_SRC (pat);
6803 if (GET_CODE (rhs) == UNSPEC
6804 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6807 if (GET_CODE (rhs) == MEM
6808 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6809 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6819 enum rtx_code code = GET_CODE (x);
6836 /* Return 1 if memory locations are adjacent. */
6838 adjacent_mem_locations (rtx a, rtx b)
6840 /* We don't guarantee to preserve the order of these memory refs. */
6841 if (volatile_refs_p (a) || volatile_refs_p (b))
6844 if ((GET_CODE (XEXP (a, 0)) == REG
6845 || (GET_CODE (XEXP (a, 0)) == PLUS
6846 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6847 && (GET_CODE (XEXP (b, 0)) == REG
6848 || (GET_CODE (XEXP (b, 0)) == PLUS
6849 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6851 HOST_WIDE_INT val0 = 0, val1 = 0;
6855 if (GET_CODE (XEXP (a, 0)) == PLUS)
6857 reg0 = XEXP (XEXP (a, 0), 0);
6858 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6863 if (GET_CODE (XEXP (b, 0)) == PLUS)
6865 reg1 = XEXP (XEXP (b, 0), 0);
6866 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6871 /* Don't accept any offset that will require multiple
6872 instructions to handle, since this would cause the
6873 arith_adjacentmem pattern to output an overlong sequence. */
6874 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
6877 /* Don't allow an eliminable register: register elimination can make
6878 the offset too large. */
6879 if (arm_eliminable_register (reg0))
6882 val_diff = val1 - val0;
6886 /* If the target has load delay slots, then there's no benefit
6887 to using an ldm instruction unless the offset is zero and
6888 we are optimizing for size. */
6889 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6890 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6891 && (val_diff == 4 || val_diff == -4));
6894 return ((REGNO (reg0) == REGNO (reg1))
6895 && (val_diff == 4 || val_diff == -4));
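/* Illustrative example (register names are arbitrary): with
   A == (mem (plus (reg r4) (const_int 8))) and
   B == (mem (plus (reg r4) (const_int 12))), we get reg0 == reg1 == r4,
   val0 == 8, val1 == 12 and val_diff == 4, so the pair qualifies as
   adjacent and ascending (subject to the load-delay-slot check above);
   swapping A and B gives val_diff == -4, the descending case.  */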
6902 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6903 HOST_WIDE_INT *load_offset)
6905 int unsorted_regs[4];
6906 HOST_WIDE_INT unsorted_offsets[4];
6911 /* Can only handle 2, 3, or 4 insns at present,
6912 though could be easily extended if required. */
6913 gcc_assert (nops >= 2 && nops <= 4);
6915 /* Loop over the operands and check that the memory references are
6916 suitable (i.e. immediate offsets from the same base register). At
6917 the same time, extract the target register, and the memory
6919 for (i = 0; i < nops; i++)
6924 /* Convert a subreg of a mem into the mem itself. */
6925 if (GET_CODE (operands[nops + i]) == SUBREG)
6926 operands[nops + i] = alter_subreg (operands + (nops + i));
6928 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6930 /* Don't reorder volatile memory references; it doesn't seem worth
6931 looking for the case where the order is ok anyway. */
6932 if (MEM_VOLATILE_P (operands[nops + i]))
6935 offset = const0_rtx;
6937 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6938 || (GET_CODE (reg) == SUBREG
6939 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6940 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6941 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6943 || (GET_CODE (reg) == SUBREG
6944 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6945 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6950 base_reg = REGNO (reg);
6951 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6952 ? REGNO (operands[i])
6953 : REGNO (SUBREG_REG (operands[i])));
6958 if (base_reg != (int) REGNO (reg))
6959 /* Not addressed from the same base register. */
6962 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6963 ? REGNO (operands[i])
6964 : REGNO (SUBREG_REG (operands[i])));
6965 if (unsorted_regs[i] < unsorted_regs[order[0]])
6969 /* If it isn't an integer register, or if it overwrites the
6970 base register but isn't the last insn in the list, then
6971 we can't do this. */
6972 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6973 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6976 unsorted_offsets[i] = INTVAL (offset);
6979 /* Not a suitable memory address. */
6983 /* All the useful information has now been extracted from the
6984 operands into unsorted_regs and unsorted_offsets; additionally,
6985 order[0] has been set to the lowest numbered register in the
6986 list. Sort the registers into order, and check that the memory
6987 offsets are ascending and adjacent. */
6989 for (i = 1; i < nops; i++)
6993 order[i] = order[i - 1];
6994 for (j = 0; j < nops; j++)
6995 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6996 && (order[i] == order[i - 1]
6997 || unsorted_regs[j] < unsorted_regs[order[i]]))
7000 /* Have we found a suitable register? If not, one must be used more
7002 if (order[i] == order[i - 1])
7005 /* Are the memory offsets adjacent and ascending? */
7006 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7014 for (i = 0; i < nops; i++)
7015 regs[i] = unsorted_regs[order[i]];
7017 *load_offset = unsorted_offsets[order[0]];
7020 if (unsorted_offsets[order[0]] == 0)
7021 return 1; /* ldmia */
7023 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7024 return 2; /* ldmib */
7026 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7027 return 3; /* ldmda */
7029 if (unsorted_offsets[order[nops - 1]] == -4)
7030 return 4; /* ldmdb */
7032 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
7033 if the offset isn't small enough. The reason 2 ldrs are faster
7034 is because these ARMs are able to do more than one cache access
7035 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7036 whilst the ARM8 has a double bandwidth cache. This means that
7037 these cores can do both an instruction fetch and a data fetch in
7038 a single cycle, so the trick of calculating the address into a
7039 scratch register (one of the result regs) and then doing a load
7040 multiple actually becomes slower (and no smaller in code size).
7041 That is the transformation
7043 ldr rd1, [rbase + offset]
7044 ldr rd2, [rbase + offset + 4]
7048 add rd1, rbase, offset
7049 ldmia rd1, {rd1, rd2}
7051 produces worse code -- '3 cycles + any stalls on rd2' instead of
7052 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7053 access per cycle, the first sequence could never complete in less
7054 than 6 cycles, whereas the ldm sequence would only take 5 and
7055 would make better use of sequential accesses if not hitting the
7058 We cheat here and test 'arm_ld_sched' which we currently know to
7059 only be true for the ARM8, ARM9 and StrongARM. If this ever
7060 changes, then the test below needs to be reworked. */
7061 if (nops == 2 && arm_ld_sched)
7064 /* Can't do it without setting up the offset, only do this if it takes
7065 no more than one insn. */
7066 return (const_ok_for_arm (unsorted_offsets[order[0]])
7067 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
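/* A sketch of the return values above, assuming a two-register group
   based at r4 (register names are illustrative only):
     1: offsets {0, 4}    -> ldmia r4, {rd1, rd2}
     2: offsets {4, 8}    -> ldmib r4, {rd1, rd2}  (ARM only)
     3: offsets {-4, 0}   -> ldmda r4, {rd1, rd2}  (ARM only)
     4: offsets {-8, -4}  -> ldmdb r4, {rd1, rd2}
     5: the base offset must first be formed with a single add/sub.  */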
7071 emit_ldm_seq (rtx *operands, int nops)
7075 HOST_WIDE_INT offset;
7079 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7082 strcpy (buf, "ldm%(ia%)\t");
7086 strcpy (buf, "ldm%(ib%)\t");
7090 strcpy (buf, "ldm%(da%)\t");
7094 strcpy (buf, "ldm%(db%)\t");
7099 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7100 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7103 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7104 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7106 output_asm_insn (buf, operands);
7108 strcpy (buf, "ldm%(ia%)\t");
7115 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7116 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7118 for (i = 1; i < nops; i++)
7119 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7120 reg_names[regs[i]]);
7122 strcat (buf, "}\t%@ phole ldm");
7124 output_asm_insn (buf, operands);
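/* For example, with regs == {r1, r2}, base_reg == r4 and a return value
   of 1 from load_multiple_sequence, the code above emits (once the
   %(...%) dialect markers have been expanded) something like:

	ldmia	r4, {r1, r2}	@ phole ldm  */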
7129 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7130 HOST_WIDE_INT * load_offset)
7132 int unsorted_regs[4];
7133 HOST_WIDE_INT unsorted_offsets[4];
7138 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7139 extended if required. */
7140 gcc_assert (nops >= 2 && nops <= 4);
7142 /* Loop over the operands and check that the memory references are
7143 suitable (i.e. immediate offsets from the same base register). At
7144 the same time, extract the target register, and the memory
7146 for (i = 0; i < nops; i++)
7151 /* Convert a subreg of a mem into the mem itself. */
7152 if (GET_CODE (operands[nops + i]) == SUBREG)
7153 operands[nops + i] = alter_subreg (operands + (nops + i));
7155 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7157 /* Don't reorder volatile memory references; it doesn't seem worth
7158 looking for the case where the order is ok anyway. */
7159 if (MEM_VOLATILE_P (operands[nops + i]))
7162 offset = const0_rtx;
7164 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7165 || (GET_CODE (reg) == SUBREG
7166 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7167 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7168 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7170 || (GET_CODE (reg) == SUBREG
7171 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7172 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7177 base_reg = REGNO (reg);
7178 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7179 ? REGNO (operands[i])
7180 : REGNO (SUBREG_REG (operands[i])));
7185 if (base_reg != (int) REGNO (reg))
7186 /* Not addressed from the same base register. */
7189 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7190 ? REGNO (operands[i])
7191 : REGNO (SUBREG_REG (operands[i])));
7192 if (unsorted_regs[i] < unsorted_regs[order[0]])
7196 /* If it isn't an integer register, then we can't do this. */
7197 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7200 unsorted_offsets[i] = INTVAL (offset);
7203 /* Not a suitable memory address. */
7207 /* All the useful information has now been extracted from the
7208 operands into unsorted_regs and unsorted_offsets; additionally,
7209 order[0] has been set to the lowest numbered register in the
7210 list. Sort the registers into order, and check that the memory
7211 offsets are ascending and adjacent. */
7213 for (i = 1; i < nops; i++)
7217 order[i] = order[i - 1];
7218 for (j = 0; j < nops; j++)
7219 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7220 && (order[i] == order[i - 1]
7221 || unsorted_regs[j] < unsorted_regs[order[i]]))
7224 /* Have we found a suitable register? If not, one must be used more
7226 if (order[i] == order[i - 1])
7229 /* Are the memory offsets adjacent and ascending? */
7230 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7238 for (i = 0; i < nops; i++)
7239 regs[i] = unsorted_regs[order[i]];
7241 *load_offset = unsorted_offsets[order[0]];
7244 if (unsorted_offsets[order[0]] == 0)
7245 return 1; /* stmia */
7247 if (unsorted_offsets[order[0]] == 4)
7248 return 2; /* stmib */
7250 if (unsorted_offsets[order[nops - 1]] == 0)
7251 return 3; /* stmda */
7253 if (unsorted_offsets[order[nops - 1]] == -4)
7254 return 4; /* stmdb */
7260 emit_stm_seq (rtx *operands, int nops)
7264 HOST_WIDE_INT offset;
7268 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7271 strcpy (buf, "stm%(ia%)\t");
7275 strcpy (buf, "stm%(ib%)\t");
7279 strcpy (buf, "stm%(da%)\t");
7283 strcpy (buf, "stm%(db%)\t");
7290 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7291 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7293 for (i = 1; i < nops; i++)
7294 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7295 reg_names[regs[i]]);
7297 strcat (buf, "}\t%@ phole stm");
7299 output_asm_insn (buf, operands);
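/* Likewise, a two-register store group {r1, r2} based at r4 with a
   leading offset of zero is emitted as roughly:

	stmia	r4, {r1, r2}	@ phole stm  */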
7303 /* Routines for use in generating RTL. */
7306 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7307 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7309 HOST_WIDE_INT offset = *offsetp;
7312 int sign = up ? 1 : -1;
7315 /* XScale has load-store double instructions, but they have stricter
7316 alignment requirements than load-store multiple, so we cannot use them.
7319 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7320 the pipeline until completion.
7328 An ldr instruction takes 1-3 cycles, but does not block the
7337 Best case ldr will always win. However, the more ldr instructions
7338 we issue, the less likely we are to be able to schedule them well.
7339 Using ldr instructions also increases code size.
7341 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7342 for counts of 3 or 4 regs. */
7343 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7349 for (i = 0; i < count; i++)
7351 addr = plus_constant (from, i * 4 * sign);
7352 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7353 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7359 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7369 result = gen_rtx_PARALLEL (VOIDmode,
7370 rtvec_alloc (count + (write_back ? 1 : 0)));
7373 XVECEXP (result, 0, 0)
7374 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7379 for (j = 0; i < count; i++, j++)
7381 addr = plus_constant (from, j * 4 * sign);
7382 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7383 XVECEXP (result, 0, i)
7384 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7395 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7396 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7398 HOST_WIDE_INT offset = *offsetp;
7401 int sign = up ? 1 : -1;
7404 /* See arm_gen_load_multiple for discussion of
7405 the pros/cons of ldm/stm usage for XScale. */
7406 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7412 for (i = 0; i < count; i++)
7414 addr = plus_constant (to, i * 4 * sign);
7415 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7416 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7422 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7432 result = gen_rtx_PARALLEL (VOIDmode,
7433 rtvec_alloc (count + (write_back ? 1 : 0)));
7436 XVECEXP (result, 0, 0)
7437 = gen_rtx_SET (VOIDmode, to,
7438 plus_constant (to, count * 4 * sign));
7443 for (j = 0; i < count; i++, j++)
7445 addr = plus_constant (to, j * 4 * sign);
7446 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7447 XVECEXP (result, 0, i)
7448 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
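/* As a sketch, for base_regno == 0, count == 2, write_back set and an
   ascending (up) block based on TO, the PARALLEL built above looks like:

     (parallel [(set (reg to) (plus (reg to) (const_int 8)))
		(set (mem (reg to)) (reg 0))
		(set (mem (plus (reg to) (const_int 4))) (reg 1))])

   with the optional write-back SET always in element 0 of the vector.  */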
7459 arm_gen_movmemqi (rtx *operands)
7461 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7462 HOST_WIDE_INT srcoffset, dstoffset;
7464 rtx src, dst, srcbase, dstbase;
7465 rtx part_bytes_reg = NULL;
7468 if (GET_CODE (operands[2]) != CONST_INT
7469 || GET_CODE (operands[3]) != CONST_INT
7470 || INTVAL (operands[2]) > 64
7471 || INTVAL (operands[3]) & 3)
7474 dstbase = operands[0];
7475 srcbase = operands[1];
7477 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7478 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7480 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7481 out_words_to_go = INTVAL (operands[2]) / 4;
7482 last_bytes = INTVAL (operands[2]) & 3;
7483 dstoffset = srcoffset = 0;
7485 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7486 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7488 for (i = 0; in_words_to_go >= 2; i += 4)
7490 if (in_words_to_go > 4)
7491 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7492 srcbase, &srcoffset));
7494 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7495 FALSE, srcbase, &srcoffset));
7497 if (out_words_to_go)
7499 if (out_words_to_go > 4)
7500 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7501 dstbase, &dstoffset));
7502 else if (out_words_to_go != 1)
7503 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7507 dstbase, &dstoffset));
7510 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7511 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7512 if (last_bytes != 0)
7514 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7520 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7521 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7524 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7525 if (out_words_to_go)
7529 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7530 sreg = copy_to_reg (mem);
7532 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7533 emit_move_insn (mem, sreg);
7536 gcc_assert (!in_words_to_go); /* Sanity check. */
7541 gcc_assert (in_words_to_go > 0);
7543 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7544 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7547 gcc_assert (!last_bytes || part_bytes_reg);
7549 if (BYTES_BIG_ENDIAN && last_bytes)
7551 rtx tmp = gen_reg_rtx (SImode);
7553 /* The bytes we want are in the top end of the word. */
7554 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7555 GEN_INT (8 * (4 - last_bytes))));
7556 part_bytes_reg = tmp;
7560 mem = adjust_automodify_address (dstbase, QImode,
7561 plus_constant (dst, last_bytes - 1),
7562 dstoffset + last_bytes - 1);
7563 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7567 tmp = gen_reg_rtx (SImode);
7568 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7569 part_bytes_reg = tmp;
7578 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7579 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7583 rtx tmp = gen_reg_rtx (SImode);
7584 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7585 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7586 part_bytes_reg = tmp;
7593 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7594 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
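/* Worked example: a 7-byte copy (operands[2] == 7) gives
   in_words_to_go == 2, out_words_to_go == 1 and last_bytes == 3.  Both
   words are fetched with one load multiple into r0/r1, r0 is stored
   whole, and the three live bytes of r1 (part_bytes_reg) are written
   out by the halfword and byte stores above.  */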
7601 /* Select a dominance comparison mode if possible for a test of the general
7602 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7603 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7604 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7605 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7606 In all cases OP will be either EQ or NE, but we don't need to know which
7607 here. If we are unable to support a dominance comparison we return
7608 CC mode. This will then fail to match for the RTL expressions that
7609 generate this call. */
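/* For example, a source-level test such as "x == 0 || y == 0" maps to
   DOM_CC_X_OR_Y and can be emitted as a conditional-compare pair
   (register names are illustrative):

	cmp	r0, #0
	cmpne	r1, #0
	beq	taken

   The second compare executes only when the first one fails, and EQ
   trivially dominates EQ.  */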
7611 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7613 enum rtx_code cond1, cond2;
7616 /* Currently we will probably get the wrong result if the individual
7617 comparisons are not simple. This also ensures that it is safe to
7618 reverse a comparison if necessary. */
7619 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7621 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7625 /* The if_then_else variant of this tests the second condition if the
7626 first passes, but is true if the first fails. Reverse the first
7627 condition to get a true "inclusive-or" expression. */
7628 if (cond_or == DOM_CC_NX_OR_Y)
7629 cond1 = reverse_condition (cond1);
7631 /* If the comparisons are not equal, and one doesn't dominate the other,
7632 then we can't do this. */
7634 && !comparison_dominates_p (cond1, cond2)
7635 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7640 enum rtx_code temp = cond1;
7648 if (cond_or == DOM_CC_X_AND_Y)
7653 case EQ: return CC_DEQmode;
7654 case LE: return CC_DLEmode;
7655 case LEU: return CC_DLEUmode;
7656 case GE: return CC_DGEmode;
7657 case GEU: return CC_DGEUmode;
7658 default: gcc_unreachable ();
7662 if (cond_or == DOM_CC_X_AND_Y)
7678 if (cond_or == DOM_CC_X_AND_Y)
7694 if (cond_or == DOM_CC_X_AND_Y)
7710 if (cond_or == DOM_CC_X_AND_Y)
7725 /* The remaining cases only occur when both comparisons are the
7728 gcc_assert (cond1 == cond2);
7732 gcc_assert (cond1 == cond2);
7736 gcc_assert (cond1 == cond2);
7740 gcc_assert (cond1 == cond2);
7744 gcc_assert (cond1 == cond2);
7753 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7755 /* All floating point compares return CCFP if it is an equality
7756 comparison, and CCFPE otherwise. */
7757 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7777 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7786 /* A compare with a shifted operand. Because of canonicalization, the
7787 comparison will have to be swapped when we emit the assembler. */
7788 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7789 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7790 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7791 || GET_CODE (x) == ROTATERT))
7794 /* This operation is performed swapped, but since we only rely on the Z
7795 flag we don't need an additional mode. */
7796 if (GET_MODE (y) == SImode && REG_P (y)
7797 && GET_CODE (x) == NEG
7798 && (op == EQ || op == NE))
7801 /* This is a special case that is used by combine to allow a
7802 comparison of a shifted byte load to be split into a zero-extend
7803 followed by a comparison of the shifted integer (only valid for
7804 equalities and unsigned inequalities). */
7805 if (GET_MODE (x) == SImode
7806 && GET_CODE (x) == ASHIFT
7807 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7808 && GET_CODE (XEXP (x, 0)) == SUBREG
7809 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7810 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7811 && (op == EQ || op == NE
7812 || op == GEU || op == GTU || op == LTU || op == LEU)
7813 && GET_CODE (y) == CONST_INT)
7816 /* A construct for a conditional compare, if the false arm contains
7817 0, then both conditions must be true, otherwise either condition
7818 must be true. Not all conditions are possible, so CCmode is
7819 returned if it can't be done. */
7820 if (GET_CODE (x) == IF_THEN_ELSE
7821 && (XEXP (x, 2) == const0_rtx
7822 || XEXP (x, 2) == const1_rtx)
7823 && COMPARISON_P (XEXP (x, 0))
7824 && COMPARISON_P (XEXP (x, 1)))
7825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7826 INTVAL (XEXP (x, 2)));
7828 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7829 if (GET_CODE (x) == AND
7830 && COMPARISON_P (XEXP (x, 0))
7831 && COMPARISON_P (XEXP (x, 1)))
7832 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7835 if (GET_CODE (x) == IOR
7836 && COMPARISON_P (XEXP (x, 0))
7837 && COMPARISON_P (XEXP (x, 1)))
7838 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7841 /* An operation (on Thumb) where we want to test for a single bit.
7842 This is done by shifting that bit up into the top bit of a
7843 scratch register; we can then branch on the sign bit. */
7845 && GET_MODE (x) == SImode
7846 && (op == EQ || op == NE)
7847 && GET_CODE (x) == ZERO_EXTRACT
7848 && XEXP (x, 1) == const1_rtx)
7851 /* For an operation that sets the condition codes as a side-effect, the
7852 V flag is not set correctly, so we can only use comparisons where
7853 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7855 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7856 if (GET_MODE (x) == SImode
7858 && (op == EQ || op == NE || op == LT || op == GE)
7859 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7860 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7861 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7862 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7863 || GET_CODE (x) == LSHIFTRT
7864 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7865 || GET_CODE (x) == ROTATERT
7866 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7869 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7872 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7873 && GET_CODE (x) == PLUS
7874 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7880 /* X and Y are two things to compare using CODE. Emit the compare insn and
7881 return the rtx for register 0 in the proper mode. */
7884 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7886 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7887 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7889 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7894 /* Generate a sequence of insns that will generate the correct return
7895 address mask depending on the physical architecture that the program
7898 arm_gen_return_addr_mask (void)
7900 rtx reg = gen_reg_rtx (Pmode);
7902 emit_insn (gen_return_addr_mask (reg));
7907 arm_reload_in_hi (rtx *operands)
7909 rtx ref = operands[1];
7911 HOST_WIDE_INT offset = 0;
7913 if (GET_CODE (ref) == SUBREG)
7915 offset = SUBREG_BYTE (ref);
7916 ref = SUBREG_REG (ref);
7919 if (GET_CODE (ref) == REG)
7921 /* We have a pseudo which has been spilt onto the stack; there
7922 are two cases here: the first where there is a simple
7923 stack-slot replacement and a second where the stack-slot is
7924 out of range, or is used as a subreg. */
7925 if (reg_equiv_mem[REGNO (ref)])
7927 ref = reg_equiv_mem[REGNO (ref)];
7928 base = find_replacement (&XEXP (ref, 0));
7931 /* The slot is out of range, or was dressed up in a SUBREG. */
7932 base = reg_equiv_address[REGNO (ref)];
7935 base = find_replacement (&XEXP (ref, 0));
7937 /* Handle the case where the address is too complex to be offset by 1. */
7938 if (GET_CODE (base) == MINUS
7939 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7941 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7943 emit_set_insn (base_plus, base);
7946 else if (GET_CODE (base) == PLUS)
7948 /* The addend must be CONST_INT, or we would have dealt with it above. */
7949 HOST_WIDE_INT hi, lo;
7951 offset += INTVAL (XEXP (base, 1));
7952 base = XEXP (base, 0);
7954 /* Rework the address into a legal sequence of insns. */
7955 /* Valid range for lo is -4095 -> 4095 */
7958 : -((-offset) & 0xfff));
7960 /* Corner case: if lo is the max offset then we would be out of range
7961 once we have added the additional 1 below, so bump the msb into the
7962 pre-loading insn(s). */
7966 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7967 ^ (HOST_WIDE_INT) 0x80000000)
7968 - (HOST_WIDE_INT) 0x80000000);
7970 gcc_assert (hi + lo == offset);
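/* Worked example: offset == 0x1004 splits into hi == 0x1000 and
   lo == 0x004; HI is added into the base register first, and the two
   byte loads below then need only the small offsets LO and LO + 1.  */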
7974 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7976 /* Get the base address; addsi3 knows how to handle constants
7977 that require more than one insn. */
7978 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7984 /* Operands[2] may overlap operands[0] (though it won't overlap
7985 operands[1]), that's why we asked for a DImode reg -- so we can
7986 use the bit that does not overlap. */
7987 if (REGNO (operands[2]) == REGNO (operands[0]))
7988 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7990 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7992 emit_insn (gen_zero_extendqisi2 (scratch,
7993 gen_rtx_MEM (QImode,
7994 plus_constant (base,
7996 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7997 gen_rtx_MEM (QImode,
7998 plus_constant (base,
8000 if (!BYTES_BIG_ENDIAN)
8001 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8002 gen_rtx_IOR (SImode,
8005 gen_rtx_SUBREG (SImode, operands[0], 0),
8009 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8010 gen_rtx_IOR (SImode,
8011 gen_rtx_ASHIFT (SImode, scratch,
8013 gen_rtx_SUBREG (SImode, operands[0], 0)));
8016 /* Handle storing a half-word to memory during reload by synthesizing it as
8017 two byte stores. Take care not to clobber the input values until after we
8018 have moved them somewhere safe. This code assumes that if the DImode
8019 scratch in operands[2] overlaps either the input value or output address
8020 in some way, then that value must die in this insn (we absolutely need
8021 two scratch registers for some corner cases). */
8023 arm_reload_out_hi (rtx *operands)
8025 rtx ref = operands[0];
8026 rtx outval = operands[1];
8028 HOST_WIDE_INT offset = 0;
8030 if (GET_CODE (ref) == SUBREG)
8032 offset = SUBREG_BYTE (ref);
8033 ref = SUBREG_REG (ref);
8036 if (GET_CODE (ref) == REG)
8038 /* We have a pseudo which has been spilt onto the stack; there
8039 are two cases here: the first where there is a simple
8040 stack-slot replacement and a second where the stack-slot is
8041 out of range, or is used as a subreg. */
8042 if (reg_equiv_mem[REGNO (ref)])
8044 ref = reg_equiv_mem[REGNO (ref)];
8045 base = find_replacement (&XEXP (ref, 0));
8048 /* The slot is out of range, or was dressed up in a SUBREG. */
8049 base = reg_equiv_address[REGNO (ref)];
8052 base = find_replacement (&XEXP (ref, 0));
8054 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8056 /* Handle the case where the address is too complex to be offset by 1. */
8057 if (GET_CODE (base) == MINUS
8058 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8060 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8062 /* Be careful not to destroy OUTVAL. */
8063 if (reg_overlap_mentioned_p (base_plus, outval))
8065 /* Updating base_plus might destroy outval, see if we can
8066 swap the scratch and base_plus. */
8067 if (!reg_overlap_mentioned_p (scratch, outval))
8070 scratch = base_plus;
8075 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8077 /* Be conservative and copy OUTVAL into the scratch now;
8078 this should only be necessary if outval is a subreg
8079 of something larger than a word. */
8080 /* XXX Might this clobber base? I can't see how it can,
8081 since scratch is known to overlap with OUTVAL, and
8082 must be wider than a word. */
8083 emit_insn (gen_movhi (scratch_hi, outval));
8084 outval = scratch_hi;
8088 emit_set_insn (base_plus, base);
8091 else if (GET_CODE (base) == PLUS)
8093 /* The addend must be CONST_INT, or we would have dealt with it above. */
8094 HOST_WIDE_INT hi, lo;
8096 offset += INTVAL (XEXP (base, 1));
8097 base = XEXP (base, 0);
8099 /* Rework the address into a legal sequence of insns. */
8100 /* Valid range for lo is -4095 -> 4095 */
8103 : -((-offset) & 0xfff));
8105 /* Corner case: if lo is the max offset then we would be out of range
8106 once we have added the additional 1 below, so bump the msb into the
8107 pre-loading insn(s). */
8111 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8112 ^ (HOST_WIDE_INT) 0x80000000)
8113 - (HOST_WIDE_INT) 0x80000000);
8115 gcc_assert (hi + lo == offset);
8119 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8121 /* Be careful not to destroy OUTVAL. */
8122 if (reg_overlap_mentioned_p (base_plus, outval))
8124 /* Updating base_plus might destroy outval, see if we
8125 can swap the scratch and base_plus. */
8126 if (!reg_overlap_mentioned_p (scratch, outval))
8129 scratch = base_plus;
8134 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8136 /* Be conservative and copy outval into scratch now;
8137 this should only be necessary if outval is a
8138 subreg of something larger than a word. */
8139 /* XXX Might this clobber base? I can't see how it
8140 can, since scratch is known to overlap with
8142 emit_insn (gen_movhi (scratch_hi, outval));
8143 outval = scratch_hi;
8147 /* Get the base address; addsi3 knows how to handle constants
8148 that require more than one insn. */
8149 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8155 if (BYTES_BIG_ENDIAN)
8157 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8158 plus_constant (base, offset + 1)),
8159 gen_lowpart (QImode, outval)));
8160 emit_insn (gen_lshrsi3 (scratch,
8161 gen_rtx_SUBREG (SImode, outval, 0),
8163 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8164 gen_lowpart (QImode, scratch)));
8168 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8169 gen_lowpart (QImode, outval)));
8170 emit_insn (gen_lshrsi3 (scratch,
8171 gen_rtx_SUBREG (SImode, outval, 0),
8173 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8174 plus_constant (base, offset + 1)),
8175 gen_lowpart (QImode, scratch)));
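/* On a little-endian target the sequence just emitted stores the
   halfword roughly as:

	strb	outval, [base, #offset]
	mov	scratch, outval, lsr #8
	strb	scratch, [base, #offset + 1]

   with the big-endian branch writing the two bytes in the opposite
   order.  */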
8179 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8180 (padded to the size of a word) should be passed in a register. */
8183 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8185 if (TARGET_AAPCS_BASED)
8186 return must_pass_in_stack_var_size (mode, type);
8188 return must_pass_in_stack_var_size_or_pad (mode, type);
8192 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8193 Return true if an argument passed on the stack should be padded upwards,
8194 i.e. if the least-significant byte has useful data.
8195 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8196 aggregate types are placed in the lowest memory address. */
8199 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8201 if (!TARGET_AAPCS_BASED)
8202 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8204 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8211 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8212 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8213 byte of the register has useful data, and return the opposite if the
8214 most significant byte does.
8215 For AAPCS, small aggregates and small complex types are always padded
8219 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8220 tree type, int first ATTRIBUTE_UNUSED)
8222 if (TARGET_AAPCS_BASED
8224 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8225 && int_size_in_bytes (type) <= 4)
8228 /* Otherwise, use default padding. */
8229 return !BYTES_BIG_ENDIAN;
8233 /* Print a symbolic form of X to the debug file, F. */
8235 arm_print_value (FILE *f, rtx x)
8237 switch (GET_CODE (x))
8240 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8244 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8252 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8254 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8255 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8263 fprintf (f, "\"%s\"", XSTR (x, 0));
8267 fprintf (f, "`%s'", XSTR (x, 0));
8271 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8275 arm_print_value (f, XEXP (x, 0));
8279 arm_print_value (f, XEXP (x, 0));
8281 arm_print_value (f, XEXP (x, 1));
8289 fprintf (f, "????");
8294 /* Routines for manipulation of the constant pool. */
8296 /* Arm instructions cannot load a large constant directly into a
8297 register; they have to come from a pc relative load. The constant
8298 must therefore be placed in the addressable range of the pc
8299 relative load. Depending on the precise pc relative load
8300 instruction the range is somewhere between 256 bytes and 4k. This
8301 means that we often have to dump a constant inside a function, and
8302 generate code to branch around it.
8304 It is important to minimize this, since the branches will slow
8305 things down and make the code larger.
8307 Normally we can hide the table after an existing unconditional
8308 branch so that there is no interruption of the flow, but in the
8309 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
8327 We fix this by performing a scan after scheduling, which notices
8328 which instructions need to have their operands fetched from the
8329 constant table and builds the table.
8331 The algorithm starts by building a table of all the constants that
8332 need fixing up and all the natural barriers in the function (places
8333 where a constant table can be dropped without breaking the flow).
8334 For each fixup we note how far the pc-relative replacement will be
8335 able to reach and the offset of the instruction into the function.
8337 Having built the table we then group the fixes together to form
8338 tables that are as large as possible (subject to addressing
8339 constraints) and emit each table of constants after the last
8340 barrier that is within range of all the instructions in the group.
8341 If a group does not contain a barrier, then we forcibly create one
8342 by inserting a jump instruction into the flow. Once the table has
8343 been inserted, the insns are then modified to reference the
8344 relevant entry in the pool.
8346 Possible enhancements to the algorithm (not implemented) are:
8348 1) For some processors and object formats, there may be benefit in
8349 aligning the pools to the start of cache lines; this alignment
8350 would need to be taken into account when calculating addressability
8353 /* These typedefs are located at the start of this file, so that
8354 they can be used in the prototypes there. This comment is to
8355 remind readers of that fact so that the following structures
8356 can be understood more easily.
8358 typedef struct minipool_node Mnode;
8359 typedef struct minipool_fixup Mfix; */
8361 struct minipool_node
8363 /* Doubly linked chain of entries. */
8366 /* The maximum offset into the code at which this entry can be placed. While
8367 pushing fixes for forward references, all entries are sorted in order
8368 of increasing max_address. */
8369 HOST_WIDE_INT max_address;
8370 /* Similarly for an entry inserted for a backwards ref. */
8371 HOST_WIDE_INT min_address;
8372 /* The number of fixes referencing this entry. This can become zero
8373 if we "unpush" an entry. In this case we ignore the entry when we
8374 come to emit the code. */
8376 /* The offset from the start of the minipool. */
8377 HOST_WIDE_INT offset;
8378 /* The value in the table. */
8380 /* The mode of the value. */
8381 enum machine_mode mode;
8382 /* The size of the value. With iWMMXt enabled
8383 sizes > 4 also imply an alignment of 8 bytes. */
8387 struct minipool_fixup
8391 HOST_WIDE_INT address;
8393 enum machine_mode mode;
8397 HOST_WIDE_INT forwards;
8398 HOST_WIDE_INT backwards;
8401 /* Fixes less than a word need padding out to a word boundary. */
8402 #define MINIPOOL_FIX_SIZE(mode) \
8403 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
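/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the
   pool, while DImode and DFmode fixes take their natural 8 bytes.  */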
8405 static Mnode * minipool_vector_head;
8406 static Mnode * minipool_vector_tail;
8407 static rtx minipool_vector_label;
8408 static int minipool_pad;
8410 /* The linked list of all minipool fixes required for this function. */
8411 Mfix * minipool_fix_head;
8412 Mfix * minipool_fix_tail;
8413 /* The fix entry for the current minipool, once it has been placed. */
8414 Mfix * minipool_barrier;
8416 /* Determines if INSN is the start of a jump table. Returns the end
8417 of the TABLE or NULL_RTX. */
8419 is_jump_table (rtx insn)
8423 if (GET_CODE (insn) == JUMP_INSN
8424 && JUMP_LABEL (insn) != NULL
8425 && ((table = next_real_insn (JUMP_LABEL (insn)))
8426 == next_real_insn (insn))
8428 && GET_CODE (table) == JUMP_INSN
8429 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8430 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8436 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8437 #define JUMP_TABLES_IN_TEXT_SECTION 0
8440 static HOST_WIDE_INT
8441 get_jump_table_size (rtx insn)
8443 /* ADDR_VECs only take room if read-only data goes into the text
8445 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8447 rtx body = PATTERN (insn);
8448 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8450 HOST_WIDE_INT modesize;
8452 modesize = GET_MODE_SIZE (GET_MODE (body));
8453 size = modesize * XVECLEN (body, elt);
8457 /* Round up size of TBB table to a halfword boundary. */
8458 size = (size + 1) & ~(HOST_WIDE_INT)1;
8461 /* No padding necessary for TBH. */
8464 /* Add two bytes for alignment on Thumb. */
8477 /* Move a minipool fix MP from its current location to before MAX_MP.
8478 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8479 constraints may need updating. */
8481 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8482 HOST_WIDE_INT max_address)
8484 /* The code below assumes these are different. */
8485 gcc_assert (mp != max_mp);
8489 if (max_address < mp->max_address)
8490 mp->max_address = max_address;
8494 if (max_address > max_mp->max_address - mp->fix_size)
8495 mp->max_address = max_mp->max_address - mp->fix_size;
8497 mp->max_address = max_address;
8499 /* Unlink MP from its current position. Since max_mp is non-null,
8500 mp->prev must be non-null. */
8501 mp->prev->next = mp->next;
8502 if (mp->next != NULL)
8503 mp->next->prev = mp->prev;
8505 minipool_vector_tail = mp->prev;
8507 /* Re-insert it before MAX_MP. */
8509 mp->prev = max_mp->prev;
8512 if (mp->prev != NULL)
8513 mp->prev->next = mp;
8515 minipool_vector_head = mp;
8518 /* Save the new entry. */
8521 /* Scan over the preceding entries and adjust their addresses as
8523 while (mp->prev != NULL
8524 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8526 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8533 /* Add a constant to the minipool for a forward reference. Returns the
8534 node added or NULL if the constant will not fit in this pool. */
8536 add_minipool_forward_ref (Mfix *fix)
8538 /* If set, max_mp is the first pool_entry that has a lower
8539 constraint than the one we are trying to add. */
8540 Mnode * max_mp = NULL;
8541 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8544 /* If the minipool starts before the end of FIX->INSN then this FIX
8545 cannot be placed into the current pool. Furthermore, adding the
8546 new constant pool entry may cause the pool to start FIX_SIZE bytes
8548 if (minipool_vector_head &&
8549 (fix->address + get_attr_length (fix->insn)
8550 >= minipool_vector_head->max_address - fix->fix_size))
8553 /* Scan the pool to see if a constant with the same value has
8554 already been added. While we are doing this, also note the
8555 location where we must insert the constant if it doesn't already
8557 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8559 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8560 && fix->mode == mp->mode
8561 && (GET_CODE (fix->value) != CODE_LABEL
8562 || (CODE_LABEL_NUMBER (fix->value)
8563 == CODE_LABEL_NUMBER (mp->value)))
8564 && rtx_equal_p (fix->value, mp->value))
8566 /* More than one fix references this entry. */
8568 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8571 /* Note the insertion point if necessary. */
8573 && mp->max_address > max_address)
8576 /* If we are inserting an 8-byte aligned quantity and
8577 we have not already found an insertion point, then
8578 make sure that all such 8-byte aligned quantities are
8579 placed at the start of the pool. */
8580 if (ARM_DOUBLEWORD_ALIGN
8582 && fix->fix_size >= 8
8583 && mp->fix_size < 8)
8586 max_address = mp->max_address;
8590 /* The value is not currently in the minipool, so we need to create
8591 a new entry for it. If MAX_MP is NULL, the entry will be put on
8592 the end of the list since the placement is less constrained than
8593 any existing entry. Otherwise, we insert the new fix before
8594 MAX_MP and, if necessary, adjust the constraints on the other
8597 mp->fix_size = fix->fix_size;
8598 mp->mode = fix->mode;
8599 mp->value = fix->value;
8601 /* Not yet required for a backwards ref. */
8602 mp->min_address = -65536;
8606 mp->max_address = max_address;
8608 mp->prev = minipool_vector_tail;
8610 if (mp->prev == NULL)
8612 minipool_vector_head = mp;
8613 minipool_vector_label = gen_label_rtx ();
8616 mp->prev->next = mp;
8618 minipool_vector_tail = mp;
8622 if (max_address > max_mp->max_address - mp->fix_size)
8623 mp->max_address = max_mp->max_address - mp->fix_size;
8625 mp->max_address = max_address;
8628 mp->prev = max_mp->prev;
8630 if (mp->prev != NULL)
8631 mp->prev->next = mp;
8633 minipool_vector_head = mp;
8636 /* Save the new entry. */
8639 /* Scan over the preceding entries and adjust their addresses as
8641 while (mp->prev != NULL
8642 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8644 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8652 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8653 HOST_WIDE_INT min_address)
8655 HOST_WIDE_INT offset;
8657 /* The code below assumes these are different. */
8658 gcc_assert (mp != min_mp);
8662 if (min_address > mp->min_address)
8663 mp->min_address = min_address;
8667 /* We will adjust this below if it is too loose. */
8668 mp->min_address = min_address;
8670 /* Unlink MP from its current position. Since min_mp is non-null,
8671 mp->next must be non-null. */
8672 mp->next->prev = mp->prev;
8673 if (mp->prev != NULL)
8674 mp->prev->next = mp->next;
8676 minipool_vector_head = mp->next;
8678 /* Reinsert it after MIN_MP. */
8680 mp->next = min_mp->next;
8682 if (mp->next != NULL)
8683 mp->next->prev = mp;
8685 minipool_vector_tail = mp;
8691 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8693 mp->offset = offset;
8694 if (mp->refcount > 0)
8695 offset += mp->fix_size;
8697 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8698 mp->next->min_address = mp->min_address + mp->fix_size;
8704 /* Add a constant to the minipool for a backward reference. Returns the
8705 node added or NULL if the constant will not fit in this pool.
8707 Note that the code for insertion for a backwards reference can be
8708 somewhat confusing because the calculated offsets for each fix do
8709 not take into account the size of the pool (which is still under
8712 add_minipool_backward_ref (Mfix *fix)
8714 /* If set, min_mp is the last pool_entry that has a lower constraint
8715 than the one we are trying to add. */
8716 Mnode *min_mp = NULL;
8717 /* This can be negative, since it is only a constraint. */
8718 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8721 /* If we can't reach the current pool from this insn, or if we can't
8722 insert this entry at the end of the pool without pushing other
8723 fixes out of range, then we don't try. This ensures that we
8724 can't fail later on. */
8725 if (min_address >= minipool_barrier->address
8726 || (minipool_vector_tail->min_address + fix->fix_size
8727 >= minipool_barrier->address))
8730 /* Scan the pool to see if a constant with the same value has
8731 already been added. While we are doing this, also note the
8732 location where we must insert the constant if it doesn't already
8734 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8736 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8737 && fix->mode == mp->mode
8738 && (GET_CODE (fix->value) != CODE_LABEL
8739 || (CODE_LABEL_NUMBER (fix->value)
8740 == CODE_LABEL_NUMBER (mp->value)))
8741 && rtx_equal_p (fix->value, mp->value)
8742 /* Check that there is enough slack to move this entry to the
8743 end of the table (this is conservative). */
8745 > (minipool_barrier->address
8746 + minipool_vector_tail->offset
8747 + minipool_vector_tail->fix_size)))
8750 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8754 mp->min_address += fix->fix_size;
8757 /* Note the insertion point if necessary. */
8758 if (mp->min_address < min_address)
8760 /* For now, we do not allow the insertion of nodes requiring 8-byte
8761 alignment anywhere but at the start of the pool. */
8762 if (ARM_DOUBLEWORD_ALIGN
8763 && fix->fix_size >= 8 && mp->fix_size < 8)
8768 else if (mp->max_address
8769 < minipool_barrier->address + mp->offset + fix->fix_size)
8771 /* Inserting before this entry would push the fix beyond
8772 its maximum address (which can happen if we have
8773 re-located a forwards fix); force the new fix to come
8776 min_address = mp->min_address + fix->fix_size;
8778 /* If we are inserting an 8-byte aligned quantity and
8779 we have not already found an insertion point, then
8780 make sure that all such 8-byte aligned quantities are
8781 placed at the start of the pool. */
8782 else if (ARM_DOUBLEWORD_ALIGN
8784 && fix->fix_size >= 8
8785 && mp->fix_size < 8)
8788 min_address = mp->min_address + fix->fix_size;
8793 /* We need to create a new entry. */
8795 mp->fix_size = fix->fix_size;
8796 mp->mode = fix->mode;
8797 mp->value = fix->value;
8799 mp->max_address = minipool_barrier->address + 65536;
8801 mp->min_address = min_address;
8806 mp->next = minipool_vector_head;
8808 if (mp->next == NULL)
8810 minipool_vector_tail = mp;
8811 minipool_vector_label = gen_label_rtx ();
8814 mp->next->prev = mp;
8816 minipool_vector_head = mp;
8820 mp->next = min_mp->next;
8824 if (mp->next != NULL)
8825 mp->next->prev = mp;
8827 minipool_vector_tail = mp;
8830 /* Save the new entry. */
8838 /* Scan over the following entries and adjust their offsets. */
8839 while (mp->next != NULL)
8841 if (mp->next->min_address < mp->min_address + mp->fix_size)
8842 mp->next->min_address = mp->min_address + mp->fix_size;
8845 mp->next->offset = mp->offset + mp->fix_size;
8847 mp->next->offset = mp->offset;
8856 assign_minipool_offsets (Mfix *barrier)
8858 HOST_WIDE_INT offset = 0;
8861 minipool_barrier = barrier;
8863 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8865 mp->offset = offset;
8867 if (mp->refcount > 0)
8868 offset += mp->fix_size;
8872 /* Output the literal table. */
8874 dump_minipool (rtx scan)
8880 if (ARM_DOUBLEWORD_ALIGN)
8881 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8882 if (mp->refcount > 0 && mp->fix_size >= 8)
8890 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8891 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8893 scan = emit_label_after (gen_label_rtx (), scan);
8894 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8895 scan = emit_label_after (minipool_vector_label, scan);
8897 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8899 if (mp->refcount > 0)
8904 ";; Offset %u, min %ld, max %ld ",
8905 (unsigned) mp->offset, (unsigned long) mp->min_address,
8906 (unsigned long) mp->max_address);
8907 arm_print_value (dump_file, mp->value);
8908 fputc ('\n', dump_file);
8911 switch (mp->fix_size)
8913 #ifdef HAVE_consttable_1
8915 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8919 #ifdef HAVE_consttable_2
8921 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8925 #ifdef HAVE_consttable_4
8927 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8931 #ifdef HAVE_consttable_8
8933 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8937 #ifdef HAVE_consttable_16
8939 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8952 minipool_vector_head = minipool_vector_tail = NULL;
8953 scan = emit_insn_after (gen_consttable_end (), scan);
8954 scan = emit_barrier_after (scan);
8957 /* Return the cost of forcibly inserting a barrier after INSN. */
8959 arm_barrier_cost (rtx insn)
8961 /* Basing the location of the pool on the loop depth is preferable,
8962 but at the moment, the basic block information seems to be
8963 corrupted by this stage of the compilation. */
8965 rtx next = next_nonnote_insn (insn);
8967 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8970 switch (GET_CODE (insn))
8973 /* It will always be better to place the table before the label, rather
8982 return base_cost - 10;
8985 return base_cost + 10;
8989 /* Find the best place in the insn stream in the range
8990 (FIX->address, MAX_ADDRESS) to forcibly insert a minipool barrier.
8991 Create the barrier by inserting a jump and add a new fix entry for
8994 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8996 HOST_WIDE_INT count = 0;
8998 rtx from = fix->insn;
8999 /* The instruction after which we will insert the jump. */
9000 rtx selected = NULL;
9002 /* The address at which the jump instruction will be placed. */
9003 HOST_WIDE_INT selected_address;
9005 HOST_WIDE_INT max_count = max_address - fix->address;
9006 rtx label = gen_label_rtx ();
9008 selected_cost = arm_barrier_cost (from);
9009 selected_address = fix->address;
9011 while (from && count < max_count)
9016 /* This code shouldn't have been called if there was a natural barrier
9018 gcc_assert (GET_CODE (from) != BARRIER);
9020 /* Count the length of this insn. */
9021 count += get_attr_length (from);
9023 /* If there is a jump table, add its length. */
9024 tmp = is_jump_table (from);
9027 count += get_jump_table_size (tmp);
9029 /* Jump tables aren't in a basic block, so base the cost on
9030 the dispatch insn. If we select this location, we will
9031 still put the pool after the table. */
9032 new_cost = arm_barrier_cost (from);
9034 if (count < max_count
9035 && (!selected || new_cost <= selected_cost))
9038 selected_cost = new_cost;
9039 selected_address = fix->address + count;
9042 /* Continue after the dispatch table. */
9043 from = NEXT_INSN (tmp);
9047 new_cost = arm_barrier_cost (from);
9049 if (count < max_count
9050 && (!selected || new_cost <= selected_cost))
9053 selected_cost = new_cost;
9054 selected_address = fix->address + count;
9057 from = NEXT_INSN (from);
9060 /* Make sure that we found a place to insert the jump. */
9061 gcc_assert (selected);
9063 /* Create a new JUMP_INSN that branches around a barrier. */
9064 from = emit_jump_insn_after (gen_jump (label), selected);
9065 JUMP_LABEL (from) = label;
9066 barrier = emit_barrier_after (from);
9067 emit_label_after (label, barrier);
9069 /* Create a minipool barrier entry for the new barrier. */
9070 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9071 new_fix->insn = barrier;
9072 new_fix->address = selected_address;
9073 new_fix->next = fix->next;
9074 fix->next = new_fix;
9079 /* Record that there is a natural barrier in the insn stream at
9082 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9084 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9087 fix->address = address;
9090 if (minipool_fix_head != NULL)
9091 minipool_fix_tail->next = fix;
9093 minipool_fix_head = fix;
9095 minipool_fix_tail = fix;
9098 /* Record INSN, which will need fixing up to load a value from the
9099 minipool. ADDRESS is the offset of the insn since the start of the
9100 function; LOC is a pointer to the part of the insn which requires
9101 fixing; VALUE is the constant that must be loaded, which is of type
9104 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9105 enum machine_mode mode, rtx value)
9107 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9110 fix->address = address;
9113 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9115 fix->forwards = get_attr_pool_range (insn);
9116 fix->backwards = get_attr_neg_pool_range (insn);
9117 fix->minipool = NULL;
9119 /* If an insn doesn't have a range defined for it, then it isn't
9120 expecting to be reworked by this code. Better to stop now than
9121 to generate duff assembly code. */
9122 gcc_assert (fix->forwards || fix->backwards);
9124 /* If an entry requires 8-byte alignment then assume all constant pools
9125 require 4 bytes of padding. Trying to do this later on a per-pool
9126 basis is awkward because existing pool entries have to be modified. */
9127 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9133 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9134 GET_MODE_NAME (mode),
9135 INSN_UID (insn), (unsigned long) address,
9136 -1 * (long)fix->backwards, (long)fix->forwards);
9137 arm_print_value (dump_file, fix->value);
9138 fprintf (dump_file, "\n");
9141 /* Add it to the chain of fixes. */
9144 if (minipool_fix_head != NULL)
9145 minipool_fix_tail->next = fix;
9147 minipool_fix_head = fix;
9149 minipool_fix_tail = fix;
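/* As a rough illustration (the exact figures come from the pool_range
   and neg_pool_range insn attributes in the machine description): an
   ARM-state word load can typically reach a pool entry within about
   4KB either side of itself, so a fix at function offset 10000 could
   be satisfied by a pool placed anywhere in roughly (6000, 14000).  */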
9152 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9153 Returns the number of insns needed, or 99 if we don't know how to generate it. */
9156 arm_const_double_inline_cost (rtx val)
9158 rtx lowpart, highpart;
9159 enum machine_mode mode;
9161 mode = GET_MODE (val);
9163 if (mode == VOIDmode)
9166 gcc_assert (GET_MODE_SIZE (mode) == 8);
9168 lowpart = gen_lowpart (SImode, val);
9169 highpart = gen_highpart_mode (SImode, mode, val);
9171 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9172 gcc_assert (GET_CODE (highpart) == CONST_INT);
9174 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9175 NULL_RTX, NULL_RTX, 0, 0)
9176 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9177 NULL_RTX, NULL_RTX, 0, 0));
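/* Worked example: for VAL == 0x000000ff00000001 both the low part (1)
   and the high part (0xff) are single valid immediates, so the cost
   returned is 2 insns.  */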
9180 /* Return true if it is worthwhile to split a 64-bit constant into two
9181 32-bit operations. This is the case if optimizing for size, or
9182 if we have load delay slots, or if one 32-bit part can be done with
9183 a single data operation. */
9185 arm_const_double_by_parts (rtx val)
9187 enum machine_mode mode = GET_MODE (val);
9190 if (optimize_size || arm_ld_sched)
9193 if (mode == VOIDmode)
9196 part = gen_highpart_mode (SImode, mode, val);
9198 gcc_assert (GET_CODE (part) == CONST_INT);
9200 if (const_ok_for_arm (INTVAL (part))
9201 || const_ok_for_arm (~INTVAL (part)))
9204 part = gen_lowpart (SImode, val);
9206 gcc_assert (GET_CODE (part) == CONST_INT);
9208 if (const_ok_for_arm (INTVAL (part))
9209 || const_ok_for_arm (~INTVAL (part)))
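/* For example, 0x00000001ffffffff is worth splitting: the high part
   can be built with "mov rN, #1" and the low part with "mvn rM, #0"
   (registers illustrative).  */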
9215 /* Scan INSN and note any of its operands that need fixing.
9216 If DO_PUSHES is false we do not actually push any of the fixups
9217 needed. The function returns TRUE if any fixups were needed/pushed.
9218 This is used by arm_memory_load_p() which needs to know about loads
9219 of constants that will be converted into minipool loads. */
9221 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9223 bool result = false;
9226 extract_insn (insn);
9228 if (!constrain_operands (1))
9229 fatal_insn_not_found (insn);
9231 if (recog_data.n_alternatives == 0)
9234 /* Fill in recog_op_alt with information about the constraints of this insn. */
9236 preprocess_constraints ();
9238 for (opno = 0; opno < recog_data.n_operands; opno++)
9240 /* Things we need to fix can only occur in inputs. */
9241 if (recog_data.operand_type[opno] != OP_IN)
9244 /* If this alternative is a memory reference, then any mention
9245 of constants in this alternative is really to fool reload
9246 into allowing us to accept one there. We need to fix them up
9247 now so that we output the right code. */
9248 if (recog_op_alt[opno][which_alternative].memory_ok)
9250 rtx op = recog_data.operand[opno];
9252 if (CONSTANT_P (op))
9255 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9256 recog_data.operand_mode[opno], op);
9259 else if (GET_CODE (op) == MEM
9260 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9261 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9265 rtx cop = avoid_constant_pool_reference (op);
9267 /* Casting the address of something to a mode narrower
9268 than a word can cause avoid_constant_pool_reference()
9269 to return the pool reference itself. That's no good to
9270 us here. Let's just hope that we can use the
9271 constant pool value directly. */
9273 cop = get_pool_constant (XEXP (op, 0));
9275 push_minipool_fix (insn, address,
9276 recog_data.operand_loc[opno],
9277 recog_data.operand_mode[opno], cop);
9288 /* GCC puts the pool in the wrong place for ARM, since we can only
9289 load addresses a limited distance around the pc. We do some
9290 special munging to move the constant pool values to the correct
9291 point in the code. */
9296 HOST_WIDE_INT address = 0;
9299 minipool_fix_head = minipool_fix_tail = NULL;
9301 /* The first insn must always be a note, or the code below won't
9302 scan it properly. */
9303 insn = get_insns ();
9304 gcc_assert (GET_CODE (insn) == NOTE);
9307 /* Scan all the insns and record the operands that will need fixing. */
9308 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9310 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9311 && (arm_cirrus_insn_p (insn)
9312 || GET_CODE (insn) == JUMP_INSN
9313 || arm_memory_load_p (insn)))
9314 cirrus_reorg (insn);
9316 if (GET_CODE (insn) == BARRIER)
9317 push_minipool_barrier (insn, address);
9318 else if (INSN_P (insn))
9322 note_invalid_constants (insn, address, true);
9323 address += get_attr_length (insn);
9325 /* If the insn is a vector jump, add the size of the table
9326 and skip the table. */
9327 if ((table = is_jump_table (insn)) != NULL)
9329 address += get_jump_table_size (table);
9335 fix = minipool_fix_head;
9337 /* Now scan the fixups and perform the required changes. */
9342 Mfix * last_added_fix;
9343 Mfix * last_barrier = NULL;
9346 /* Skip any further barriers before the next fix. */
9347 while (fix && GET_CODE (fix->insn) == BARRIER)
9350 /* No more fixes. */
9354 last_added_fix = NULL;
9356 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9358 if (GET_CODE (ftmp->insn) == BARRIER)
9360 if (ftmp->address >= minipool_vector_head->max_address)
9363 last_barrier = ftmp;
9365 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9368 last_added_fix = ftmp; /* Keep track of the last fix added. */
9371 /* If we found a barrier, drop back to that; any fixes that we
9372 could have reached but come after the barrier will now go in
9373 the next mini-pool. */
9374 if (last_barrier != NULL)
9376 /* Reduce the refcount for those fixes that won't go into this pool after all. */
9378 for (fdel = last_barrier->next;
9379 fdel && fdel != ftmp;
9382 fdel->minipool->refcount--;
9383 fdel->minipool = NULL;
9386 ftmp = last_barrier;
9390 /* ftmp is the first fix that we can't fit into this pool and
9391 there are no natural barriers that we could use. Insert a
9392 new barrier in the code somewhere between the previous
9393 fix and this one, and arrange to jump around it. */
9394 HOST_WIDE_INT max_address;
9396 /* The last item on the list of fixes must be a barrier, so
9397 we can never run off the end of the list of fixes without
9398 last_barrier being set. */
9401 max_address = minipool_vector_head->max_address;
9402 /* Check that there isn't another fix that is in range that
9403 we couldn't fit into this pool because the pool was
9404 already too large: we need to put the pool before such an
9405 instruction. The pool itself may come just after the
9406 fix because create_fix_barrier also allows space for a
9407 jump instruction. */
9408 if (ftmp->address < max_address)
9409 max_address = ftmp->address + 1;
9411 last_barrier = create_fix_barrier (last_added_fix, max_address);
9414 assign_minipool_offsets (last_barrier);
9418 if (GET_CODE (ftmp->insn) != BARRIER
9419 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9426 /* Scan over the fixes we have identified for this pool, fixing them
9427 up and adding the constants to the pool itself. */
9428 for (this_fix = fix; this_fix && ftmp != this_fix;
9429 this_fix = this_fix->next)
9430 if (GET_CODE (this_fix->insn) != BARRIER)
9433 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9434 minipool_vector_label),
9435 this_fix->minipool->offset);
9436 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9439 dump_minipool (last_barrier->insn);
9443 /* From now on we must synthesize any constants that we can't handle
9444 directly. This can happen if the RTL gets split during final
9445 instruction generation. */
9446 after_arm_reorg = 1;
9448 /* Free the minipool memory. */
9449 obstack_free (&minipool_obstack, minipool_startobj);
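/* Schematically, the pass rewrites (labels are illustrative)

	ldr	r0, .L99		@ .L99 might be out of range

   into

	ldr	r0, .LP0		@ .LP0 is a minipool in range
	...
	b	.LB0			@ inserted by create_fix_barrier
   .LP0:
	.word	<constant>
   .LB0:
	...								*/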
9452 /* Routines to output assembly language. */
9454 /* If the rtx is the correct value then return the string of the number.
9455 In this way we can ensure that valid double constants are generated even
9456 when cross compiling. */
9458 fp_immediate_constant (rtx x)
9463 if (!fp_consts_inited)
9466 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9467 for (i = 0; i < 8; i++)
9468 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9469 return strings_fp[i];
9474 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9476 fp_const_from_val (REAL_VALUE_TYPE *r)
9480 if (!fp_consts_inited)
9483 for (i = 0; i < 8; i++)
9484 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9485 return strings_fp[i];
9490 /* Output the operands of a LDM/STM instruction to STREAM.
9491 MASK is the ARM register set mask of which only bits 0-15 are important.
9492 REG is the base register, either the frame pointer or the stack pointer,
9493 INSTR is the possibly suffixed load or store instruction.
9494 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9497 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9498 unsigned long mask, int rfe)
9501 bool not_first = FALSE;
9503 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9504 fputc ('\t', stream);
9505 asm_fprintf (stream, instr, reg);
9506 fputc ('{', stream);
9508 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9509 if (mask & (1 << i))
9512 fprintf (stream, ", ");
9514 asm_fprintf (stream, "%r", i);
9519 fprintf (stream, "}^\n");
9521 fprintf (stream, "}\n");
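/* For example (hypothetical arguments), INSTR "ldmfd\t%r!, " with
   REG == SP_REGNUM, a MASK containing r4, r5 and lr, and RFE == 0
   prints:

	ldmfd	sp!, {r4, r5, lr}					*/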
9525 /* Output a FLDMD instruction to STREAM.
9526 BASE is the register containing the address.
9527 REG and COUNT specify the register range.
9528 Extra registers may be added to avoid hardware bugs.
9530 We output FLDMD even for ARMv5 VFP implementations. Although
9531 FLDMD is technically not supported until ARMv6, it is believed
9532 that all VFP implementations support its use in this context. */
9535 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9539 /* Workaround ARM10 VFPr1 bug. */
9540 if (count == 2 && !arm_arch6)
9547 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9548 load into multiple parts if we have to handle more than 16 registers. */
9551 vfp_output_fldmd (stream, base, reg, 16);
9552 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9556 fputc ('\t', stream);
9557 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9559 for (i = reg; i < reg + count; i++)
9562 fputs (", ", stream);
9563 asm_fprintf (stream, "d%d", i);
9565 fputs ("}\n", stream);
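/* E.g. vfp_output_fldmd (f, SP_REGNUM, 8, 2) prints

	fldmfdd	sp!, {d8, d9}

   assuming an ARMv6 or later core; on earlier cores the workaround
   above widens the range to {d8, d9, d10}.  */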
9570 /* Output the assembly for a store multiple. */
9573 vfp_output_fstmd (rtx * operands)
9580 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9581 p = strlen (pattern);
9583 gcc_assert (GET_CODE (operands[1]) == REG);
9585 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9586 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9588 p += sprintf (&pattern[p], ", d%d", base + i);
9590 strcpy (&pattern[p], "}");
9592 output_asm_insn (pattern, operands);
9597 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9598 number of bytes pushed. */
9601 vfp_emit_fstmd (int base_reg, int count)
9608 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9609 register pairs are stored by a store multiple insn. We avoid this
9610 by pushing an extra pair. */
9611 if (count == 2 && !arm_arch6)
9613 if (base_reg == LAST_VFP_REGNUM - 3)
9618 /* FSTMD may not store more than 16 doubleword registers at once. Split
9619 larger stores into multiple parts (up to a maximum of two, in practice). */
9624 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
9626 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9627 saved += vfp_emit_fstmd (base_reg, 16);
9631 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9632 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9634 reg = gen_rtx_REG (DFmode, base_reg);
9638 = gen_rtx_SET (VOIDmode,
9639 gen_frame_mem (BLKmode,
9640 gen_rtx_PRE_DEC (BLKmode,
9641 stack_pointer_rtx)),
9642 gen_rtx_UNSPEC (BLKmode,
9646 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9647 plus_constant (stack_pointer_rtx, -(count * 8)));
9648 RTX_FRAME_RELATED_P (tmp) = 1;
9649 XVECEXP (dwarf, 0, 0) = tmp;
9651 tmp = gen_rtx_SET (VOIDmode,
9652 gen_frame_mem (DFmode, stack_pointer_rtx),
9654 RTX_FRAME_RELATED_P (tmp) = 1;
9655 XVECEXP (dwarf, 0, 1) = tmp;
9657 for (i = 1; i < count; i++)
9659 reg = gen_rtx_REG (DFmode, base_reg);
9661 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9663 tmp = gen_rtx_SET (VOIDmode,
9664 gen_frame_mem (DFmode,
9665 plus_constant (stack_pointer_rtx,
9668 RTX_FRAME_RELATED_P (tmp) = 1;
9669 XVECEXP (dwarf, 0, i + 1) = tmp;
9672 par = emit_insn (par);
9673 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9675 RTX_FRAME_RELATED_P (par) = 1;
9680 /* Emit a call instruction with pattern PAT. ADDR is the address of the call target. */
9684 arm_emit_call_insn (rtx pat, rtx addr)
9688 insn = emit_call_insn (pat);
9690 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9691 If the call might use such an entry, add a use of the PIC register
9692 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9693 if (TARGET_VXWORKS_RTP
9695 && GET_CODE (addr) == SYMBOL_REF
9696 && (SYMBOL_REF_DECL (addr)
9697 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9698 : !SYMBOL_REF_LOCAL_P (addr)))
9700 require_pic_register ();
9701 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9705 /* Output a 'call' insn. */
9707 output_call (rtx *operands)
9709 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9711 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9712 if (REGNO (operands[0]) == LR_REGNUM)
9714 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9715 output_asm_insn ("mov%?\t%0, %|lr", operands);
9718 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9720 if (TARGET_INTERWORK || arm_arch4t)
9721 output_asm_insn ("bx%?\t%0", operands);
9723 output_asm_insn ("mov%?\t%|pc, %0", operands);
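/* For instance, a call through r2 on an arm_arch4t core is emitted as

	mov	lr, pc
	bx	r2

   (illustrative; pre-v4t cores get "mov pc, r2" instead).  */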
9728 /* Output a 'call' insn that is a reference in memory. */
9730 output_call_mem (rtx *operands)
9732 if (TARGET_INTERWORK && !arm_arch5)
9734 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9735 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9736 output_asm_insn ("bx%?\t%|ip", operands);
9738 else if (regno_use_in (LR_REGNUM, operands[0]))
9740 /* LR is used in the memory address. We load the address in the
9741 first instruction. It's safe to use IP as the target of the
9742 load since the call will kill it anyway. */
9743 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9745 output_asm_insn ("blx%?\t%|ip", operands);
9748 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9750 output_asm_insn ("bx%?\t%|ip", operands);
9752 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9757 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9758 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9765 /* Output a move from arm registers to an fpa register.
9766 OPERANDS[0] is an fpa register.
9767 OPERANDS[1] is the first register of an arm register pair. */
9769 output_mov_long_double_fpa_from_arm (rtx *operands)
9771 int arm_reg0 = REGNO (operands[1]);
9774 gcc_assert (arm_reg0 != IP_REGNUM);
9776 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9777 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9778 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9780 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9781 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
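/* E.g. with OPERANDS[0] == f0 and OPERANDS[1] == r0 this emits
   (illustrative operands):

	stmfd	sp!, {r0, r1, r2}
	ldfe	f0, [sp], #12						*/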
9786 /* Output a move from an fpa register to arm registers.
9787 OPERANDS[0] is the first register of an arm register pair.
9788 OPERANDS[1] is an fpa register. */
9790 output_mov_long_double_arm_from_fpa (rtx *operands)
9792 int arm_reg0 = REGNO (operands[0]);
9795 gcc_assert (arm_reg0 != IP_REGNUM);
9797 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9798 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9799 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9801 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9802 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9806 /* Output a move from arm registers to arm registers of a long double.
9807 OPERANDS[0] is the destination.
9808 OPERANDS[1] is the source. */
9810 output_mov_long_double_arm_from_arm (rtx *operands)
9812 /* We have to be careful here because the two might overlap. */
9813 int dest_start = REGNO (operands[0]);
9814 int src_start = REGNO (operands[1]);
9818 if (dest_start < src_start)
9820 for (i = 0; i < 3; i++)
9822 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9823 ops[1] = gen_rtx_REG (SImode, src_start + i);
9824 output_asm_insn ("mov%?\t%0, %1", ops);
9829 for (i = 2; i >= 0; i--)
9831 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9832 ops[1] = gen_rtx_REG (SImode, src_start + i);
9833 output_asm_insn ("mov%?\t%0, %1", ops);
9841 /* Output a move from arm registers to an fpa register.
9842 OPERANDS[0] is an fpa register.
9843 OPERANDS[1] is the first register of an arm register pair. */
9845 output_mov_double_fpa_from_arm (rtx *operands)
9847 int arm_reg0 = REGNO (operands[1]);
9850 gcc_assert (arm_reg0 != IP_REGNUM);
9852 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9853 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9854 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9855 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9859 /* Output a move from an fpa register to arm registers.
9860 OPERANDS[0] is the first register of an arm register pair.
9861 OPERANDS[1] is an fpa register. */
9863 output_mov_double_arm_from_fpa (rtx *operands)
9865 int arm_reg0 = REGNO (operands[0]);
9868 gcc_assert (arm_reg0 != IP_REGNUM);
9870 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9871 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9872 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9873 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9877 /* Output a move between double words.
9878 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9879 or MEM<-REG and all MEMs must be offsettable addresses. */
9881 output_move_double (rtx *operands)
9883 enum rtx_code code0 = GET_CODE (operands[0]);
9884 enum rtx_code code1 = GET_CODE (operands[1]);
9889 int reg0 = REGNO (operands[0]);
9891 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9893 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9895 switch (GET_CODE (XEXP (operands[1], 0)))
9899 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
9901 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9905 gcc_assert (TARGET_LDRD);
9906 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9911 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9913 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9918 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
9920 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9924 gcc_assert (TARGET_LDRD);
9925 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9930 otherops[0] = operands[0];
9931 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9932 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9934 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9936 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9938 /* Registers overlap so split out the increment. */
9939 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9940 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9944 /* IWMMXT allows offsets larger than ldrd can handle;
9945 fix these up with a pair of ldr. */
9946 if (GET_CODE (otherops[2]) == CONST_INT
9947 && (INTVAL(otherops[2]) <= -256
9948 || INTVAL(otherops[2]) >= 256))
9950 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9951 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9952 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9955 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9960 /* IWMMXT allows offsets larger than ldrd can handle;
9961 fix these up with a pair of ldr. */
9962 if (GET_CODE (otherops[2]) == CONST_INT
9963 && (INTVAL(otherops[2]) <= -256
9964 || INTVAL(otherops[2]) >= 256))
9966 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9967 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9968 otherops[0] = operands[0];
9969 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9972 /* We only allow constant increments, so this is safe. */
9973 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9979 /* We might be able to use ldrd %0, %1 here. However the range is
9980 different to ldr/adr, and it is broken on some ARMv7-M implementations. */
9982 output_asm_insn ("adr%?\t%0, %1", operands);
9984 output_asm_insn ("ldr%(d%)\t%0, [%0]", operands);
9986 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9989 /* ??? This needs checking for thumb2. */
9991 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9992 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9994 otherops[0] = operands[0];
9995 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9996 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9998 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10000 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10002 switch ((int) INTVAL (otherops[2]))
10005 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10010 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10015 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10020 && (GET_CODE (otherops[2]) == REG
10021 || (GET_CODE (otherops[2]) == CONST_INT
10022 && INTVAL (otherops[2]) > -256
10023 && INTVAL (otherops[2]) < 256)))
10025 if (reg_overlap_mentioned_p (otherops[0],
10028 /* Swap base and index registers over to
10029 avoid a conflict. */
10030 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
10031 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
10033 /* If both registers conflict, it will usually
10034 have been fixed by a splitter. */
10035 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10037 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10038 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10042 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10046 if (GET_CODE (otherops[2]) == CONST_INT)
10048 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10049 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10051 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10054 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10057 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10060 return "ldr%(d%)\t%0, [%0]";
10062 return "ldm%(ia%)\t%0, %M0";
10066 otherops[1] = adjust_address (operands[1], SImode, 4);
10067 /* Take care of overlapping base/data reg. */
10068 if (reg_mentioned_p (operands[0], operands[1]))
10070 output_asm_insn ("ldr%?\t%0, %1", otherops);
10071 output_asm_insn ("ldr%?\t%0, %1", operands);
10075 output_asm_insn ("ldr%?\t%0, %1", operands);
10076 output_asm_insn ("ldr%?\t%0, %1", otherops);
10083 /* Constraints should ensure this. */
10084 gcc_assert (code0 == MEM && code1 == REG);
10085 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10087 switch (GET_CODE (XEXP (operands[0], 0)))
10091 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10093 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10097 gcc_assert (TARGET_LDRD);
10098 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10103 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10105 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10110 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10112 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10116 gcc_assert (TARGET_LDRD);
10117 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10122 otherops[0] = operands[1];
10123 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10124 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10126 /* IWMMXT allows offsets larger than strd can handle;
10127 fix these up with a pair of str. */
10128 if (GET_CODE (otherops[2]) == CONST_INT
10129 && (INTVAL(otherops[2]) <= -256
10130 || INTVAL(otherops[2]) >= 256))
10133 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10134 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10136 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10137 otherops[0] = reg1;
10138 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10142 otherops[0] = reg1;
10143 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10144 otherops[0] = operands[1];
10145 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10148 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10149 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10151 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10155 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10156 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10158 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10161 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10167 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10173 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10178 && (GET_CODE (otherops[2]) == REG
10179 || (GET_CODE (otherops[2]) == CONST_INT
10180 && INTVAL (otherops[2]) > -256
10181 && INTVAL (otherops[2]) < 256)))
10183 otherops[0] = operands[1];
10184 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10185 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10191 otherops[0] = adjust_address (operands[0], SImode, 4);
10192 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10193 output_asm_insn ("str%?\t%1, %0", operands);
10194 output_asm_insn ("str%?\t%1, %0", otherops);
10201 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10202 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10205 output_move_quad (rtx *operands)
10207 if (REG_P (operands[0]))
10209 /* Load, or reg->reg move. */
10211 if (MEM_P (operands[1]))
10213 switch (GET_CODE (XEXP (operands[1], 0)))
10216 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10221 output_asm_insn ("adr%?\t%0, %1", operands);
10222 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10226 gcc_unreachable ();
10234 gcc_assert (REG_P (operands[1]));
10236 dest = REGNO (operands[0]);
10237 src = REGNO (operands[1]);
10239 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
10242 for (i = 0; i < 4; i++)
10244 ops[0] = gen_rtx_REG (SImode, dest + i);
10245 ops[1] = gen_rtx_REG (SImode, src + i);
10246 output_asm_insn ("mov%?\t%0, %1", ops);
10249 for (i = 3; i >= 0; i--)
10251 ops[0] = gen_rtx_REG (SImode, dest + i);
10252 ops[1] = gen_rtx_REG (SImode, src + i);
10253 output_asm_insn ("mov%?\t%0, %1", ops);
10259 gcc_assert (MEM_P (operands[0]));
10260 gcc_assert (REG_P (operands[1]));
10261 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10263 switch (GET_CODE (XEXP (operands[0], 0)))
10266 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10270 gcc_unreachable ();
10277 /* Output a VFP load or store instruction. */
10280 output_move_vfp (rtx *operands)
10282 rtx reg, mem, addr, ops[2];
10283 int load = REG_P (operands[0]);
10284 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10285 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10288 enum machine_mode mode;
10290 reg = operands[!load];
10291 mem = operands[load];
10293 mode = GET_MODE (reg);
10295 gcc_assert (REG_P (reg));
10296 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10297 gcc_assert (mode == SFmode
10301 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10302 gcc_assert (MEM_P (mem));
10304 addr = XEXP (mem, 0);
10306 switch (GET_CODE (addr))
10309 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10310 ops[0] = XEXP (addr, 0);
10315 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10316 ops[0] = XEXP (addr, 0);
10321 templ = "f%s%c%%?\t%%%s0, %%1%s";
10327 sprintf (buff, templ,
10328 load ? "ld" : "st",
10331 integer_p ? "\t%@ int" : "");
10332 output_asm_insn (buff, ops);
10337 /* Output a Neon quad-word load or store, or a load or store for
10338 larger structure modes.
10340 WARNING: The ordering of elements is weird in big-endian mode,
10341 because we use VSTM, as required by the EABI. GCC RTL defines
10342 element ordering based on in-memory order. This can differ
10343 from the architectural ordering of elements within a NEON register.
10344 The intrinsics defined in arm_neon.h use the NEON register element
10345 ordering, not the GCC RTL element ordering.
10347 For example, the in-memory ordering of a big-endian quadword
10348 vector with 16-bit elements when stored from register pair {d0,d1}
10349 will be (lowest address first, d0[N] is NEON register element N):
10351 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
10353 When necessary, quadword registers (dN, dN+1) are moved to ARM
10354 registers from rN in the order:
10356 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10358 So that STM/LDM can be used on vectors in ARM registers, and the
10359 same memory layout will result as if VSTM/VLDM were used. */
10362 output_move_neon (rtx *operands)
10364 rtx reg, mem, addr, ops[2];
10365 int regno, load = REG_P (operands[0]);
10368 enum machine_mode mode;
10370 reg = operands[!load];
10371 mem = operands[load];
10373 mode = GET_MODE (reg);
10375 gcc_assert (REG_P (reg));
10376 regno = REGNO (reg);
10377 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10378 || NEON_REGNO_OK_FOR_QUAD (regno));
10379 gcc_assert (VALID_NEON_DREG_MODE (mode)
10380 || VALID_NEON_QREG_MODE (mode)
10381 || VALID_NEON_STRUCT_MODE (mode));
10382 gcc_assert (MEM_P (mem));
10384 addr = XEXP (mem, 0);
10386 /* Strip off const from addresses like (const (plus (...))). */
10387 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10388 addr = XEXP (addr, 0);
10390 switch (GET_CODE (addr))
10393 templ = "v%smia%%?\t%%0!, %%h1";
10394 ops[0] = XEXP (addr, 0);
10399 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10400 gcc_unreachable ();
10405 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10408 for (i = 0; i < nregs; i++)
10410 /* We're only using DImode here because it's a convenient size. */
10411 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10412 ops[1] = adjust_address (mem, SImode, 8 * i);
10413 if (reg_overlap_mentioned_p (ops[0], mem))
10415 gcc_assert (overlap == -1);
10420 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10421 output_asm_insn (buff, ops);
10426 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10427 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10428 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10429 output_asm_insn (buff, ops);
10436 templ = "v%smia%%?\t%%m0, %%h1";
10441 sprintf (buff, templ, load ? "ld" : "st");
10442 output_asm_insn (buff, ops);
10447 /* Output an ADD r, s, #n where n may be too big for one instruction.
10448 If adding zero to one register, output nothing. */
10450 output_add_immediate (rtx *operands)
10452 HOST_WIDE_INT n = INTVAL (operands[2]);
10454 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10457 output_multi_immediate (operands,
10458 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10461 output_multi_immediate (operands,
10462 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10469 /* Output a multiple immediate operation.
10470 OPERANDS is the vector of operands referred to in the output patterns.
10471 INSTR1 is the output pattern to use for the first constant.
10472 INSTR2 is the output pattern to use for subsequent constants.
10473 IMMED_OP is the index of the constant slot in OPERANDS.
10474 N is the constant value. */
10475 static const char *
10476 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10477 int immed_op, HOST_WIDE_INT n)
10479 #if HOST_BITS_PER_WIDE_INT > 32
10485 /* Quick and easy output. */
10486 operands[immed_op] = const0_rtx;
10487 output_asm_insn (instr1, operands);
10492 const char * instr = instr1;
10494 /* Note that n is never zero here (which would give no output). */
10495 for (i = 0; i < 32; i += 2)
10499 operands[immed_op] = GEN_INT (n & (255 << i));
10500 output_asm_insn (instr, operands);
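/* Worked example: N == 0x101 splits into the 8-bit chunks 0x001 and
   0x100, so output_add_immediate would emit (registers illustrative):

	add	r0, r1, #1
	add	r0, r0, #256						*/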
10510 /* Return the name of a shifter operation. */
10511 static const char *
10512 arm_shift_nmem(enum rtx_code code)
10517 return ARM_LSL_NAME;
10533 /* Return the appropriate ARM instruction for the operation code.
10534 The returned result should not be overwritten. OP is the rtx of the
10535 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10538 arithmetic_instr (rtx op, int shift_first_arg)
10540 switch (GET_CODE (op))
10546 return shift_first_arg ? "rsb" : "sub";
10561 return arm_shift_nmem(GET_CODE(op));
10564 gcc_unreachable ();
10568 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10569 for the operation code. The returned result should not be overwritten.
10570 OP is the rtx code of the shift.
10571 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
10573 static const char *
10574 shift_op (rtx op, HOST_WIDE_INT *amountp)
10577 enum rtx_code code = GET_CODE (op);
10579 switch (GET_CODE (XEXP (op, 1)))
10587 *amountp = INTVAL (XEXP (op, 1));
10591 gcc_unreachable ();
10597 gcc_assert (*amountp != -1);
10598 *amountp = 32 - *amountp;
10601 /* Fall through. */
10607 mnem = arm_shift_nmem(code);
10611 /* We never have to worry about the amount being other than a
10612 power of 2, since this case can never be reloaded from a reg. */
10613 gcc_assert (*amountp != -1);
10614 *amountp = int_log2 (*amountp);
10615 return ARM_LSL_NAME;
10618 gcc_unreachable ();
10621 if (*amountp != -1)
10623 /* This is not 100% correct, but follows from the desire to merge
10624 multiplication by a power of 2 with the recognizer for a
10625 shift. >=32 is not a valid shift for "lsl", so we must try and
10626 output a shift that produces the correct arithmetical result.
10627 Using lsr #32 is identical except for the fact that the carry bit
10628 is not set correctly if we set the flags; but we never use the
10629 carry bit from such an operation, so we can ignore that. */
10630 if (code == ROTATERT)
10631 /* Rotate is just modulo 32. */
10633 else if (*amountp != (*amountp & 31))
10635 if (code == ASHIFT)
10640 /* Shifts of 0 are no-ops. */
10648 /* Obtain the shift from the POWER of two. */
10650 static HOST_WIDE_INT
10651 int_log2 (HOST_WIDE_INT power)
10653 HOST_WIDE_INT shift = 0;
10655 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10657 gcc_assert (shift <= 31);
10664 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10665 because /bin/as is horribly restrictive. The judgement about
10666 whether or not each character is 'printable' (and can be output as
10667 is) or not (and must be printed with an octal escape) must be made
10668 with reference to the *host* character set -- the situation is
10669 similar to that discussed in the comments above pp_c_char in
10670 c-pretty-print.c. */
10672 #define MAX_ASCII_LEN 51
10675 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10678 int len_so_far = 0;
10680 fputs ("\t.ascii\t\"", stream);
10682 for (i = 0; i < len; i++)
10686 if (len_so_far >= MAX_ASCII_LEN)
10688 fputs ("\"\n\t.ascii\t\"", stream);
10694 if (c == '\\' || c == '\"')
10696 putc ('\\', stream);
10704 fprintf (stream, "\\%03o", c);
10709 fputs ("\"\n", stream);
10712 /* Compute the register save mask for registers 0 through 12
10713 inclusive. This code is used by arm_compute_save_reg_mask. */
10715 static unsigned long
10716 arm_compute_save_reg0_reg12_mask (void)
10718 unsigned long func_type = arm_current_func_type ();
10719 unsigned long save_reg_mask = 0;
10722 if (IS_INTERRUPT (func_type))
10724 unsigned int max_reg;
10725 /* Interrupt functions must not corrupt any registers,
10726 even call clobbered ones. If this is a leaf function
10727 we can just examine the registers used by the RTL, but
10728 otherwise we have to assume that whatever function is
10729 called might clobber anything, and so we have to save
10730 all the call-clobbered registers as well. */
10731 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10732 /* FIQ handlers have registers r8 - r12 banked, so
10733 we only need to check r0 - r7. Normal ISRs only
10734 bank r14 and r15, so we must check up to r12.
10735 r13 is the stack pointer which is always preserved,
10736 so we do not need to consider it here. */
10741 for (reg = 0; reg <= max_reg; reg++)
10742 if (df_regs_ever_live_p (reg)
10743 || (! current_function_is_leaf && call_used_regs[reg]))
10744 save_reg_mask |= (1 << reg);
10746 /* Also save the pic base register if necessary. */
10748 && !TARGET_SINGLE_PIC_BASE
10749 && arm_pic_register != INVALID_REGNUM
10750 && crtl->uses_pic_offset_table)
10751 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10755 /* In the normal case we only need to save those registers
10756 which are call saved and which are used by this function. */
10757 for (reg = 0; reg <= 11; reg++)
10758 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10759 save_reg_mask |= (1 << reg);
10761 /* Handle the frame pointer as a special case. */
10762 if (frame_pointer_needed)
10763 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10765 /* If we aren't loading the PIC register,
10766 don't stack it even though it may be live. */
10768 && !TARGET_SINGLE_PIC_BASE
10769 && arm_pic_register != INVALID_REGNUM
10770 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10771 || crtl->uses_pic_offset_table))
10772 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10774 /* The prologue will copy SP into R0, so save it. */
10775 if (IS_STACKALIGN (func_type))
10776 save_reg_mask |= 1;
10779 /* Save registers so the exception handler can modify them. */
10780 if (crtl->calls_eh_return)
10786 reg = EH_RETURN_DATA_REGNO (i);
10787 if (reg == INVALID_REGNUM)
10789 save_reg_mask |= 1 << reg;
10793 return save_reg_mask;
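/* E.g. a normal function that uses r4 and r7 and needs a frame
   pointer returns (1 << 4) | (1 << 7) | (1 << 11) here (illustrative;
   assumes ARM state, where r11 is the hard frame pointer).  */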
10797 /* Compute the number of bytes used to store the static chain register on the
10798 stack, above the stack frame. We need to know this accurately to get the
10799 alignment of the rest of the stack frame correct. */
10801 static int arm_compute_static_chain_stack_bytes (void)
10803 unsigned long func_type = arm_current_func_type ();
10804 int static_chain_stack_bytes = 0;
10806 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
10807 IS_NESTED (func_type) &&
10808 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
10809 static_chain_stack_bytes = 4;
10811 return static_chain_stack_bytes;
10815 /* Compute a bit mask of which registers need to be
10816 saved on the stack for the current function.
10817 This is used by arm_get_frame_offsets, which may add extra registers. */
10819 static unsigned long
10820 arm_compute_save_reg_mask (void)
10822 unsigned int save_reg_mask = 0;
10823 unsigned long func_type = arm_current_func_type ();
10826 if (IS_NAKED (func_type))
10827 /* This should never really happen. */
10830 /* If we are creating a stack frame, then we must save the frame pointer,
10831 IP (which will hold the old stack pointer), LR and the PC. */
10832 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10834 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10837 | (1 << PC_REGNUM);
10839 /* Volatile functions do not return, so there
10840 is no need to save any other registers. */
10841 if (IS_VOLATILE (func_type))
10842 return save_reg_mask;
10844 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10846 /* Decide if we need to save the link register.
10847 Interrupt routines have their own banked link register,
10848 so they never need to save it.
10849 Otherwise if we do not use the link register we do not need to save
10850 it. If we are pushing other registers onto the stack however, we
10851 can save an instruction in the epilogue by pushing the link register
10852 now and then popping it back into the PC. This incurs extra memory
10853 accesses though, so we only do it when optimizing for size, and only
10854 if we know that we will not need a fancy return sequence. */
10855 if (df_regs_ever_live_p (LR_REGNUM)
10858 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10859 && !crtl->calls_eh_return))
10860 save_reg_mask |= 1 << LR_REGNUM;
10862 if (cfun->machine->lr_save_eliminated)
10863 save_reg_mask &= ~ (1 << LR_REGNUM);
10865 if (TARGET_REALLY_IWMMXT
10866 && ((bit_count (save_reg_mask)
10867 + ARM_NUM_INTS (crtl->args.pretend_args_size +
10868 arm_compute_static_chain_stack_bytes())
10871 /* The total number of registers that are going to be pushed
10872 onto the stack is odd. We need to ensure that the stack
10873 is 64-bit aligned before we start to save iWMMXt registers,
10874 and also before we start to create locals. (A local variable
10875 might be a double or long long which we will load/store using
10876 an iWMMXt instruction). Therefore we need to push another
10877 ARM register, so that the stack will be 64-bit aligned. We
10878 try to avoid using the arg registers (r0 - r3) as they might be
10879 used to pass values in a tail call. */
10880 for (reg = 4; reg <= 12; reg++)
10881 if ((save_reg_mask & (1 << reg)) == 0)
10885 save_reg_mask |= (1 << reg);
10888 cfun->machine->sibcall_blocked = 1;
10889 save_reg_mask |= (1 << 3);
10893 /* We may need to push an additional register for use initializing the
10894 PIC base register. */
10895 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10896 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10898 reg = thumb_find_work_register (1 << 4);
10899 if (!call_used_regs[reg])
10900 save_reg_mask |= (1 << reg);
10903 return save_reg_mask;
10907 /* Compute a bit mask of which registers need to be
10908 saved on the stack for the current function. */
10909 static unsigned long
10910 thumb1_compute_save_reg_mask (void)
10912 unsigned long mask;
10916 for (reg = 0; reg < 12; reg ++)
10917 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10921 && !TARGET_SINGLE_PIC_BASE
10922 && arm_pic_register != INVALID_REGNUM
10923 && crtl->uses_pic_offset_table)
10924 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10926 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10927 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10928 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10930 /* LR will also be pushed if any lo regs are pushed. */
10931 if (mask & 0xff || thumb_force_lr_save ())
10932 mask |= (1 << LR_REGNUM);
10934 /* Make sure we have a low work register if we need one.
10935 We will need one if we are going to push a high register,
10936 but we are not currently intending to push a low register. */
10937 if ((mask & 0xff) == 0
10938 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10940 /* Use thumb_find_work_register to choose which register
10941 we will use. If the register is live then we will
10942 have to push it. Use LAST_LO_REGNUM as our fallback
10943 choice for the register to select. */
10944 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10945 /* Make sure the register returned by thumb_find_work_register is
10946 not part of the return value. */
10947 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
10948 reg = LAST_LO_REGNUM;
10950 if (! call_used_regs[reg])
10954 /* The 504 below is 8 bytes less than 512 because there are two possible
10955 alignment words. We can't tell here if they will be present or not so we
10956 have to play it safe and assume that they are. */
10957 if ((CALLER_INTERWORKING_SLOT_SIZE +
10958 ROUND_UP_WORD (get_frame_size ()) +
10959 crtl->outgoing_args_size) >= 504)
10961 /* This is the same as the code in thumb1_expand_prologue() which
10962 determines which register to use for stack decrement. */
10963 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
10964 if (mask & (1 << reg))
10967 if (reg > LAST_LO_REGNUM)
10969 /* Make sure we have a register available for stack decrement. */
10970 mask |= 1 << LAST_LO_REGNUM;
10978 /* Return the number of bytes required to save VFP registers. */
10980 arm_get_vfp_saved_size (void)
10982 unsigned int regno;
10987 /* Space for saved VFP registers. */
10988 if (TARGET_HARD_FLOAT && TARGET_VFP)
10991 for (regno = FIRST_VFP_REGNUM;
10992 regno < LAST_VFP_REGNUM;
10995 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10996 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11000 /* Workaround ARM10 VFPr1 bug. */
11001 if (count == 2 && !arm_arch6)
11003 saved += count * 8;
11012 if (count == 2 && !arm_arch6)
11014 saved += count * 8;
11021 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11022 everything bar the final return instruction. */
11024 output_return_instruction (rtx operand, int really_return, int reverse)
11026 char conditional[10];
11029 unsigned long live_regs_mask;
11030 unsigned long func_type;
11031 arm_stack_offsets *offsets;
11033 func_type = arm_current_func_type ();
11035 if (IS_NAKED (func_type))
11038 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11040 /* If this function was declared non-returning, and we have
11041 found a tail call, then we have to trust that the called
11042 function won't return. */
11047 /* Otherwise, trap an attempted return by aborting. */
11049 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11051 assemble_external_libcall (ops[1]);
11052 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11058 gcc_assert (!cfun->calls_alloca || really_return);
11060 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11062 return_used_this_function = 1;
11064 offsets = arm_get_frame_offsets ();
11065 live_regs_mask = offsets->saved_regs_mask;
11067 if (live_regs_mask)
11069 const char * return_reg;
11071 /* If we do not have any special requirements for function exit
11072 (e.g. interworking) then we can load the return address
11073 directly into the PC. Otherwise we must load it into LR. */
11075 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11076 return_reg = reg_names[PC_REGNUM];
11078 return_reg = reg_names[LR_REGNUM];
11080 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11082 /* There are three possible reasons for the IP register
11083 being saved. 1) a stack frame was created, in which case
11084 IP contains the old stack pointer, or 2) an ISR routine
11085 corrupted it, or 3) it was saved to align the stack on
11086 iWMMXt. In case 1, restore IP into SP, otherwise just
11088 if (frame_pointer_needed)
11090 live_regs_mask &= ~ (1 << IP_REGNUM);
11091 live_regs_mask |= (1 << SP_REGNUM);
11094 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11097 /* On some ARM architectures it is faster to use LDR rather than
11098 LDM to load a single register. On other architectures, the
11099 cost is the same. In 26 bit mode, or for exception handlers,
11100 we have to use LDM to load the PC so that the CPSR is also restored. */
11102 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11103 if (live_regs_mask == (1U << reg))
11106 if (reg <= LAST_ARM_REGNUM
11107 && (reg != LR_REGNUM
11109 || ! IS_INTERRUPT (func_type)))
11111 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11112 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11119 /* Generate the load multiple instruction to restore the
11120 registers. Note we can get here, even if
11121 frame_pointer_needed is true, but only if sp already
11122 points to the base of the saved core registers. */
11123 if (live_regs_mask & (1 << SP_REGNUM))
11125 unsigned HOST_WIDE_INT stack_adjust;
11127 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11128 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11130 if (stack_adjust && arm_arch5 && TARGET_ARM)
11131 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11134 /* If we can't use ldmib (SA110 bug),
11135 then try to pop r3 instead. */
11137 live_regs_mask |= 1 << 3;
11138 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11142 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11144 p = instr + strlen (instr);
11146 for (reg = 0; reg <= SP_REGNUM; reg++)
11147 if (live_regs_mask & (1 << reg))
11149 int l = strlen (reg_names[reg]);
11155 memcpy (p, ", ", 2);
11159 memcpy (p, "%|", 2);
11160 memcpy (p + 2, reg_names[reg], l);
11164 if (live_regs_mask & (1 << LR_REGNUM))
11166 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11167 /* If returning from an interrupt, restore the CPSR. */
11168 if (IS_INTERRUPT (func_type))
11175 output_asm_insn (instr, & operand);
11177 /* See if we need to generate an extra instruction to
11178 perform the actual function return. */
11180 && func_type != ARM_FT_INTERWORKED
11181 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11183 /* The return has already been handled
11184 by loading the LR into the PC. */
11191 switch ((int) ARM_FUNC_TYPE (func_type))
11195 /* ??? This is wrong for unified assembly syntax. */
11196 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11199 case ARM_FT_INTERWORKED:
11200 sprintf (instr, "bx%s\t%%|lr", conditional);
11203 case ARM_FT_EXCEPTION:
11204 /* ??? This is wrong for unified assembly syntax. */
11205 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11209 /* Use bx if it's available. */
11210 if (arm_arch5 || arm_arch4t)
11211 sprintf (instr, "bx%s\t%%|lr", conditional);
11213 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11217 output_asm_insn (instr, & operand);
11223 /* Write the function name into the code section, directly preceding
11224 the function prologue.
11226 Code will be output similar to this:
11228 .ascii "arm_poke_function_name", 0
11231 .word 0xff000000 + (t1 - t0)
11232 arm_poke_function_name
11234 stmfd sp!, {fp, ip, lr, pc}
11237 When performing a stack backtrace, code can inspect the value
11238 of 'pc' stored at 'fp' + 0. If the trace function then looks
11239 at location pc - 12 and the top 8 bits are set, then we know
11240 that there is a function name embedded immediately preceding this
11241 location, whose length is ((pc[-3]) & 0x00ffffff).
11243 We assume that pc is declared as a pointer to an unsigned long.
11245 It is of no benefit to output the function name if we are assembling
11246 a leaf function. These function types will not contain a stack
11247 backtrace structure, therefore it is not possible to determine the function name. */
11250 arm_poke_function_name (FILE *stream, const char *name)
11252 unsigned long alignlength;
11253 unsigned long length;
11256 length = strlen (name) + 1;
11257 alignlength = ROUND_UP_WORD (length);
11259 ASM_OUTPUT_ASCII (stream, name, length);
11260 ASM_OUTPUT_ALIGN (stream, 2);
11261 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11262 assemble_aligned_integer (UNITS_PER_WORD, x);
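/* Worked example: for NAME == "foo", LENGTH is 4 (three characters
   plus the trailing NUL), ALIGNLENGTH rounds up to 4, and the marker
   word emitted is 0xff000000 + 4 == 0xff000004.  */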
11265 /* Place some comments into the assembler stream
11266 describing the current function. */
11268 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11270 unsigned long func_type;
11274 thumb1_output_function_prologue (f, frame_size);
11278 /* Sanity check. */
11279 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11281 func_type = arm_current_func_type ();
11283 switch ((int) ARM_FUNC_TYPE (func_type))
11286 case ARM_FT_NORMAL:
11288 case ARM_FT_INTERWORKED:
11289 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11292 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11295 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11297 case ARM_FT_EXCEPTION:
11298 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11302 if (IS_NAKED (func_type))
11303 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11305 if (IS_VOLATILE (func_type))
11306 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11308 if (IS_NESTED (func_type))
11309 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11310 if (IS_STACKALIGN (func_type))
11311 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11313 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11315 crtl->args.pretend_args_size, frame_size);
11317 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11318 frame_pointer_needed,
11319 cfun->machine->uses_anonymous_args);
11321 if (cfun->machine->lr_save_eliminated)
11322 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11324 if (crtl->calls_eh_return)
11325 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11327 return_used_this_function = 0;
11331 arm_output_epilogue (rtx sibling)
11334 unsigned long saved_regs_mask;
11335 unsigned long func_type;
11336 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11337 frame that is $fp + 4 for a non-variadic function. */
11338 int floats_offset = 0;
11340 FILE * f = asm_out_file;
11341 unsigned int lrm_count = 0;
11342 int really_return = (sibling == NULL);
11344 arm_stack_offsets *offsets;
11346 /* If we have already generated the return instruction
11347 then it is futile to generate anything else. */
11348 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11351 func_type = arm_current_func_type ();
11353 if (IS_NAKED (func_type))
11354 /* Naked functions don't have epilogues. */
11357 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11361 /* A volatile function should never return. Call abort. */
11362 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11363 assemble_external_libcall (op);
11364 output_asm_insn ("bl\t%a0", &op);
11369 /* If we are throwing an exception, then we really must be doing a
11370 return, so we can't tail-call. */
11371 gcc_assert (!crtl->calls_eh_return || really_return);
11373 offsets = arm_get_frame_offsets ();
11374 saved_regs_mask = offsets->saved_regs_mask;
11377 lrm_count = bit_count (saved_regs_mask);
11379 floats_offset = offsets->saved_args;
11380 /* Compute how far away the floats will be. */
11381 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11382 if (saved_regs_mask & (1 << reg))
11383 floats_offset += 4;
11385 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11387 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11388 int vfp_offset = offsets->frame;
11390 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11392 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11393 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11395 floats_offset += 12;
11396 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11397 reg, FP_REGNUM, floats_offset - vfp_offset);
11402 start_reg = LAST_FPA_REGNUM;
11404 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11406 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11408 floats_offset += 12;
11410 /* We can't unstack more than four registers at once. */
11411 if (start_reg - reg == 3)
11413 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11414 reg, FP_REGNUM, floats_offset - vfp_offset);
11415 start_reg = reg - 1;
11420 if (reg != start_reg)
11421 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11422 reg + 1, start_reg - reg,
11423 FP_REGNUM, floats_offset - vfp_offset);
11424 start_reg = reg - 1;
11428 /* Just in case the last register checked also needs unstacking. */
11429 if (reg != start_reg)
11430 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11431 reg + 1, start_reg - reg,
11432 FP_REGNUM, floats_offset - vfp_offset);
11435 if (TARGET_HARD_FLOAT && TARGET_VFP)
11439 /* The fldmd insns do not have base+offset addressing
11440 modes, so we use IP to hold the address. */
11441 saved_size = arm_get_vfp_saved_size ();
11443 if (saved_size > 0)
11445 floats_offset += saved_size;
11446 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11447 FP_REGNUM, floats_offset - vfp_offset);
11449 start_reg = FIRST_VFP_REGNUM;
11450 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11452 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11453 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11455 if (start_reg != reg)
11456 vfp_output_fldmd (f, IP_REGNUM,
11457 (start_reg - FIRST_VFP_REGNUM) / 2,
11458 (reg - start_reg) / 2);
11459 start_reg = reg + 2;
11462 if (start_reg != reg)
11463 vfp_output_fldmd (f, IP_REGNUM,
11464 (start_reg - FIRST_VFP_REGNUM) / 2,
11465 (reg - start_reg) / 2);
11470 /* The frame pointer is guaranteed to be non-double-word aligned.
11471 This is because it is set to (old_stack_pointer - 4) and the
11472 old_stack_pointer was double word aligned. Thus the offset to
11473 the iWMMXt registers to be loaded must also be non-double-word
11474 sized, so that the resultant address *is* double-word aligned.
11475 We can ignore floats_offset since that was already included in
11476 the live_regs_mask. */
11477 lrm_count += (lrm_count % 2 ? 2 : 1);
11479 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11480 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11482 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11483 reg, FP_REGNUM, lrm_count * 4);
11488 /* saved_regs_mask should contain the IP, which at the time of stack
11489 frame generation actually contains the old stack pointer. So a
11490 quick way to unwind the stack is just pop the IP register directly
11491 into the stack pointer. */
11492 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11493 saved_regs_mask &= ~ (1 << IP_REGNUM);
11494 saved_regs_mask |= (1 << SP_REGNUM);
11496 /* There are two registers left in saved_regs_mask - LR and PC. We
11497 only need to restore the LR register (the return address), but to
11498 save time we can load it directly into the PC, unless we need a
11499 special function exit sequence, or we are not really returning. */
11501 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11502 && !crtl->calls_eh_return)
11503 /* Delete the LR from the register mask, so that the LR on
11504 the stack is loaded into the PC in the register mask. */
11505 saved_regs_mask &= ~ (1 << LR_REGNUM);
11507 saved_regs_mask &= ~ (1 << PC_REGNUM);
11509 /* We must use SP as the base register, because SP is one of the
11510 registers being restored. If an interrupt or page fault
11511 happens in the ldm instruction, the SP might or might not
11512 have been restored. That would be bad, as then SP will no
11513 longer indicate the safe area of stack, and we can get stack
11514 corruption. Using SP as the base register means that it will
11515 be reset correctly to the original value, should an interrupt
11516 occur. If the stack pointer already points at the right
11517 place, then omit the subtraction. */
11518 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11519 || cfun->calls_alloca)
11520 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11521 4 * bit_count (saved_regs_mask));
11522 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
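/* For illustration only (a sketch): with {r4, r5, fp, ip, lr, pc}
   saved in the prologue, the two statements above would typically
   produce

	sub	sp, fp, #20
	ldmfd	sp, {r4, r5, fp, sp, pc}

   after IP and LR have been replaced by SP and PC in the mask.  Note
   the absence of writeback: SP is itself reloaded from the stack (from
   the slot holding the old IP, i.e. the old stack pointer), so the one
   LDM restores the registers and resets SP in a single step.  */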
11524 if (IS_INTERRUPT (func_type))
11525 /* Interrupt handlers will have pushed the
11526 IP onto the stack, so restore it now. */
11527 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
/* This branch is executed for ARM mode (non-apcs frames) and
   Thumb-2 mode.  Frame layout is essentially the same for those
   cases, except that in ARM mode the frame pointer points to the
   first saved register, while in Thumb-2 mode the frame pointer points
   to the last saved register.

   It is possible to make the frame pointer point to the last saved
   register in both cases, and remove some of the conditionals below.
   That means that the fp setup in the prologue would be just
   "mov fp, sp" and the sp restore in the epilogue would be just
   "mov sp, fp", whereas now we have to use add/sub in those cases.
   However, the value of that would be marginal, as both mov and
   add/sub are 32-bit in ARM mode, and it would require extra
   conditionals in arm_expand_prologue to distinguish the
   ARM-apcs-frame case (where the frame pointer is required to point
   at the first saved register) from the ARM-non-apcs-frame case.
   Therefore, such a change is postponed until a real need arises.  */
11548 HOST_WIDE_INT amount;
11550 /* Restore stack pointer if necessary. */
11551 if (TARGET_ARM && frame_pointer_needed)
11553 operands[0] = stack_pointer_rtx;
11554 operands[1] = hard_frame_pointer_rtx;
11556 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11557 output_add_immediate (operands);
11561 if (frame_pointer_needed)
/* For Thumb-2 restore sp from the frame pointer.
   Operand restrictions mean we have to increment FP, then copy
   it to SP.  */
11566 amount = offsets->locals_base - offsets->saved_regs;
11567 operands[0] = hard_frame_pointer_rtx;
11571 unsigned long count;
11572 operands[0] = stack_pointer_rtx;
11573 amount = offsets->outgoing_args - offsets->saved_regs;
/* Pop call-clobbered registers if it avoids a
   separate stack adjustment.  */
11576 count = offsets->saved_regs - offsets->saved_args;
11579 && !crtl->calls_eh_return
11580 && bit_count(saved_regs_mask) * 4 == count
11581 && !IS_INTERRUPT (func_type)
11582 && !crtl->tail_call_emit)
11584 unsigned long mask;
11585 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11587 mask &= ~saved_regs_mask;
11589 while (bit_count (mask) * 4 > amount)
11591 while ((mask & (1 << reg)) == 0)
11593 mask &= ~(1 << reg);
11595 if (bit_count (mask) * 4 == amount) {
11597 saved_regs_mask |= mask;
11604 operands[1] = operands[0];
11605 operands[2] = GEN_INT (amount);
11606 output_add_immediate (operands);
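/* A worked example of the optimization above (illustrative): if 12
   bytes of stack adjustment remain and the function returns its value
   in r0 alone, then r1-r3 are dead call-clobbered registers, so adding
   them to the mask lets a single "pop {r1, r2, r3, ...}" deallocate
   those 12 bytes while restoring the saved registers, avoiding a
   separate "add sp, sp, #12".  */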
11608 if (frame_pointer_needed)
11609 asm_fprintf (f, "\tmov\t%r, %r\n",
11610 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11613 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11615 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11616 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11617 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11622 start_reg = FIRST_FPA_REGNUM;
11624 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11626 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11628 if (reg - start_reg == 3)
11630 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11631 start_reg, SP_REGNUM);
11632 start_reg = reg + 1;
11637 if (reg != start_reg)
11638 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11639 start_reg, reg - start_reg,
11642 start_reg = reg + 1;
11646 /* Just in case the last register checked also needs unstacking. */
11647 if (reg != start_reg)
11648 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11649 start_reg, reg - start_reg, SP_REGNUM);
11652 if (TARGET_HARD_FLOAT && TARGET_VFP)
11654 start_reg = FIRST_VFP_REGNUM;
11655 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11657 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11658 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11660 if (start_reg != reg)
11661 vfp_output_fldmd (f, SP_REGNUM,
11662 (start_reg - FIRST_VFP_REGNUM) / 2,
11663 (reg - start_reg) / 2);
11664 start_reg = reg + 2;
11667 if (start_reg != reg)
11668 vfp_output_fldmd (f, SP_REGNUM,
11669 (start_reg - FIRST_VFP_REGNUM) / 2,
11670 (reg - start_reg) / 2);
11673 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11674 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11675 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11677 /* If we can, restore the LR into the PC. */
11678 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11679 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11680 && !IS_STACKALIGN (func_type)
11682 && crtl->args.pretend_args_size == 0
11683 && saved_regs_mask & (1 << LR_REGNUM)
11684 && !crtl->calls_eh_return)
11686 saved_regs_mask &= ~ (1 << LR_REGNUM);
11687 saved_regs_mask |= (1 << PC_REGNUM);
11688 rfe = IS_INTERRUPT (func_type);
/* Load the registers off the stack.  If we only have one register
   to load, use the LDR instruction - it is faster.  For Thumb-2,
   always use pop and the assembler will pick the best instruction.  */
11696 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11697 && !IS_INTERRUPT(func_type))
11699 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11701 else if (saved_regs_mask)
11703 if (saved_regs_mask & (1 << SP_REGNUM))
11704 /* Note - write back to the stack register is not enabled
11705 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11706 in the list of registers and if we add writeback the
11707 instruction becomes UNPREDICTABLE. */
11708 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11710 else if (TARGET_ARM)
11711 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11714 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11717 if (crtl->args.pretend_args_size)
11719 /* Unwind the pre-pushed regs. */
11720 operands[0] = operands[1] = stack_pointer_rtx;
11721 operands[2] = GEN_INT (crtl->args.pretend_args_size);
11722 output_add_immediate (operands);
11726 /* We may have already restored PC directly from the stack. */
11727 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11730 /* Stack adjustment for exception handler. */
11731 if (crtl->calls_eh_return)
11732 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11733 ARM_EH_STACKADJ_REGNUM);
11735 /* Generate the return instruction. */
11736 switch ((int) ARM_FUNC_TYPE (func_type))
11740 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11743 case ARM_FT_EXCEPTION:
11744 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11747 case ARM_FT_INTERWORKED:
11748 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11752 if (IS_STACKALIGN (func_type))
11754 /* See comment in arm_expand_prologue. */
11755 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11757 if (arm_arch5 || arm_arch4t)
11758 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11760 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11768 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11769 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11771 arm_stack_offsets *offsets;
11777 /* Emit any call-via-reg trampolines that are needed for v4t support
11778 of call_reg and call_value_reg type insns. */
11779 for (regno = 0; regno < LR_REGNUM; regno++)
11781 rtx label = cfun->machine->call_via[regno];
11785 switch_to_section (function_section (current_function_decl));
11786 targetm.asm_out.internal_label (asm_out_file, "L",
11787 CODE_LABEL_NUMBER (label));
11788 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11792 /* ??? Probably not safe to set this here, since it assumes that a
11793 function will be emitted as assembly immediately after we generate
11794 RTL for it. This does not happen for inline functions. */
11795 return_used_this_function = 0;
11797 else /* TARGET_32BIT */
11799 /* We need to take into account any stack-frame rounding. */
11800 offsets = arm_get_frame_offsets ();
11802 gcc_assert (!use_return_insn (FALSE, NULL)
11803 || !return_used_this_function
11804 || offsets->saved_regs == offsets->outgoing_args
11805 || frame_pointer_needed);
11807 /* Reset the ARM-specific per-function variables. */
11808 after_arm_reorg = 0;
11812 /* Generate and emit an insn that we will recognize as a push_multi.
11813 Unfortunately, since this insn does not reflect very well the actual
11814 semantics of the operation, we need to annotate the insn for the benefit
11815 of DWARF2 frame unwind information. */
11817 emit_multi_reg_push (unsigned long mask)
11820 int num_dwarf_regs;
11824 int dwarf_par_index;
11827 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11828 if (mask & (1 << i))
11831 gcc_assert (num_regs && num_regs <= 16);
/* We don't record the PC in the dwarf frame information.  */
num_dwarf_regs = num_regs;
if (mask & (1 << PC_REGNUM))
  num_dwarf_regs--;
11838 /* For the body of the insn we are going to generate an UNSPEC in
11839 parallel with several USEs. This allows the insn to be recognized
11840 by the push_multi pattern in the arm.md file. The insn looks
11841 something like this:
11844 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11845 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11846 (use (reg:SI 11 fp))
11847 (use (reg:SI 12 ip))
11848 (use (reg:SI 14 lr))
11849 (use (reg:SI 15 pc))
11852 For the frame note however, we try to be more explicit and actually
11853 show each register being stored into the stack frame, plus a (single)
11854 decrement of the stack pointer. We do it this way in order to be
11855 friendly to the stack unwinding code, which only wants to see a single
11856 stack decrement per instruction. The RTL we generate for the note looks
11857 something like this:
11860 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11861 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11862 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11863 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11864 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11867 This sequence is used both by the code to support stack unwinding for
exception handlers and the code to generate dwarf2 frame debugging.  */
11870 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11871 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11872 dwarf_par_index = 1;
11874 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11876 if (mask & (1 << i))
11878 reg = gen_rtx_REG (SImode, i);
11880 XVECEXP (par, 0, 0)
11881 = gen_rtx_SET (VOIDmode,
11882 gen_frame_mem (BLKmode,
11883 gen_rtx_PRE_DEC (BLKmode,
11884 stack_pointer_rtx)),
11885 gen_rtx_UNSPEC (BLKmode,
11886 gen_rtvec (1, reg),
11887 UNSPEC_PUSH_MULT));
11889 if (i != PC_REGNUM)
11891 tmp = gen_rtx_SET (VOIDmode,
11892 gen_frame_mem (SImode, stack_pointer_rtx),
11894 RTX_FRAME_RELATED_P (tmp) = 1;
11895 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11903 for (j = 1, i++; j < num_regs; i++)
11905 if (mask & (1 << i))
11907 reg = gen_rtx_REG (SImode, i);
11909 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11911 if (i != PC_REGNUM)
11914 = gen_rtx_SET (VOIDmode,
11915 gen_frame_mem (SImode,
11916 plus_constant (stack_pointer_rtx,
11919 RTX_FRAME_RELATED_P (tmp) = 1;
11920 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11927 par = emit_insn (par);
11929 tmp = gen_rtx_SET (VOIDmode,
11931 plus_constant (stack_pointer_rtx, -4 * num_regs));
11932 RTX_FRAME_RELATED_P (tmp) = 1;
11933 XVECEXP (dwarf, 0, 0) = tmp;
11935 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
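/* Illustrative use (a sketch): emit_multi_reg_push ((1 << 4) | (1 << LR_REGNUM))
   emits one push_multi insn equivalent to "stmfd sp!, {r4, lr}" and
   attaches a REG_FRAME_RELATED_EXPR note describing the 8-byte SP
   decrement and each of the two stores individually, which is the form
   the unwinder wants to see.  */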
11940 /* Calculate the size of the return value that is passed in registers. */
11942 arm_size_return_regs (void)
11944 enum machine_mode mode;
11946 if (crtl->return_rtx != 0)
11947 mode = GET_MODE (crtl->return_rtx);
11949 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11951 return GET_MODE_SIZE (mode);
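/* For example (illustrative): a function returning a DImode value yields 8,
   so callers such as the epilogue code above compute a mask covering r0-r1
   as holding the return value and treat only the remaining argument
   registers as reusable for stack deallocation.  */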
11955 emit_sfm (int base_reg, int count)
11962 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11963 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11965 reg = gen_rtx_REG (XFmode, base_reg++);
11967 XVECEXP (par, 0, 0)
11968 = gen_rtx_SET (VOIDmode,
11969 gen_frame_mem (BLKmode,
11970 gen_rtx_PRE_DEC (BLKmode,
11971 stack_pointer_rtx)),
11972 gen_rtx_UNSPEC (BLKmode,
11973 gen_rtvec (1, reg),
11974 UNSPEC_PUSH_MULT));
11975 tmp = gen_rtx_SET (VOIDmode,
11976 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11977 RTX_FRAME_RELATED_P (tmp) = 1;
11978 XVECEXP (dwarf, 0, 1) = tmp;
11980 for (i = 1; i < count; i++)
11982 reg = gen_rtx_REG (XFmode, base_reg++);
11983 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11985 tmp = gen_rtx_SET (VOIDmode,
11986 gen_frame_mem (XFmode,
11987 plus_constant (stack_pointer_rtx,
11990 RTX_FRAME_RELATED_P (tmp) = 1;
11991 XVECEXP (dwarf, 0, i + 1) = tmp;
11994 tmp = gen_rtx_SET (VOIDmode,
11996 plus_constant (stack_pointer_rtx, -12 * count));
11998 RTX_FRAME_RELATED_P (tmp) = 1;
11999 XVECEXP (dwarf, 0, 0) = tmp;
12001 par = emit_insn (par);
12002 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12008 /* Return true if the current function needs to save/restore LR. */
12011 thumb_force_lr_save (void)
12013 return !cfun->machine->lr_save_eliminated
12014 && (!leaf_function_p ()
12015 || thumb_far_jump_used_p ()
12016 || df_regs_ever_live_p (LR_REGNUM));
12020 /* Compute the distance from register FROM to register TO.
12021 These can be the arg pointer (26), the soft frame pointer (25),
12022 the stack pointer (13) or the hard frame pointer (11).
12023 In thumb mode r7 is used as the soft frame pointer, if needed.
12024 Typical stack layout looks like this:
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
12054 For a given function some or all of these stack components
12055 may not be needed, giving rise to the possibility of
12056 eliminating some of the registers.
12058 The values returned by this function must reflect the behavior
12059 of arm_expand_prologue() and arm_compute_save_reg_mask().
12061 The sign of the number returned reflects the direction of stack
12062 growth, so the values are positive for all eliminations except
12063 from the soft frame pointer to the hard frame pointer.
SFP may point just inside the local variables block to ensure correct
alignment.  */
12069 /* Calculate stack offsets. These are used to calculate register elimination
12070 offsets and in prologue/epilogue code. Also calculates which registers
12071 should be saved. */
12073 static arm_stack_offsets *
12074 arm_get_frame_offsets (void)
12076 struct arm_stack_offsets *offsets;
12077 unsigned long func_type;
12081 HOST_WIDE_INT frame_size;
12084 offsets = &cfun->machine->stack_offsets;
12086 /* We need to know if we are a leaf function. Unfortunately, it
12087 is possible to be called after start_sequence has been called,
12088 which causes get_insns to return the insns for the sequence,
12089 not the function, which will cause leaf_function_p to return
the incorrect result.  We only need
to know about leaf functions once reload has completed, and the
frame size cannot be changed after that time, so we can safely
use the cached value.  */
12096 if (reload_completed)
/* Initially this is the size of the local variables.  It will be translated
   into an offset once we have determined the size of preceding data.  */
12101 frame_size = ROUND_UP_WORD (get_frame_size ());
12103 leaf = leaf_function_p ();
12105 /* Space for variadic functions. */
12106 offsets->saved_args = crtl->args.pretend_args_size;
12108 /* In Thumb mode this is incorrect, but never used. */
12109 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12110 arm_compute_static_chain_stack_bytes();
12114 unsigned int regno;
12116 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12117 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12118 saved = core_saved;
12120 /* We know that SP will be doubleword aligned on entry, and we must
12121 preserve that condition at any subroutine call. We also require the
12122 soft frame pointer to be doubleword aligned. */
12124 if (TARGET_REALLY_IWMMXT)
12126 /* Check for the call-saved iWMMXt registers. */
12127 for (regno = FIRST_IWMMXT_REGNUM;
12128 regno <= LAST_IWMMXT_REGNUM;
12130 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12134 func_type = arm_current_func_type ();
12135 if (! IS_VOLATILE (func_type))
12137 /* Space for saved FPA registers. */
12138 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12139 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12142 /* Space for saved VFP registers. */
12143 if (TARGET_HARD_FLOAT && TARGET_VFP)
12144 saved += arm_get_vfp_saved_size ();
12147 else /* TARGET_THUMB1 */
12149 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12150 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12151 saved = core_saved;
12152 if (TARGET_BACKTRACE)
12156 /* Saved registers include the stack frame. */
12157 offsets->saved_regs = offsets->saved_args + saved +
12158 arm_compute_static_chain_stack_bytes();
12159 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
   on the stack.  */
12162 if (leaf && frame_size == 0)
12164 offsets->outgoing_args = offsets->soft_frame;
12165 offsets->locals_base = offsets->soft_frame;
12169 /* Ensure SFP has the correct alignment. */
12170 if (ARM_DOUBLEWORD_ALIGN
12171 && (offsets->soft_frame & 7))
12173 offsets->soft_frame += 4;
12174 /* Try to align stack by pushing an extra reg. Don't bother doing this
12175 when there is a stack frame as the alignment will be rolled into
12176 the normal stack adjustment. */
12177 if (frame_size + crtl->outgoing_args_size == 0)
12181 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12183 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12190 if (reg == -1 && arm_size_return_regs () <= 12
12191 && !crtl->tail_call_emit)
12193 /* Push/pop an argument register (r3) if all callee saved
12194 registers are already being pushed. */
12200 offsets->saved_regs += 4;
12201 offsets->saved_regs_mask |= (1 << reg);
12206 offsets->locals_base = offsets->soft_frame + frame_size;
12207 offsets->outgoing_args = (offsets->locals_base
12208 + crtl->outgoing_args_size);
12210 if (ARM_DOUBLEWORD_ALIGN)
12212 /* Ensure SP remains doubleword aligned. */
12213 if (offsets->outgoing_args & 7)
12214 offsets->outgoing_args += 4;
12215 gcc_assert (!(offsets->outgoing_args & 7));
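/* A worked example (illustrative, assuming CALLER_INTERWORKING_SLOT_SIZE
   is zero and no coprocessor registers are saved): a function with no
   pretend args that saves {r4, r5, r6, lr} (16 bytes), has 20 bytes of
   locals and 8 bytes of outgoing arguments gets saved_args = 0,
   saved_regs = 16, soft_frame = 16 (already doubleword aligned),
   locals_base = 36 and outgoing_args = 44, which the code above rounds
   up to 48.  */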
12222 /* Calculate the relative offsets for the different stack pointers. Positive
12223 offsets are in the direction of stack growth. */
12226 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12228 arm_stack_offsets *offsets;
12230 offsets = arm_get_frame_offsets ();
12232 /* OK, now we have enough information to compute the distances.
12233 There must be an entry in these switch tables for each pair
12234 of registers in ELIMINABLE_REGS, even if some of the entries
12235 seem to be redundant or useless. */
12238 case ARG_POINTER_REGNUM:
12241 case THUMB_HARD_FRAME_POINTER_REGNUM:
12244 case FRAME_POINTER_REGNUM:
12245 /* This is the reverse of the soft frame pointer
12246 to hard frame pointer elimination below. */
12247 return offsets->soft_frame - offsets->saved_args;
12249 case ARM_HARD_FRAME_POINTER_REGNUM:
12250 /* This is only non-zero in the case where the static chain register
12251 is stored above the frame. */
12252 return offsets->frame - offsets->saved_args - 4;
12254 case STACK_POINTER_REGNUM:
12255 /* If nothing has been pushed on the stack at all
12256 then this will return -4. This *is* correct! */
12257 return offsets->outgoing_args - (offsets->saved_args + 4);
12260 gcc_unreachable ();
12262 gcc_unreachable ();
12264 case FRAME_POINTER_REGNUM:
12267 case THUMB_HARD_FRAME_POINTER_REGNUM:
12270 case ARM_HARD_FRAME_POINTER_REGNUM:
12271 /* The hard frame pointer points to the top entry in the
stack frame.  The soft frame pointer points to the bottom entry
12273 in the stack frame. If there is no stack frame at all,
12274 then they are identical. */
12276 return offsets->frame - offsets->soft_frame;
12278 case STACK_POINTER_REGNUM:
12279 return offsets->outgoing_args - offsets->soft_frame;
12282 gcc_unreachable ();
12284 gcc_unreachable ();
12287 /* You cannot eliminate from the stack pointer.
12288 In theory you could eliminate from the hard frame
12289 pointer to the stack pointer, but this will never
12290 happen, since if a stack frame is not needed the
12291 hard frame pointer will never be used. */
12292 gcc_unreachable ();
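/* Continuing the illustrative example following arm_get_frame_offsets:
   with saved_args = 0 and outgoing_args = 48, eliminating the arg
   pointer into the stack pointer would yield 48 - (0 + 4) = 44 (a
   sketch; the exact values depend on the frame).  */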
12297 /* Emit RTL to save coprocessor registers on function entry. Returns the
12298 number of bytes pushed. */
12301 arm_save_coproc_regs(void)
12303 int saved_size = 0;
12305 unsigned start_reg;
12308 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12309 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12311 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12312 insn = gen_rtx_MEM (V2SImode, insn);
12313 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12314 RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
   function.  */
12320 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12322 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12323 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12325 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12326 insn = gen_rtx_MEM (XFmode, insn);
12327 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12328 RTX_FRAME_RELATED_P (insn) = 1;
12334 start_reg = LAST_FPA_REGNUM;
12336 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12338 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12340 if (start_reg - reg == 3)
12342 insn = emit_sfm (reg, 4);
12343 RTX_FRAME_RELATED_P (insn) = 1;
12345 start_reg = reg - 1;
12350 if (start_reg != reg)
12352 insn = emit_sfm (reg + 1, start_reg - reg);
12353 RTX_FRAME_RELATED_P (insn) = 1;
12354 saved_size += (start_reg - reg) * 12;
12356 start_reg = reg - 1;
12360 if (start_reg != reg)
12362 insn = emit_sfm (reg + 1, start_reg - reg);
12363 saved_size += (start_reg - reg) * 12;
12364 RTX_FRAME_RELATED_P (insn) = 1;
12367 if (TARGET_HARD_FLOAT && TARGET_VFP)
12369 start_reg = FIRST_VFP_REGNUM;
12371 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12373 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12374 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12376 if (start_reg != reg)
12377 saved_size += vfp_emit_fstmd (start_reg,
12378 (reg - start_reg) / 2);
12379 start_reg = reg + 2;
12382 if (start_reg != reg)
12383 saved_size += vfp_emit_fstmd (start_reg,
12384 (reg - start_reg) / 2);
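/* For illustration only: if d8-d11 are live across calls, the loop above
   coalesces them into one block save, emitting something like
   "fstmfdd sp!, {d8-d11}" via vfp_emit_fstmd and adding the 32 bytes to
   saved_size (a sketch of the intent, not literal output).  */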
12390 /* Set the Thumb frame pointer from the stack pointer. */
12393 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12395 HOST_WIDE_INT amount;
12398 amount = offsets->outgoing_args - offsets->locals_base;
12400 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12401 stack_pointer_rtx, GEN_INT (amount)));
12404 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12405 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12406 expects the first two operands to be the same. */
12409 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12411 hard_frame_pointer_rtx));
12415 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12416 hard_frame_pointer_rtx,
12417 stack_pointer_rtx));
12419 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12420 plus_constant (stack_pointer_rtx, amount));
12421 RTX_FRAME_RELATED_P (dwarf) = 1;
12422 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12426 RTX_FRAME_RELATED_P (insn) = 1;
12429 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12432 arm_expand_prologue (void)
12437 unsigned long live_regs_mask;
12438 unsigned long func_type;
12440 int saved_pretend_args = 0;
12441 int saved_regs = 0;
12442 unsigned HOST_WIDE_INT args_to_push;
12443 arm_stack_offsets *offsets;
12445 func_type = arm_current_func_type ();
12447 /* Naked functions don't have prologues. */
12448 if (IS_NAKED (func_type))
12451 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12452 args_to_push = crtl->args.pretend_args_size;
/* Compute which registers we will have to save onto the stack.  */
12455 offsets = arm_get_frame_offsets ();
12456 live_regs_mask = offsets->saved_regs_mask;
12458 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12460 if (IS_STACKALIGN (func_type))
/* Handle a word-aligned stack pointer.  We generate the following:

	mov r0, sp
	bic r1, r0, #7
	mov sp, r1
	<save and restore r0 in normal prologue/epilogue>
	mov sp, r0
	bx lr
12474 The unwinder doesn't need to know about the stack realignment.
12475 Just tell it we saved SP in r0. */
12476 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12478 r0 = gen_rtx_REG (SImode, 0);
12479 r1 = gen_rtx_REG (SImode, 1);
12480 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
12481 compiler won't choke. */
12482 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
12483 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12484 insn = gen_movsi (r0, stack_pointer_rtx);
12485 RTX_FRAME_RELATED_P (insn) = 1;
12486 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12487 dwarf, REG_NOTES (insn));
12489 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12490 emit_insn (gen_movsi (stack_pointer_rtx, r1));
/* For APCS frames, if the IP register is clobbered when
   creating the frame, save that register in a special
   way.  */
12496 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12498 if (IS_INTERRUPT (func_type))
12500 /* Interrupt functions must not corrupt any registers.
12501 Creating a frame pointer however, corrupts the IP
12502 register, so we must push it first. */
12503 insn = emit_multi_reg_push (1 << IP_REGNUM);
12505 /* Do not set RTX_FRAME_RELATED_P on this insn.
12506 The dwarf stack unwinding code only wants to see one
12507 stack decrement per function, and this is not it. If
12508 this instruction is labeled as being part of the frame
12509 creation sequence then dwarf2out_frame_debug_expr will
12510 die when it encounters the assignment of IP to FP
12511 later on, since the use of SP here establishes SP as
12512 the CFA register and not IP.
12514 Anyway this instruction is not really part of the stack
12515 frame creation although it is part of the prologue. */
12517 else if (IS_NESTED (func_type))
12519 /* The Static chain register is the same as the IP register
12520 used as a scratch register during stack frame creation.
12521 To get around this need to find somewhere to store IP
whilst the frame is being created.  We try the following
places in order:

     1. The last argument register.
     2. A slot on the stack above the frame.  (This only
	works if the function is not a varargs function).
     3. Register r3, after pushing the argument registers
	onto the stack.
12531 Note - we only need to tell the dwarf2 backend about the SP
12532 adjustment in the second variant; the static chain register
12533 doesn't need to be unwound, as it doesn't contain a value
12534 inherited from the caller. */
12536 if (df_regs_ever_live_p (3) == false)
12537 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12538 else if (args_to_push == 0)
12540 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
12545 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12546 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12549 /* Just tell the dwarf backend that we adjusted SP. */
12550 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12551 plus_constant (stack_pointer_rtx,
12553 RTX_FRAME_RELATED_P (insn) = 1;
12554 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12555 dwarf, REG_NOTES (insn));
12559 /* Store the args on the stack. */
12560 if (cfun->machine->uses_anonymous_args)
12561 insn = emit_multi_reg_push
12562 ((0xf0 >> (args_to_push / 4)) & 0xf);
12565 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12566 GEN_INT (- args_to_push)));
12568 RTX_FRAME_RELATED_P (insn) = 1;
12570 saved_pretend_args = 1;
12571 fp_offset = args_to_push;
12574 /* Now reuse r3 to preserve IP. */
12575 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12579 insn = emit_set_insn (ip_rtx,
12580 plus_constant (stack_pointer_rtx, fp_offset));
12581 RTX_FRAME_RELATED_P (insn) = 1;
12586 /* Push the argument registers, or reserve space for them. */
12587 if (cfun->machine->uses_anonymous_args)
12588 insn = emit_multi_reg_push
12589 ((0xf0 >> (args_to_push / 4)) & 0xf);
12592 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12593 GEN_INT (- args_to_push)));
12594 RTX_FRAME_RELATED_P (insn) = 1;
12597 /* If this is an interrupt service routine, and the link register
12598 is going to be pushed, and we're not generating extra
push of IP (needed when a frame is needed and the frame layout is apcs),
12600 subtracting four from LR now will mean that the function return
12601 can be done with a single instruction. */
12602 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12603 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12604 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12607 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12609 emit_set_insn (lr, plus_constant (lr, -4));
12612 if (live_regs_mask)
12614 saved_regs += bit_count (live_regs_mask) * 4;
12615 if (optimize_size && !frame_pointer_needed
12616 && saved_regs == offsets->saved_regs - offsets->saved_args)
12618 /* If no coprocessor registers are being pushed and we don't have
12619 to worry about a frame pointer then push extra registers to
create the stack frame.  This is done in a way that does not
12621 alter the frame layout, so is independent of the epilogue. */
12625 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12627 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12628 if (frame && n * 4 >= frame)
12631 live_regs_mask |= (1 << n) - 1;
12632 saved_regs += frame;
12635 insn = emit_multi_reg_push (live_regs_mask);
12636 RTX_FRAME_RELATED_P (insn) = 1;
12639 if (! IS_VOLATILE (func_type))
12640 saved_regs += arm_save_coproc_regs ();
12642 if (frame_pointer_needed && TARGET_ARM)
12644 /* Create the new frame pointer. */
12645 if (TARGET_APCS_FRAME)
12647 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12648 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12649 RTX_FRAME_RELATED_P (insn) = 1;
12651 if (IS_NESTED (func_type))
12653 /* Recover the static chain register. */
12654 if (!df_regs_ever_live_p (3)
12655 || saved_pretend_args)
12656 insn = gen_rtx_REG (SImode, 3);
12657 else /* if (crtl->args.pretend_args_size == 0) */
12659 insn = plus_constant (hard_frame_pointer_rtx, 4);
12660 insn = gen_frame_mem (SImode, insn);
12662 emit_set_insn (ip_rtx, insn);
12663 /* Add a USE to stop propagate_one_insn() from barfing. */
12664 emit_insn (gen_prologue_use (ip_rtx));
12669 insn = GEN_INT (saved_regs - 4);
12670 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12671 stack_pointer_rtx, insn));
12672 RTX_FRAME_RELATED_P (insn) = 1;
12676 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12678 /* This add can produce multiple insns for a large constant, so we
12679 need to get tricky. */
12680 rtx last = get_last_insn ();
12682 amount = GEN_INT (offsets->saved_args + saved_regs
12683 - offsets->outgoing_args);
12685 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12689 last = last ? NEXT_INSN (last) : get_insns ();
12690 RTX_FRAME_RELATED_P (last) = 1;
12692 while (last != insn);
12694 /* If the frame pointer is needed, emit a special barrier that
12695 will prevent the scheduler from moving stores to the frame
12696 before the stack adjustment. */
12697 if (frame_pointer_needed)
12698 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12699 hard_frame_pointer_rtx));
12703 if (frame_pointer_needed && TARGET_THUMB2)
12704 thumb_set_frame_pointer (offsets);
12706 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12708 unsigned long mask;
12710 mask = live_regs_mask;
12711 mask &= THUMB2_WORK_REGS;
12712 if (!IS_NESTED (func_type))
12713 mask |= (1 << IP_REGNUM);
12714 arm_load_pic_register (mask);
12717 /* If we are profiling, make sure no instructions are scheduled before
12718 the call to mcount. Similarly if the user has requested no
scheduling in the prologue.  Similarly if we want non-call exceptions
12720 using the EABI unwinder, to prevent faulting instructions from being
12721 swapped with a stack adjustment. */
12722 if (crtl->profile || !TARGET_SCHED_PROLOG
12723 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12724 emit_insn (gen_blockage ());
12726 /* If the link register is being kept alive, with the return address in it,
12727 then make sure that it does not get reused by the ce2 pass. */
12728 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12729 cfun->machine->lr_save_eliminated = 1;
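/* For illustration only (a sketch, not literal compiler output): a small
   ARM APCS-frame function with 16 bytes of locals would typically get

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #16

   from the code above: the copy of the old SP into IP, the
   multi-register push, the frame pointer setup and the final stack
   adjustment.  */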
12732 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12734 arm_print_condition (FILE *stream)
12736 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12738 /* Branch conversion is not implemented for Thumb-2. */
12741 output_operand_lossage ("predicated Thumb instruction");
12744 if (current_insn_predicate != NULL)
12746 output_operand_lossage
12747 ("predicated instruction in conditional sequence");
12751 fputs (arm_condition_codes[arm_current_cc], stream);
12753 else if (current_insn_predicate)
12755 enum arm_cond_code code;
12759 output_operand_lossage ("predicated Thumb instruction");
12763 code = get_arm_condition_code (current_insn_predicate);
12764 fputs (arm_condition_codes[code], stream);
12769 /* If CODE is 'd', then the X is a condition operand and the instruction
12770 should only be executed if the condition is true.
If CODE is 'D', then the X is a condition operand and the instruction
12772 should only be executed if the condition is false: however, if the mode
12773 of the comparison is CCFPEmode, then always execute the instruction -- we
12774 do this because in these circumstances !GE does not necessarily imply LT;
12775 in these cases the instruction pattern will take care to make sure that
12776 an instruction containing %d will follow, thereby undoing the effects of
12777 doing this instruction unconditionally.
If CODE is 'N' then X is a floating point operand that must be negated
before output.
12780 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12781 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12783 arm_print_operand (FILE *stream, rtx x, int code)
12788 fputs (ASM_COMMENT_START, stream);
12792 fputs (user_label_prefix, stream);
12796 fputs (REGISTER_PREFIX, stream);
12800 arm_print_condition (stream);
12804 /* Nothing in unified syntax, otherwise the current condition code. */
12805 if (!TARGET_UNIFIED_ASM)
12806 arm_print_condition (stream);
12810 /* The current condition code in unified syntax, otherwise nothing. */
12811 if (TARGET_UNIFIED_ASM)
12812 arm_print_condition (stream);
12816 /* The current condition code for a condition code setting instruction.
12817 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12818 if (TARGET_UNIFIED_ASM)
12820 fputc('s', stream);
12821 arm_print_condition (stream);
12825 arm_print_condition (stream);
12826 fputc('s', stream);
12831 /* If the instruction is conditionally executed then print
12832 the current condition code, otherwise print 's'. */
12833 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12834 if (current_insn_predicate)
12835 arm_print_condition (stream);
12837 fputc('s', stream);
12840 /* %# is a "break" sequence. It doesn't output anything, but is used to
12841 separate e.g. operand numbers from following text, if that text consists
of further digits which we don't want to be part of the operand
number.  */
12850 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12851 r = REAL_VALUE_NEGATE (r);
12852 fprintf (stream, "%s", fp_const_from_val (&r));
12856 /* An integer without a preceding # sign. */
12858 gcc_assert (GET_CODE (x) == CONST_INT);
12859 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12863 if (GET_CODE (x) == CONST_INT)
12866 val = ARM_SIGN_EXTEND (~INTVAL (x));
12867 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12871 putc ('~', stream);
12872 output_addr_const (stream, x);
12877 /* The low 16 bits of an immediate constant. */
12878 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12882 fprintf (stream, "%s", arithmetic_instr (x, 1));
12885 /* Truncate Cirrus shift counts. */
12887 if (GET_CODE (x) == CONST_INT)
12889 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12892 arm_print_operand (stream, x, 0);
12896 fprintf (stream, "%s", arithmetic_instr (x, 0));
12904 if (!shift_operator (x, SImode))
12906 output_operand_lossage ("invalid shift operand");
12910 shift = shift_op (x, &val);
12914 fprintf (stream, ", %s ", shift);
12916 arm_print_operand (stream, XEXP (x, 1), 0);
12918 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12923 /* An explanation of the 'Q', 'R' and 'H' register operands:
12925 In a pair of registers containing a DI or DF value the 'Q'
12926 operand returns the register number of the register containing
12927 the least significant part of the value. The 'R' operand returns
12928 the register number of the register containing the most
12929 significant part of the value.
12931 The 'H' operand returns the higher of the two register numbers.
12932 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12933 same as the 'Q' operand, since the most significant part of the
12934 value is held in the lower number register. The reverse is true
12935 on systems where WORDS_BIG_ENDIAN is false.
12937 The purpose of these operands is to distinguish between cases
12938 where the endian-ness of the values is important (for example
12939 when they are added together), and cases where the endian-ness
12940 is irrelevant, but the order of register operations is important.
12941 For example when loading a value from memory into a register
12942 pair, the endian-ness does not matter. Provided that the value
12943 from the lower memory address is put into the lower numbered
12944 register, and the value from the higher address is put into the
12945 higher numbered register, the load will work regardless of whether
12946 the value being loaded is big-wordian or little-wordian. The
12947 order of the two register loads can matter however, if the address
12948 of the memory location is actually held in one of the registers
12949 being overwritten by the load. */
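/* For example (illustrative): for a DImode value held in {r0, r1} on a
   little-endian target, '%Q' prints r0 (the least significant word),
   '%R' prints r1 (the most significant word) and '%H' prints r1, the
   higher register number, whatever the endianness.  */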
12951 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12953 output_operand_lossage ("invalid operand for code '%c'", code);
12957 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12961 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12963 output_operand_lossage ("invalid operand for code '%c'", code);
12967 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12971 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12973 output_operand_lossage ("invalid operand for code '%c'", code);
12977 asm_fprintf (stream, "%r", REGNO (x) + 1);
12981 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12983 output_operand_lossage ("invalid operand for code '%c'", code);
12987 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12991 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12993 output_operand_lossage ("invalid operand for code '%c'", code);
12997 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13001 asm_fprintf (stream, "%r",
13002 GET_CODE (XEXP (x, 0)) == REG
13003 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13007 asm_fprintf (stream, "{%r-%r}",
13009 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
/* Like 'M', but writing doubleword vector registers, for use by Neon
   insns.  */
13016 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13017 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13019 asm_fprintf (stream, "{d%d}", regno);
13021 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13026 /* CONST_TRUE_RTX means always -- that's the default. */
13027 if (x == const_true_rtx)
13030 if (!COMPARISON_P (x))
13032 output_operand_lossage ("invalid operand for code '%c'", code);
13036 fputs (arm_condition_codes[get_arm_condition_code (x)],
13041 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13042 want to do that. */
13043 if (x == const_true_rtx)
13045 output_operand_lossage ("instruction never executed");
13048 if (!COMPARISON_P (x))
13050 output_operand_lossage ("invalid operand for code '%c'", code);
13054 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13055 (get_arm_condition_code (x))],
13059 /* Cirrus registers can be accessed in a variety of ways:
single floating point (f)
double floating point (d)
32bit integer (fx)
64bit integer (dx).  */
13064 case 'W': /* Cirrus register in F mode. */
13065 case 'X': /* Cirrus register in D mode. */
13066 case 'Y': /* Cirrus register in FX mode. */
13067 case 'Z': /* Cirrus register in DX mode. */
13068 gcc_assert (GET_CODE (x) == REG
13069 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13071 fprintf (stream, "mv%s%s",
13073 : code == 'X' ? "d"
13074 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13078 /* Print cirrus register in the mode specified by the register's mode. */
13081 int mode = GET_MODE (x);
13083 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13085 output_operand_lossage ("invalid operand for code '%c'", code);
13089 fprintf (stream, "mv%s%s",
13090 mode == DFmode ? "d"
13091 : mode == SImode ? "fx"
13092 : mode == DImode ? "dx"
13093 : "f", reg_names[REGNO (x)] + 2);
13099 if (GET_CODE (x) != REG
13100 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13101 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13102 /* Bad value for wCG register number. */
13104 output_operand_lossage ("invalid operand for code '%c'", code);
13109 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13112 /* Print an iWMMXt control register name. */
13114 if (GET_CODE (x) != CONST_INT
13116 || INTVAL (x) >= 16)
13117 /* Bad value for wC register number. */
13119 output_operand_lossage ("invalid operand for code '%c'", code);
13125 static const char * wc_reg_names [16] =
13127 "wCID", "wCon", "wCSSF", "wCASF",
13128 "wC4", "wC5", "wC6", "wC7",
13129 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13130 "wC12", "wC13", "wC14", "wC15"
13133 fprintf (stream, wc_reg_names [INTVAL (x)]);
13137 /* Print a VFP/Neon double precision or quad precision register name. */
13141 int mode = GET_MODE (x);
13142 int is_quad = (code == 'q');
13145 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13147 output_operand_lossage ("invalid operand for code '%c'", code);
13151 if (GET_CODE (x) != REG
13152 || !IS_VFP_REGNUM (REGNO (x)))
13154 output_operand_lossage ("invalid operand for code '%c'", code);
13159 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13160 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13162 output_operand_lossage ("invalid operand for code '%c'", code);
13166 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13167 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13171 /* These two codes print the low/high doubleword register of a Neon quad
13172 register, respectively. For pair-structure types, can also print
13173 low/high quadword registers. */
13177 int mode = GET_MODE (x);
13180 if ((GET_MODE_SIZE (mode) != 16
13181 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13183 output_operand_lossage ("invalid operand for code '%c'", code);
13188 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13190 output_operand_lossage ("invalid operand for code '%c'", code);
13194 if (GET_MODE_SIZE (mode) == 16)
13195 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13196 + (code == 'f' ? 1 : 0));
13198 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13199 + (code == 'f' ? 1 : 0));
/* Print a VFPv3 floating-point constant, represented as an integer
   index.  */
13207 int index = vfp3_const_double_index (x);
13208 gcc_assert (index != -1);
13209 fprintf (stream, "%d", index);
13213 /* Print bits representing opcode features for Neon.
13215 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13216 and polynomials as unsigned.
13218 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13220 Bit 2 is 1 for rounding functions, 0 otherwise. */
13222 /* Identify the type as 's', 'u', 'p' or 'f'. */
13225 HOST_WIDE_INT bits = INTVAL (x);
13226 fputc ("uspf"[bits & 3], stream);
13230 /* Likewise, but signed and unsigned integers are both 'i'. */
13233 HOST_WIDE_INT bits = INTVAL (x);
13234 fputc ("iipf"[bits & 3], stream);
13238 /* As for 'T', but emit 'u' instead of 'p'. */
13241 HOST_WIDE_INT bits = INTVAL (x);
13242 fputc ("usuf"[bits & 3], stream);
13246 /* Bit 2: rounding (vs none). */
13249 HOST_WIDE_INT bits = INTVAL (x);
13250 fputs ((bits & 4) != 0 ? "r" : "", stream);
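/* A worked example (illustrative): for an operand of (const_int 5),
   binary 101 (signed, integer, rounding), '%T' prints 's' and '%O'
   prints 'r', producing suffixes of the kind seen on a rounding shift
   such as vrshl.s32 (a sketch, not tied to one particular pattern).  */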
13257 output_operand_lossage ("missing operand");
13261 switch (GET_CODE (x))
13264 asm_fprintf (stream, "%r", REGNO (x));
13268 output_memory_reference_mode = GET_MODE (x);
13269 output_address (XEXP (x, 0));
13276 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13277 sizeof (fpstr), 0, 1);
13278 fprintf (stream, "#%s", fpstr);
13281 fprintf (stream, "#%s", fp_immediate_constant (x));
13285 gcc_assert (GET_CODE (x) != NEG);
13286 fputc ('#', stream);
13287 output_addr_const (stream, x);
13293 /* Target hook for assembling integer objects. The ARM version needs to
13294 handle word-sized values specially. */
13296 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13298 enum machine_mode mode;
13300 if (size == UNITS_PER_WORD && aligned_p)
13302 fputs ("\t.word\t", asm_out_file);
13303 output_addr_const (asm_out_file, x);
13305 /* Mark symbols as position independent. We only do this in the
13306 .text segment, not in the .data segment. */
13307 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13308 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13310 /* See legitimize_pic_address for an explanation of the
13311 TARGET_VXWORKS_RTP check. */
13312 if (TARGET_VXWORKS_RTP
13313 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13314 fputs ("(GOT)", asm_out_file);
13316 fputs ("(GOTOFF)", asm_out_file);
13318 fputc ('\n', asm_out_file);
13322 mode = GET_MODE (x);
13324 if (arm_vector_mode_supported_p (mode))
13328 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13330 units = CONST_VECTOR_NUNITS (x);
13331 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13333 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13334 for (i = 0; i < units; i++)
13336 rtx elt = CONST_VECTOR_ELT (x, i);
13338 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13341 for (i = 0; i < units; i++)
13343 rtx elt = CONST_VECTOR_ELT (x, i);
13344 REAL_VALUE_TYPE rval;
13346 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13349 (rval, GET_MODE_INNER (mode),
13350 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13356 return default_assemble_integer (x, size, aligned_p);
13360 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13364 if (!TARGET_AAPCS_BASED)
13367 default_named_section_asm_out_constructor
13368 : default_named_section_asm_out_destructor) (symbol, priority);
13372 /* Put these in the .init_array section, using a special relocation. */
13373 if (priority != DEFAULT_INIT_PRIORITY)
13376 sprintf (buf, "%s.%.5u",
13377 is_ctor ? ".init_array" : ".fini_array",
13379 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13386 switch_to_section (s);
13387 assemble_align (POINTER_SIZE);
13388 fputs ("\t.word\t", asm_out_file);
13389 output_addr_const (asm_out_file, symbol);
13390 fputs ("(target1)\n", asm_out_file);
13393 /* Add a function to the list of static constructors. */
13396 arm_elf_asm_constructor (rtx symbol, int priority)
13398 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13401 /* Add a function to the list of static destructors. */
13404 arm_elf_asm_destructor (rtx symbol, int priority)
13406 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decreases execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
13415 0: normal, do nothing special
13416 1: make ASM_OUTPUT_OPCODE not output this instruction
13417 2: make ASM_OUTPUT_OPCODE not output this instruction
13418 3: make instructions conditional
13419 4: make instructions conditional
13421 State transitions (state->state by whom under condition):
13422 0 -> 1 final_prescan_insn if the `target' is a label
13423 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13424 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13425 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13426 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13427 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13428 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13429 (the target insn is arm_target_insn).
13431 If the jump clobbers the conditions then we use states 2 and 4.
13433 A similar thing can be done with conditional return insns.
13435 XXX In case the `target' is an unconditional branch, this conditionalising
13436 of the instructions always reduces code size, but not always execution
13437 time. But then, I want to reduce the code size to somewhere near what
13438 /bin/cc produces. */
13440 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13441 instructions. When a COND_EXEC instruction is seen the subsequent
13442 instructions are scanned so that multiple conditional instructions can be
13443 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13444 specify the length and true/false mask for the IT block. These will be
13445 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
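/* For illustration only: four conditional insns with conditions
   EQ, NE, EQ, NE can be merged by this machinery into one IT block,
   e.g. (a sketch):

	itete	eq
	addeq	r0, r0, #1
	subne	r1, r1, #1
	moveq	r2, #0
	movne	r2, #1

   Here arm_condexec_mask would be 0b0101 (in the terms used above) and
   arm_condexec_masklen would be 4.  */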
13447 /* Returns the index of the ARM condition code string in
13448 `arm_condition_codes'. COMPARISON should be an rtx like
13449 `(eq (...) (...))'. */
13450 static enum arm_cond_code
13451 get_arm_condition_code (rtx comparison)
13453 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13455 enum rtx_code comp_code = GET_CODE (comparison);
13457 if (GET_MODE_CLASS (mode) != MODE_CC)
13458 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13459 XEXP (comparison, 1));
13463 case CC_DNEmode: code = ARM_NE; goto dominance;
13464 case CC_DEQmode: code = ARM_EQ; goto dominance;
13465 case CC_DGEmode: code = ARM_GE; goto dominance;
13466 case CC_DGTmode: code = ARM_GT; goto dominance;
13467 case CC_DLEmode: code = ARM_LE; goto dominance;
13468 case CC_DLTmode: code = ARM_LT; goto dominance;
13469 case CC_DGEUmode: code = ARM_CS; goto dominance;
13470 case CC_DGTUmode: code = ARM_HI; goto dominance;
13471 case CC_DLEUmode: code = ARM_LS; goto dominance;
13472 case CC_DLTUmode: code = ARM_CC;
13475 gcc_assert (comp_code == EQ || comp_code == NE);
13477 if (comp_code == EQ)
13478 return ARM_INVERSE_CONDITION_CODE (code);
13484 case NE: return ARM_NE;
13485 case EQ: return ARM_EQ;
13486 case GE: return ARM_PL;
13487 case LT: return ARM_MI;
13488 default: gcc_unreachable ();
13494 case NE: return ARM_NE;
13495 case EQ: return ARM_EQ;
13496 default: gcc_unreachable ();
13502 case NE: return ARM_MI;
13503 case EQ: return ARM_PL;
13504 default: gcc_unreachable ();
13509 /* These encodings assume that AC=1 in the FPA system control
13510 byte. This allows us to handle all cases except UNEQ and
13514 case GE: return ARM_GE;
13515 case GT: return ARM_GT;
13516 case LE: return ARM_LS;
13517 case LT: return ARM_MI;
13518 case NE: return ARM_NE;
13519 case EQ: return ARM_EQ;
13520 case ORDERED: return ARM_VC;
13521 case UNORDERED: return ARM_VS;
13522 case UNLT: return ARM_LT;
13523 case UNLE: return ARM_LE;
13524 case UNGT: return ARM_HI;
13525 case UNGE: return ARM_PL;
13526 /* UNEQ and LTGT do not have a representation. */
13527 case UNEQ: /* Fall through. */
13528 case LTGT: /* Fall through. */
13529 default: gcc_unreachable ();
13535 case NE: return ARM_NE;
13536 case EQ: return ARM_EQ;
13537 case GE: return ARM_LE;
13538 case GT: return ARM_LT;
13539 case LE: return ARM_GE;
13540 case LT: return ARM_GT;
13541 case GEU: return ARM_LS;
13542 case GTU: return ARM_CC;
13543 case LEU: return ARM_CS;
13544 case LTU: return ARM_HI;
13545 default: gcc_unreachable ();
13551 case LTU: return ARM_CS;
13552 case GEU: return ARM_CC;
13553 default: gcc_unreachable ();
13559 case NE: return ARM_NE;
13560 case EQ: return ARM_EQ;
13561 case GE: return ARM_GE;
13562 case GT: return ARM_GT;
13563 case LE: return ARM_LE;
13564 case LT: return ARM_LT;
13565 case GEU: return ARM_CS;
13566 case GTU: return ARM_HI;
13567 case LEU: return ARM_LS;
13568 case LTU: return ARM_CC;
13569 default: gcc_unreachable ();
13572 default: gcc_unreachable ();
13576 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13579 thumb2_final_prescan_insn (rtx insn)
13581 rtx first_insn = insn;
13582 rtx body = PATTERN (insn);
13584 enum arm_cond_code code;
13588 /* Remove the previous insn from the count of insns to be output. */
13589 if (arm_condexec_count)
13590 arm_condexec_count--;
13592 /* Nothing to do if we are already inside a conditional block. */
13593 if (arm_condexec_count)
13596 if (GET_CODE (body) != COND_EXEC)
13599 /* Conditional jumps are implemented directly. */
13600 if (GET_CODE (insn) == JUMP_INSN)
13603 predicate = COND_EXEC_TEST (body);
13604 arm_current_cc = get_arm_condition_code (predicate);
13606 n = get_attr_ce_count (insn);
13607 arm_condexec_count = 1;
13608 arm_condexec_mask = (1 << n) - 1;
13609 arm_condexec_masklen = n;
13610 /* See if subsequent instructions can be combined into the same block. */
13613 insn = next_nonnote_insn (insn);
13615 /* Jumping into the middle of an IT block is illegal, so a label or
13616 barrier terminates the block. */
13617 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13620 body = PATTERN (insn);
13621 /* USE and CLOBBER aren't really insns, so just skip them. */
13622 if (GET_CODE (body) == USE
13623 || GET_CODE (body) == CLOBBER)
13624 continue;
13626 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13627 if (GET_CODE (body) != COND_EXEC)
13628 break;
13629 /* Allow up to 4 conditionally executed instructions in a block. */
13630 n = get_attr_ce_count (insn);
13631 if (arm_condexec_masklen + n > 4)
13632 break;
13634 predicate = COND_EXEC_TEST (body);
13635 code = get_arm_condition_code (predicate);
13636 mask = (1 << n) - 1;
13637 if (arm_current_cc == code)
13638 arm_condexec_mask |= (mask << arm_condexec_masklen);
13639 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
13640 break;
13642 arm_condexec_count++;
13643 arm_condexec_masklen += n;
13645 /* A jump must be the last instruction in a conditional block. */
13646 if (GET_CODE (insn) == JUMP_INSN)
13647 break;
13649 /* Restore recog_data (getting the attributes of other insns can
13650 destroy this array, but final.c assumes that it remains intact
13651 across this call). */
13652 extract_constrain_insn_cached (first_insn);
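/* Editorial sketch of the mask bookkeeping above (standalone, hypothetical
   helper; not part of GCC).  Each conditional insn contributes CE_COUNT
   bits; a bit is set when the insn uses the block's condition ('t') and
   left clear when it uses the inverse ('e').  The caller has already
   rejected any insn whose condition is neither the block's nor its
   inverse.  Returns 0 when the insn cannot join the current IT block.  */
static inline int
sketch_extend_it_block (unsigned int *mask, int *masklen,
			int ce_count, int same_cc)
{
  if (*masklen + ce_count > 4)	/* An IT block covers at most 4 insns.  */
    return 0;
  if (same_cc)
    *mask |= ((1u << ce_count) - 1) << *masklen;
  *masklen += ce_count;
  return 1;
}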
13655 void
13656 arm_final_prescan_insn (rtx insn)
13658 /* BODY will hold the body of INSN. */
13659 rtx body = PATTERN (insn);
13661 /* REVERSE will be 1 if we are trying to repeat the trick, and things
13662 need to be reversed if it appears to fail. */
13665 /* If JUMP_CLOBBERS is nonzero then the condition codes are clobbered
13666 when a branch is taken, even if the rtl suggests otherwise. It also
13667 means that we have to grub around within the jump expression to find
13668 out what the conditions are when the jump isn't taken. */
13669 int jump_clobbers = 0;
13671 /* If we start with a return insn, we only succeed if we find another one. */
13672 int seeking_return = 0;
13674 /* START_INSN will hold the insn from where we start looking. This is the
13675 first insn after the following code_label if REVERSE is true. */
13676 rtx start_insn = insn;
13678 /* If in state 4, check if the target branch is reached, in order to
13679 change back to state 0. */
13680 if (arm_ccfsm_state == 4)
13682 if (insn == arm_target_insn)
13684 arm_target_insn = NULL;
13685 arm_ccfsm_state = 0;
13690 /* If in state 3, it is possible to repeat the trick, if this insn is an
13691 unconditional branch to a label, and immediately following this branch
13692 is the previous target label which is only used once, and the label this
13693 branch jumps to is not too far off. */
13694 if (arm_ccfsm_state == 3)
13696 if (simplejump_p (insn))
13698 start_insn = next_nonnote_insn (start_insn);
13699 if (GET_CODE (start_insn) == BARRIER)
13701 /* XXX Isn't this always a barrier? */
13702 start_insn = next_nonnote_insn (start_insn);
13704 if (GET_CODE (start_insn) == CODE_LABEL
13705 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13706 && LABEL_NUSES (start_insn) == 1)
13711 else if (GET_CODE (body) == RETURN)
13713 start_insn = next_nonnote_insn (start_insn);
13714 if (GET_CODE (start_insn) == BARRIER)
13715 start_insn = next_nonnote_insn (start_insn);
13716 if (GET_CODE (start_insn) == CODE_LABEL
13717 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13718 && LABEL_NUSES (start_insn) == 1)
13721 seeking_return = 1;
13730 gcc_assert (!arm_ccfsm_state || reverse);
13731 if (GET_CODE (insn) != JUMP_INSN)
13732 return;
13734 /* This jump might be paralleled with a clobber of the condition codes;
13735 the jump should always come first. */
13736 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13737 body = XVECEXP (body, 0, 0);
13739 if (reverse
13740 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13741 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13744 int fail = FALSE, succeed = FALSE;
13745 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13746 int then_not_else = TRUE;
13747 rtx this_insn = start_insn, label = 0;
13749 /* If the jump cannot be done with one instruction, we cannot
13750 conditionally execute the instruction in the inverse case. */
13751 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13757 /* Register the insn jumped to. */
13760 if (!seeking_return)
13761 label = XEXP (SET_SRC (body), 0);
13763 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13764 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13765 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13767 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13768 then_not_else = FALSE;
13770 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13771 seeking_return = 1;
13772 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13774 seeking_return = 1;
13775 then_not_else = FALSE;
13778 gcc_unreachable ();
13780 /* See how many insns this branch skips, and what kind of insns. If all
13781 insns are okay, and the label or unconditional branch to the same
13782 label is not too far away, succeed. */
13783 for (insns_skipped = 0;
13784 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13788 this_insn = next_nonnote_insn (this_insn);
13792 switch (GET_CODE (this_insn))
13795 /* Succeed if it is the target label, otherwise fail since
13796 control falls in from somewhere else. */
13797 if (this_insn == label)
13801 arm_ccfsm_state = 2;
13802 this_insn = next_nonnote_insn (this_insn);
13805 arm_ccfsm_state = 1;
13813 /* Succeed if the following insn is the target label.
13814 Otherwise fail.
13815 If return insns are used then the last insn in a function
13816 will be a barrier. */
13817 this_insn = next_nonnote_insn (this_insn);
13818 if (this_insn && this_insn == label)
13822 arm_ccfsm_state = 2;
13823 this_insn = next_nonnote_insn (this_insn);
13826 arm_ccfsm_state = 1;
13834 /* The AAPCS says that conditional calls should not be
13835 used since they make interworking inefficient (the
13836 linker can't transform BL<cond> into BLX). That's
13837 only a problem if the machine has BLX. */
13844 /* Succeed if the following insn is the target label, or
13845 if the following two insns are a barrier and the
13846 target label. */
13847 this_insn = next_nonnote_insn (this_insn);
13848 if (this_insn && GET_CODE (this_insn) == BARRIER)
13849 this_insn = next_nonnote_insn (this_insn);
13851 if (this_insn && this_insn == label
13852 && insns_skipped < max_insns_skipped)
13856 arm_ccfsm_state = 2;
13857 this_insn = next_nonnote_insn (this_insn);
13860 arm_ccfsm_state = 1;
13868 /* If this is an unconditional branch to the same label, succeed.
13869 If it is to another label, do nothing. If it is conditional,
13870 fail. */
13871 /* XXX Probably, the tests for SET and the PC are
13872 insufficient. */
13874 scanbody = PATTERN (this_insn);
13875 if (GET_CODE (scanbody) == SET
13876 && GET_CODE (SET_DEST (scanbody)) == PC)
13878 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13879 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13881 arm_ccfsm_state = 2;
13884 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13887 /* Fail if a conditional return is undesirable (e.g. on a
13888 StrongARM), but still allow this if optimizing for size. */
13889 else if (GET_CODE (scanbody) == RETURN
13890 && !use_return_insn (TRUE, NULL)
13893 else if (GET_CODE (scanbody) == RETURN
13896 arm_ccfsm_state = 2;
13899 else if (GET_CODE (scanbody) == PARALLEL)
13901 switch (get_attr_conds (this_insn))
13911 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13916 /* Instructions using or affecting the condition codes make it
13917 fail. */
13918 scanbody = PATTERN (this_insn);
13919 if (!(GET_CODE (scanbody) == SET
13920 || GET_CODE (scanbody) == PARALLEL)
13921 || get_attr_conds (this_insn) != CONDS_NOCOND)
13924 /* A conditional Cirrus instruction must be followed by
13925 a non-Cirrus instruction. However, since this
13926 function conditionalizes instructions, and since by
13927 the time we get here we can no longer add instructions
13928 (nops) because shorten_branches() has already been
13929 called, we simply disable conditionalizing Cirrus
13930 instructions, to be safe. */
13931 if (GET_CODE (scanbody) != USE
13932 && GET_CODE (scanbody) != CLOBBER
13933 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13943 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13944 arm_target_label = CODE_LABEL_NUMBER (label);
13947 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13949 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13951 this_insn = next_nonnote_insn (this_insn);
13952 gcc_assert (!this_insn
13953 || (GET_CODE (this_insn) != BARRIER
13954 && GET_CODE (this_insn) != CODE_LABEL));
13958 /* Oh dear! We ran off the end; give up. */
13959 extract_constrain_insn_cached (insn);
13960 arm_ccfsm_state = 0;
13961 arm_target_insn = NULL;
13964 arm_target_insn = this_insn;
13968 gcc_assert (!reverse);
13970 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13972 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13973 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13974 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13975 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13979 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13980 what it was. */
13982 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13986 if (reverse || then_not_else)
13987 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13990 /* Restore recog_data (getting the attributes of other insns can
13991 destroy this array, but final.c assumes that it remains intact
13992 across this call). */
13993 extract_constrain_insn_cached (insn);
13997 /* Output IT instructions. */
13998 void
13999 thumb2_asm_output_opcode (FILE * stream)
14004 if (arm_condexec_mask)
14006 for (n = 0; n < arm_condexec_masklen; n++)
14007 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14009 asm_fprintf (stream, "i%s\t%s\n\t", buff,
14010 arm_condition_codes[arm_current_cc]);
14011 arm_condexec_mask = 0;
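/* Editorial sketch (standalone hypothetical helper, not part of GCC) of
   what the loop above produces.  Bit n of the mask describes conditional
   insn n, and bit 0 is always set, so the buffer always starts with 't'
   and "i%s" yields a valid IT mnemonic.  E.g. mask 0b101 with masklen 3
   gives "itet" -- then, else, then.  */
static inline void
sketch_it_mnemonic (unsigned int mask, int masklen, char buf[6])
{
  int n;
  buf[0] = 'i';
  for (n = 0; n < masklen; n++)
    buf[n + 1] = (mask & (1u << n)) ? 't' : 'e';
  buf[n + 1] = '\0';	/* masklen <= 4, so at most "i" plus 4 suffixes.  */
}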
14015 /* Returns true if REGNO is a valid register
14016 for holding a quantity of mode MODE. */
14017 int
14018 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14020 if (GET_MODE_CLASS (mode) == MODE_CC)
14021 return (regno == CC_REGNUM
14022 || (TARGET_HARD_FLOAT && TARGET_VFP
14023 && regno == VFPCC_REGNUM));
14026 /* For the Thumb we only allow values bigger than SImode in
14027 registers 0 - 6, so that there is always a second low
14028 register available to hold the upper part of the value.
14029 We probably ought to ensure that the register is the
14030 start of an even numbered register pair. */
14031 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14033 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14034 && IS_CIRRUS_REGNUM (regno))
14035 /* We have outlawed SI values in Cirrus registers because they
14036 reside in the lower 32 bits, but SF values reside in the
14037 upper 32 bits. This causes GCC all sorts of grief. We can't
14038 even split the registers into pairs because Cirrus SI values
14039 get sign-extended to 64 bits -- aldyh. */
14040 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14042 if (TARGET_HARD_FLOAT && TARGET_VFP
14043 && IS_VFP_REGNUM (regno))
14045 if (mode == SFmode || mode == SImode)
14046 return VFP_REGNO_OK_FOR_SINGLE (regno);
14048 if (mode == DFmode)
14049 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14052 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14053 || (VALID_NEON_QREG_MODE (mode)
14054 && NEON_REGNO_OK_FOR_QUAD (regno))
14055 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14056 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14057 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14058 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14059 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14064 if (TARGET_REALLY_IWMMXT)
14066 if (IS_IWMMXT_GR_REGNUM (regno))
14067 return mode == SImode;
14069 if (IS_IWMMXT_REGNUM (regno))
14070 return VALID_IWMMXT_REG_MODE (mode);
14073 /* We allow any value to be stored in the general registers.
14074 Restrict doubleword quantities to even register pairs so that we can
14075 use ldrd. Do not allow Neon structure opaque modes in general registers;
14076 they would use too many. */
14077 if (regno <= LAST_ARM_REGNUM)
14078 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14079 && !VALID_NEON_STRUCT_MODE (mode);
14081 if (regno == FRAME_POINTER_REGNUM
14082 || regno == ARG_POINTER_REGNUM)
14083 /* We only allow integers in the fake hard registers. */
14084 return GET_MODE_CLASS (mode) == MODE_INT;
14086 /* The only registers left are the FPA registers
14087 which we only allow to hold FP values. */
14088 return (TARGET_HARD_FLOAT && TARGET_FPA
14089 && GET_MODE_CLASS (mode) == MODE_FLOAT
14090 && regno >= FIRST_FPA_REGNUM
14091 && regno <= LAST_FPA_REGNUM);
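/* Editorial sketch (standalone, hypothetical name; not part of GCC)
   isolating the core-register rule above: a doubleword value must start
   on an even-numbered register when LDRD/STRD are available, so that the
   r(2n)/r(2n+1) pair addressed by those instructions can hold it.  */
static inline int
sketch_core_reg_ok_for_size (unsigned int regno, unsigned int size,
			     int have_ldrd)
{
  if (have_ldrd && size > 4 && (regno & 1) != 0)
    return 0;			/* Odd start: no LDRD/STRD pair.  */
  return 1;
}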
14094 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14095 not used in ARM mode. */
14096 enum reg_class
14097 arm_regno_class (int regno)
14101 if (regno == STACK_POINTER_REGNUM)
14103 if (regno == CC_REGNUM)
14110 if (TARGET_THUMB2 && regno < 8)
14113 if ( regno <= LAST_ARM_REGNUM
14114 || regno == FRAME_POINTER_REGNUM
14115 || regno == ARG_POINTER_REGNUM)
14116 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14118 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14119 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14121 if (IS_CIRRUS_REGNUM (regno))
14122 return CIRRUS_REGS;
14124 if (IS_VFP_REGNUM (regno))
14126 if (regno <= D7_VFP_REGNUM)
14127 return VFP_D0_D7_REGS;
14128 else if (regno <= LAST_LO_VFP_REGNUM)
14129 return VFP_LO_REGS;
14131 return VFP_HI_REGS;
14134 if (IS_IWMMXT_REGNUM (regno))
14135 return IWMMXT_REGS;
14137 if (IS_IWMMXT_GR_REGNUM (regno))
14138 return IWMMXT_GR_REGS;
14143 /* Handle a special case when computing the offset
14144 of an argument from the frame pointer. */
14145 int
14146 arm_debugger_arg_offset (int value, rtx addr)
14150 /* We are only interested if dbxout_parms() failed to compute the offset. */
14154 /* We can only cope with the case where the address is held in a register. */
14155 if (GET_CODE (addr) != REG)
14158 /* If we are using the frame pointer to point at the argument, then
14159 an offset of 0 is correct. */
14160 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14163 /* If we are using the stack pointer to point at the
14164 argument, then an offset of 0 is correct. */
14165 /* ??? Check this is consistent with thumb2 frame layout. */
14166 if ((TARGET_THUMB || !frame_pointer_needed)
14167 && REGNO (addr) == SP_REGNUM)
14170 /* Oh dear. The argument is pointed to by a register rather
14171 than being held in a register, or being stored at a known
14172 offset from the frame pointer. Since GDB only understands
14173 those two kinds of argument we must translate the address
14174 held in the register into an offset from the frame pointer.
14175 We do this by searching through the insns for the function
14176 looking to see where this register gets its value. If the
14177 register is initialized from the frame pointer plus an offset
14178 then we are in luck and we can continue, otherwise we give up.
14180 This code is exercised by producing debugging information
14181 for a function with arguments like this:
14183 double func (double a, double b, int c, double d) {return d;}
14185 Without this code the stab for parameter 'd' will be set to
14186 an offset of 0 from the frame pointer, rather than 8. */
14188 /* The if() statement says:
14190 If the insn is a normal instruction
14191 and if the insn is setting the value in a register
14192 and if the register being set is the register holding the address of the argument
14193 and if the address is computed by an addition
14194 that involves adding to a register
14195 which is the frame pointer
14200 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14202 if ( GET_CODE (insn) == INSN
14203 && GET_CODE (PATTERN (insn)) == SET
14204 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14205 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14206 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14207 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14208 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14211 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14220 warning (0, "unable to compute real location of stacked parameter");
14221 value = 8; /* XXX magic hack */
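/* Editorial restatement of the nested if() above as a single predicate
   (a readability sketch with a hypothetical name, not used by this file;
   SET_DEST/SET_SRC are equivalent to the XEXP accesses in the loop, and a
   REG check on the destination is added for safety).  */
static inline int
sketch_sets_reg_to_fp_plus_const (rtx pat, unsigned int argreg)
{
  return (GET_CODE (pat) == SET
	  && GET_CODE (SET_DEST (pat)) == REG
	  && REGNO (SET_DEST (pat)) == argreg
	  && GET_CODE (SET_SRC (pat)) == PLUS
	  && GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
	  && REGNO (XEXP (SET_SRC (pat), 0))
	     == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && GET_CODE (XEXP (SET_SRC (pat), 1)) == CONST_INT);
}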
14227 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14230 if ((MASK) & insn_flags) \
14231 add_builtin_function ((NAME), (TYPE), (CODE), \
14232 BUILT_IN_MD, NULL, NULL_TREE); \
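/* Editorial sketch (hypothetical helper, not part of GCC): the essence of
   def_mbuiltin is this guard -- a builtin is registered only when its
   capability bit (e.g. FL_IWMMXT) is present in insn_flags for the
   selected CPU, so __builtin_arm_* names simply do not exist on targets
   that cannot execute them.  */
static inline int
sketch_builtin_enabled (unsigned long mask, unsigned long flags)
{
  return (mask & flags) != 0;
}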
14236 struct builtin_description
14238 const unsigned int mask;
14239 const enum insn_code icode;
14240 const char * const name;
14241 const enum arm_builtins code;
14242 const enum rtx_code comparison;
14243 const unsigned int flag;
14246 static const struct builtin_description bdesc_2arg[] =
14248 #define IWMMXT_BUILTIN(code, string, builtin) \
14249 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14250 ARM_BUILTIN_##builtin, 0, 0 },
14252 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14253 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14254 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14255 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14256 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14257 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14258 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14259 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14260 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14261 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14262 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14263 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14264 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14265 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14266 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14267 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14268 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14269 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14270 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14271 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14272 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14273 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14274 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14275 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14276 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14277 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14278 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14279 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14280 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14281 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14282 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14283 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14284 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14285 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14286 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14287 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14288 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14289 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14290 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14291 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14292 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14293 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14294 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14295 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14296 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14297 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14298 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14299 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14300 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14301 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14302 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14303 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14304 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14305 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14306 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14307 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14308 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14309 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14311 #define IWMMXT_BUILTIN2(code, builtin) \
14312 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14314 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14315 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14316 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14317 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14318 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14319 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14320 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14321 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14322 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14323 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14324 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14325 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14326 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14327 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14328 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14329 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14330 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14331 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14332 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14333 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14334 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14335 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14336 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14337 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14338 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14339 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14340 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14341 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14342 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14343 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14344 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14345 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14348 static const struct builtin_description bdesc_1arg[] =
14350 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14351 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14352 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14353 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14354 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14355 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14356 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14357 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14358 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14359 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14360 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14361 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14362 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14363 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14364 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14365 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14366 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14367 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14370 /* Set up all the iWMMXt builtins. This is
14371 not called if TARGET_IWMMXT is zero. */
14373 static void
14374 arm_init_iwmmxt_builtins (void)
14376 const struct builtin_description * d;
14378 tree endlink = void_list_node;
14380 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14381 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14382 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14384 tree int_ftype_int
14385 = build_function_type (integer_type_node,
14386 tree_cons (NULL_TREE, integer_type_node, endlink));
14387 tree v8qi_ftype_v8qi_v8qi_int
14388 = build_function_type (V8QI_type_node,
14389 tree_cons (NULL_TREE, V8QI_type_node,
14390 tree_cons (NULL_TREE, V8QI_type_node,
14391 tree_cons (NULL_TREE,
14394 tree v4hi_ftype_v4hi_int
14395 = build_function_type (V4HI_type_node,
14396 tree_cons (NULL_TREE, V4HI_type_node,
14397 tree_cons (NULL_TREE, integer_type_node,
14399 tree v2si_ftype_v2si_int
14400 = build_function_type (V2SI_type_node,
14401 tree_cons (NULL_TREE, V2SI_type_node,
14402 tree_cons (NULL_TREE, integer_type_node,
14404 tree v2si_ftype_di_di
14405 = build_function_type (V2SI_type_node,
14406 tree_cons (NULL_TREE, long_long_integer_type_node,
14407 tree_cons (NULL_TREE, long_long_integer_type_node,
14409 tree di_ftype_di_int
14410 = build_function_type (long_long_integer_type_node,
14411 tree_cons (NULL_TREE, long_long_integer_type_node,
14412 tree_cons (NULL_TREE, integer_type_node,
14414 tree di_ftype_di_int_int
14415 = build_function_type (long_long_integer_type_node,
14416 tree_cons (NULL_TREE, long_long_integer_type_node,
14417 tree_cons (NULL_TREE, integer_type_node,
14418 tree_cons (NULL_TREE,
14421 tree int_ftype_v8qi
14422 = build_function_type (integer_type_node,
14423 tree_cons (NULL_TREE, V8QI_type_node,
14425 tree int_ftype_v4hi
14426 = build_function_type (integer_type_node,
14427 tree_cons (NULL_TREE, V4HI_type_node,
14429 tree int_ftype_v2si
14430 = build_function_type (integer_type_node,
14431 tree_cons (NULL_TREE, V2SI_type_node,
14433 tree int_ftype_v8qi_int
14434 = build_function_type (integer_type_node,
14435 tree_cons (NULL_TREE, V8QI_type_node,
14436 tree_cons (NULL_TREE, integer_type_node,
14438 tree int_ftype_v4hi_int
14439 = build_function_type (integer_type_node,
14440 tree_cons (NULL_TREE, V4HI_type_node,
14441 tree_cons (NULL_TREE, integer_type_node,
14443 tree int_ftype_v2si_int
14444 = build_function_type (integer_type_node,
14445 tree_cons (NULL_TREE, V2SI_type_node,
14446 tree_cons (NULL_TREE, integer_type_node,
14448 tree v8qi_ftype_v8qi_int_int
14449 = build_function_type (V8QI_type_node,
14450 tree_cons (NULL_TREE, V8QI_type_node,
14451 tree_cons (NULL_TREE, integer_type_node,
14452 tree_cons (NULL_TREE,
14455 tree v4hi_ftype_v4hi_int_int
14456 = build_function_type (V4HI_type_node,
14457 tree_cons (NULL_TREE, V4HI_type_node,
14458 tree_cons (NULL_TREE, integer_type_node,
14459 tree_cons (NULL_TREE,
14462 tree v2si_ftype_v2si_int_int
14463 = build_function_type (V2SI_type_node,
14464 tree_cons (NULL_TREE, V2SI_type_node,
14465 tree_cons (NULL_TREE, integer_type_node,
14466 tree_cons (NULL_TREE,
14469 /* Miscellaneous. */
14470 tree v8qi_ftype_v4hi_v4hi
14471 = build_function_type (V8QI_type_node,
14472 tree_cons (NULL_TREE, V4HI_type_node,
14473 tree_cons (NULL_TREE, V4HI_type_node,
14475 tree v4hi_ftype_v2si_v2si
14476 = build_function_type (V4HI_type_node,
14477 tree_cons (NULL_TREE, V2SI_type_node,
14478 tree_cons (NULL_TREE, V2SI_type_node,
14480 tree v2si_ftype_v4hi_v4hi
14481 = build_function_type (V2SI_type_node,
14482 tree_cons (NULL_TREE, V4HI_type_node,
14483 tree_cons (NULL_TREE, V4HI_type_node,
14485 tree v2si_ftype_v8qi_v8qi
14486 = build_function_type (V2SI_type_node,
14487 tree_cons (NULL_TREE, V8QI_type_node,
14488 tree_cons (NULL_TREE, V8QI_type_node,
14490 tree v4hi_ftype_v4hi_di
14491 = build_function_type (V4HI_type_node,
14492 tree_cons (NULL_TREE, V4HI_type_node,
14493 tree_cons (NULL_TREE,
14494 long_long_integer_type_node,
14496 tree v2si_ftype_v2si_di
14497 = build_function_type (V2SI_type_node,
14498 tree_cons (NULL_TREE, V2SI_type_node,
14499 tree_cons (NULL_TREE,
14500 long_long_integer_type_node,
14502 tree void_ftype_int_int
14503 = build_function_type (void_type_node,
14504 tree_cons (NULL_TREE, integer_type_node,
14505 tree_cons (NULL_TREE, integer_type_node,
14507 tree di_ftype_void
14508 = build_function_type (long_long_unsigned_type_node, endlink);
14509 tree di_ftype_v8qi
14510 = build_function_type (long_long_integer_type_node,
14511 tree_cons (NULL_TREE, V8QI_type_node,
14513 tree di_ftype_v4hi
14514 = build_function_type (long_long_integer_type_node,
14515 tree_cons (NULL_TREE, V4HI_type_node,
14517 tree di_ftype_v2si
14518 = build_function_type (long_long_integer_type_node,
14519 tree_cons (NULL_TREE, V2SI_type_node,
14521 tree v2si_ftype_v4hi
14522 = build_function_type (V2SI_type_node,
14523 tree_cons (NULL_TREE, V4HI_type_node,
14525 tree v4hi_ftype_v8qi
14526 = build_function_type (V4HI_type_node,
14527 tree_cons (NULL_TREE, V8QI_type_node,
14530 tree di_ftype_di_v4hi_v4hi
14531 = build_function_type (long_long_unsigned_type_node,
14532 tree_cons (NULL_TREE,
14533 long_long_unsigned_type_node,
14534 tree_cons (NULL_TREE, V4HI_type_node,
14535 tree_cons (NULL_TREE,
14539 tree di_ftype_v4hi_v4hi
14540 = build_function_type (long_long_unsigned_type_node,
14541 tree_cons (NULL_TREE, V4HI_type_node,
14542 tree_cons (NULL_TREE, V4HI_type_node,
14545 /* Normal vector binops. */
14546 tree v8qi_ftype_v8qi_v8qi
14547 = build_function_type (V8QI_type_node,
14548 tree_cons (NULL_TREE, V8QI_type_node,
14549 tree_cons (NULL_TREE, V8QI_type_node,
14551 tree v4hi_ftype_v4hi_v4hi
14552 = build_function_type (V4HI_type_node,
14553 tree_cons (NULL_TREE, V4HI_type_node,
14554 tree_cons (NULL_TREE, V4HI_type_node,
14556 tree v2si_ftype_v2si_v2si
14557 = build_function_type (V2SI_type_node,
14558 tree_cons (NULL_TREE, V2SI_type_node,
14559 tree_cons (NULL_TREE, V2SI_type_node,
14561 tree di_ftype_di_di
14562 = build_function_type (long_long_unsigned_type_node,
14563 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14564 tree_cons (NULL_TREE,
14565 long_long_unsigned_type_node,
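/* Editorial note: each tree_cons chain above is an argument list that ends
   in `endlink' (void_list_node), marking the signature as non-variadic.
   The same types could be written more compactly with
   build_function_type_list, as the Neon code later in this file does.
   A sketch of the equivalent form (hypothetical variable, shown for
   comparison only):

     tree alt_v4hi_ftype_v4hi_v4hi
       = build_function_type_list (V4HI_type_node, V4HI_type_node,
				   V4HI_type_node, NULL_TREE);
*/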
14568 /* Add all builtins that are more or less simple operations on two
14569 operands. */
14570 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14572 /* Use one of the operands; the target can have a different mode for
14573 mask-generating compares. */
14574 enum machine_mode mode;
14580 mode = insn_data[d->icode].operand[1].mode;
14585 type = v8qi_ftype_v8qi_v8qi;
14588 type = v4hi_ftype_v4hi_v4hi;
14591 type = v2si_ftype_v2si_v2si;
14594 type = di_ftype_di_di;
14598 gcc_unreachable ();
14601 def_mbuiltin (d->mask, d->name, type, d->code);
14604 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
14605 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14607 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14610 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14612 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14614 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14616 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14617 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14618 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14619 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14620 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14621 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14623 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14624 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14625 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14626 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14627 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14628 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14630 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14631 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14632 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14633 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14634 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14635 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14637 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14639 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14640 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14641 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14642 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14644 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14645 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14646 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14647 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14648 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14649 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14650 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14651 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14652 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14654 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14655 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14656 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14658 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14659 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14660 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14662 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14663 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14664 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14665 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14666 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14667 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14669 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14670 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14671 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14672 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14673 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14674 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14675 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14676 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14677 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14678 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14679 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14680 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14682 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14683 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14684 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14685 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14687 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14688 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14689 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14690 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14691 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14692 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14693 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14696 static void
14697 arm_init_tls_builtins (void)
14701 ftype = build_function_type (ptr_type_node, void_list_node);
14702 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14703 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14705 TREE_NOTHROW (decl) = 1;
14706 TREE_READONLY (decl) = 1;
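/* Editorial usage sketch (user-level code, excluded from the build): the
   builtin registered above is what __builtin_thread_pointer resolves to in
   user programs, typically reading the TLS register (e.g. TPIDRURO via
   cp15) or calling a kernel helper.  TREE_NOTHROW and TREE_READONLY above
   let the compiler CSE and hoist the call freely.  */
#if 0
static void *
sketch_get_thread_pointer (void)
{
  return __builtin_thread_pointer ();
}
#endif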
14723 } neon_builtin_type_bits;
14725 #define v8qi_UP T_V8QI
14726 #define v4hi_UP T_V4HI
14727 #define v2si_UP T_V2SI
14728 #define v2sf_UP T_V2SF
14730 #define v16qi_UP T_V16QI
14731 #define v8hi_UP T_V8HI
14732 #define v4si_UP T_V4SI
14733 #define v4sf_UP T_V4SF
14734 #define v2di_UP T_V2DI
14739 #define UP(X) X##_UP
14774 NEON_LOADSTRUCTLANE,
14776 NEON_STORESTRUCTLANE,
14783 typedef struct {
14784 const char *name;
14785 const neon_itype itype;
14786 const neon_builtin_type_bits bits;
14787 const enum insn_code codes[T_MAX];
14788 const unsigned int num_vars;
14789 unsigned int base_fcode;
14790 } neon_builtin_datum;
14792 #define CF(N,X) CODE_FOR_neon_##N##X
14794 #define VAR1(T, N, A) \
14795 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14796 #define VAR2(T, N, A, B) \
14797 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14798 #define VAR3(T, N, A, B, C) \
14799 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14800 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14801 #define VAR4(T, N, A, B, C, D) \
14802 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14803 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14804 #define VAR5(T, N, A, B, C, D, E) \
14805 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14806 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14807 #define VAR6(T, N, A, B, C, D, E, F) \
14808 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14809 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14810 #define VAR7(T, N, A, B, C, D, E, F, G) \
14811 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14812 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14814 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14815 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14817 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14818 CF (N, G), CF (N, H) }, 8, 0
14819 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14820 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14821 | UP (H) | UP (I), \
14822 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14823 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14824 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14825 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14826 | UP (H) | UP (I) | UP (J), \
14827 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14828 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
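/* Editorial note: one expansion under the VAR<n> macros above, to make the
   table below easier to read.  Derived mechanically from the definitions
   of VAR2, UP and CF:

     VAR2 (BINOP, vqdmull, v4hi, v2si)
   =>
     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. the builtin's name stem, its itype, a bitmask of supported
   variants, the per-variant insn codes, the variant count, and a base
   fcode filled in at initialization time.  */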
14830 /* The mode entries in the following table correspond to the "key" type of the
14831 instruction variant, i.e. equivalent to that which would be specified after
14832 the assembler mnemonic, which usually refers to the last vector operand.
14833 (Signed, unsigned and polynomial types are not distinguished, though;
14834 they are all mapped onto the same mode for a given element size.) The modes
14835 listed per instruction should be the same as those defined for that
14836 instruction's pattern in neon.md.
14837 WARNING: Variants should be listed in the same increasing order as
14838 neon_builtin_type_bits. */
14840 static neon_builtin_datum neon_builtin_data[] =
14842 { VAR10 (BINOP, vadd,
14843 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14844 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14845 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14846 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14847 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14848 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14849 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14850 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14851 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14852 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14853 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14854 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14855 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14856 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14857 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14858 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14859 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14860 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14861 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14862 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14863 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14864 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14865 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14866 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14867 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14868 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14869 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14870 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14871 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14872 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14873 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14874 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14875 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14876 { VAR10 (BINOP, vsub,
14877 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14878 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14879 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14880 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14881 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14882 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14883 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14884 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14885 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14886 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14887 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14888 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14889 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14890 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14891 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14892 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14893 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14894 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14895 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14896 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14897 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14898 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14899 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14900 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14901 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14902 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14903 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14904 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14905 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14906 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14907 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14908 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14909 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14910 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14911 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14912 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14913 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14914 /* FIXME: vget_lane supports more variants than this! */
14915 { VAR10 (GETLANE, vget_lane,
14916 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14917 { VAR10 (SETLANE, vset_lane,
14918 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14919 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14920 { VAR10 (DUP, vdup_n,
14921 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14922 { VAR10 (DUPLANE, vdup_lane,
14923 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14924 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14925 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14926 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14927 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14928 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14929 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14930 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14931 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14932 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14933 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14934 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14935 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14936 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14937 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14938 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14939 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14940 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14941 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14942 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14943 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14944 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14945 { VAR10 (BINOP, vext,
14946 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14947 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14948 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14949 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14950 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14951 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14952 { VAR10 (SELECT, vbsl,
14953 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14954 { VAR1 (VTBL, vtbl1, v8qi) },
14955 { VAR1 (VTBL, vtbl2, v8qi) },
14956 { VAR1 (VTBL, vtbl3, v8qi) },
14957 { VAR1 (VTBL, vtbl4, v8qi) },
14958 { VAR1 (VTBX, vtbx1, v8qi) },
14959 { VAR1 (VTBX, vtbx2, v8qi) },
14960 { VAR1 (VTBX, vtbx3, v8qi) },
14961 { VAR1 (VTBX, vtbx4, v8qi) },
14962 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14963 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14964 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14965 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14966 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14967 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14968 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14969 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14970 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14971 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14972 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14973 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14974 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14975 { VAR10 (LOAD1, vld1,
14976 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14977 { VAR10 (LOAD1LANE, vld1_lane,
14978 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14979 { VAR10 (LOAD1, vld1_dup,
14980 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14981 { VAR10 (STORE1, vst1,
14982 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14983 { VAR10 (STORE1LANE, vst1_lane,
14984 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14985 { VAR9 (LOADSTRUCT,
14986 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14987 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14988 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14989 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14990 { VAR9 (STORESTRUCT, vst2,
14991 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14992 { VAR7 (STORESTRUCTLANE, vst2_lane,
14993 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14994 { VAR9 (LOADSTRUCT,
14995 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14996 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14997 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14998 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14999 { VAR9 (STORESTRUCT, vst3,
15000 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15001 { VAR7 (STORESTRUCTLANE, vst3_lane,
15002 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15003 { VAR9 (LOADSTRUCT, vld4,
15004 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15005 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15006 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15007 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15008 { VAR9 (STORESTRUCT, vst4,
15009 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15010 { VAR7 (STORESTRUCTLANE, vst4_lane,
15011 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15012 { VAR10 (LOGICBINOP, vand,
15013 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15014 { VAR10 (LOGICBINOP, vorr,
15015 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15016 { VAR10 (BINOP, veor,
15017 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15018 { VAR10 (LOGICBINOP, vbic,
15019 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15020 { VAR10 (LOGICBINOP, vorn,
15021 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15036 static void
15037 arm_init_neon_builtins (void)
15039 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15041 tree neon_intQI_type_node;
15042 tree neon_intHI_type_node;
15043 tree neon_polyQI_type_node;
15044 tree neon_polyHI_type_node;
15045 tree neon_intSI_type_node;
15046 tree neon_intDI_type_node;
15047 tree neon_float_type_node;
15049 tree intQI_pointer_node;
15050 tree intHI_pointer_node;
15051 tree intSI_pointer_node;
15052 tree intDI_pointer_node;
15053 tree float_pointer_node;
15055 tree const_intQI_node;
15056 tree const_intHI_node;
15057 tree const_intSI_node;
15058 tree const_intDI_node;
15059 tree const_float_node;
15061 tree const_intQI_pointer_node;
15062 tree const_intHI_pointer_node;
15063 tree const_intSI_pointer_node;
15064 tree const_intDI_pointer_node;
15065 tree const_float_pointer_node;
15067 tree V8QI_type_node;
15068 tree V4HI_type_node;
15069 tree V2SI_type_node;
15070 tree V2SF_type_node;
15071 tree V16QI_type_node;
15072 tree V8HI_type_node;
15073 tree V4SI_type_node;
15074 tree V4SF_type_node;
15075 tree V2DI_type_node;
15077 tree intUQI_type_node;
15078 tree intUHI_type_node;
15079 tree intUSI_type_node;
15080 tree intUDI_type_node;
15082 tree intEI_type_node;
15083 tree intOI_type_node;
15084 tree intCI_type_node;
15085 tree intXI_type_node;
15087 tree V8QI_pointer_node;
15088 tree V4HI_pointer_node;
15089 tree V2SI_pointer_node;
15090 tree V2SF_pointer_node;
15091 tree V16QI_pointer_node;
15092 tree V8HI_pointer_node;
15093 tree V4SI_pointer_node;
15094 tree V4SF_pointer_node;
15095 tree V2DI_pointer_node;
15097 tree void_ftype_pv8qi_v8qi_v8qi;
15098 tree void_ftype_pv4hi_v4hi_v4hi;
15099 tree void_ftype_pv2si_v2si_v2si;
15100 tree void_ftype_pv2sf_v2sf_v2sf;
15101 tree void_ftype_pdi_di_di;
15102 tree void_ftype_pv16qi_v16qi_v16qi;
15103 tree void_ftype_pv8hi_v8hi_v8hi;
15104 tree void_ftype_pv4si_v4si_v4si;
15105 tree void_ftype_pv4sf_v4sf_v4sf;
15106 tree void_ftype_pv2di_v2di_v2di;
15108 tree reinterp_ftype_dreg[5][5];
15109 tree reinterp_ftype_qreg[5][5];
15110 tree dreg_types[5], qreg_types[5];
15112 /* Create distinguished type nodes for NEON vector element types,
15113 and pointers to values of such types, so we can detect them later. */
15114 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15115 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15116 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15117 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15118 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15119 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15120 neon_float_type_node = make_node (REAL_TYPE);
15121 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15122 layout_type (neon_float_type_node);
15124 /* Define typedefs which exactly correspond to the modes we are basing vector
15125 types on. If you change these names you'll need to change
15126 the table used by arm_mangle_type too. */
15127 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15128 "__builtin_neon_qi");
15129 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15130 "__builtin_neon_hi");
15131 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15132 "__builtin_neon_si");
15133 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15134 "__builtin_neon_sf");
15135 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15136 "__builtin_neon_di");
15137 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15138 "__builtin_neon_poly8");
15139 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15140 "__builtin_neon_poly16");
15142 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15143 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15144 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15145 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15146 float_pointer_node = build_pointer_type (neon_float_type_node);
15148 /* Next create constant-qualified versions of the above types. */
15149 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15151 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15153 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15155 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15157 const_float_node = build_qualified_type (neon_float_type_node,
15160 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15161 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15162 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15163 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15164 const_float_pointer_node = build_pointer_type (const_float_node);
15166 /* Now create vector types based on our NEON element types. */
15167 /* 64-bit vectors. */
15169 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15171 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15173 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15175 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15176 /* 128-bit vectors. */
15178 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15180 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15182 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15184 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15186 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15188 /* Unsigned integer types for various mode sizes. */
15189 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15190 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15191 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15192 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15194 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15195 "__builtin_neon_uqi");
15196 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15197 "__builtin_neon_uhi");
15198 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15199 "__builtin_neon_usi");
15200 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15201 "__builtin_neon_udi");
15203 /* Opaque integer types for structures of vectors. */
15204 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15205 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15206 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15207 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15209 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15210 "__builtin_neon_ti");
15211 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15212 "__builtin_neon_ei");
15213 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15214 "__builtin_neon_oi");
15215 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15216 "__builtin_neon_ci");
15217 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15218 "__builtin_neon_xi");
15220 /* Pointers to vector types. */
15221 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15222 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15223 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15224 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15225 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15226 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15227 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15228 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15229 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15231 /* Operations which return results as pairs. */
15232 void_ftype_pv8qi_v8qi_v8qi =
15233 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15234 V8QI_type_node, NULL);
15235 void_ftype_pv4hi_v4hi_v4hi =
15236 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15237 V4HI_type_node, NULL);
15238 void_ftype_pv2si_v2si_v2si =
15239 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15240 V2SI_type_node, NULL);
15241 void_ftype_pv2sf_v2sf_v2sf =
15242 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15243 V2SF_type_node, NULL);
15244 void_ftype_pdi_di_di =
15245 build_function_type_list (void_type_node, intDI_pointer_node,
15246 neon_intDI_type_node, neon_intDI_type_node, NULL);
15247 void_ftype_pv16qi_v16qi_v16qi =
15248 build_function_type_list (void_type_node, V16QI_pointer_node,
15249 V16QI_type_node, V16QI_type_node, NULL);
15250 void_ftype_pv8hi_v8hi_v8hi =
15251 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15252 V8HI_type_node, NULL);
15253 void_ftype_pv4si_v4si_v4si =
15254 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15255 V4SI_type_node, NULL);
15256 void_ftype_pv4sf_v4sf_v4sf =
15257 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15258 V4SF_type_node, NULL);
15259 void_ftype_pv2di_v2di_v2di =
15260 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15261 V2DI_type_node, NULL);
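/* Illustrative note: each of these signatures has the shape
   void f (T *, T, T), a destination pointer plus the two vectors whose
   combined results are stored through it.  They are matched to operand
   modes in the NEON_RESULTPAIR case further down, and the store layout
   is implemented by neon_emit_pair_result_insn below.  */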
15263 dreg_types[0] = V8QI_type_node;
15264 dreg_types[1] = V4HI_type_node;
15265 dreg_types[2] = V2SI_type_node;
15266 dreg_types[3] = V2SF_type_node;
15267 dreg_types[4] = neon_intDI_type_node;
15269 qreg_types[0] = V16QI_type_node;
15270 qreg_types[1] = V8HI_type_node;
15271 qreg_types[2] = V4SI_type_node;
15272 qreg_types[3] = V4SF_type_node;
15273 qreg_types[4] = V2DI_type_node;
15275 for (i = 0; i < 5; i++)
{
15278 for (j = 0; j < 5; j++)
{
15280 reinterp_ftype_dreg[i][j]
15281 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15282 reinterp_ftype_qreg[i][j]
15283 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
}
}
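/* Illustrative example: reinterp_ftype_dreg[i][j] is the type of a
   function taking dreg_types[j] and returning dreg_types[i], so
   reinterp_ftype_dreg[0][3] describes a reinterpretation from a V2SF
   vector to a V8QI vector.  The qreg table is analogous for the
   128-bit types.  */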
15287 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
{
15289 neon_builtin_datum *d = &neon_builtin_data[i];
15290 unsigned int j, codeidx = 0;

15292 d->base_fcode = fcode;

15294 for (j = 0; j < T_MAX; j++)
{
15296 const char* const modenames[] = {
15297 "v8qi", "v4hi", "v2si", "v2sf", "di",
15298 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
};
char namebuf[60];
tree ftype = NULL;
15302 enum insn_code icode;
15303 int is_load = 0, is_store = 0;
15305 if ((d->bits & (1 << j)) == 0)
continue;

15308 icode = d->codes[codeidx++];

switch (d->itype)
{
case NEON_LOAD1:
15313 case NEON_LOAD1LANE:
15314 case NEON_LOADSTRUCT:
15315 case NEON_LOADSTRUCTLANE:
is_load = 1;
15317 /* Fall through. */
case NEON_STORE1:
15319 case NEON_STORE1LANE:
15320 case NEON_STORESTRUCT:
15321 case NEON_STORESTRUCTLANE:
if (!is_load)
is_store = 1;
15324 /* Fall through. */
15327 case NEON_LOGICBINOP:
15328 case NEON_SHIFTINSERT:
15335 case NEON_SHIFTIMM:
15336 case NEON_SHIFTACC:
15342 case NEON_LANEMULL:
15343 case NEON_LANEMULH:
15345 case NEON_SCALARMUL:
15346 case NEON_SCALARMULL:
15347 case NEON_SCALARMULH:
15348 case NEON_SCALARMAC:
case NEON_SELECT:
case NEON_VTBL:
case NEON_VTBX:
{
int k;
15354 tree return_type = void_type_node, args = void_list_node;
15356 /* Build a function type directly from the insn_data for this
15357 builtin. The build_function_type() function takes care of
15358 removing duplicates for us. */
15359 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
{
tree eltype;

15363 if (is_load && k == 1)
{
15365 /* Neon load patterns always have the memory operand
15366 (a SImode pointer) in the operand 1 position. We
15367 want a const pointer to the element type in that
position.  */
15369 gcc_assert (insn_data[icode].operand[k].mode == SImode);

switch (1 << j)
{
case T_V8QI:
case T_V16QI:
15375 eltype = const_intQI_pointer_node;
break;

case T_V4HI:
case T_V8HI:
15380 eltype = const_intHI_pointer_node;
break;

case T_V2SI:
case T_V4SI:
15385 eltype = const_intSI_pointer_node;
break;

case T_V2SF:
case T_V4SF:
15390 eltype = const_float_pointer_node;
break;

case T_DI:
case T_V2DI:
15395 eltype = const_intDI_pointer_node;
break;

15398 default: gcc_unreachable ();
}
}
15401 else if (is_store && k == 0)
15403 /* Similarly, Neon store patterns use operand 0 as
15404 the memory location to store to (a SImode pointer).
15405 Use a pointer to the element type of the store in
that position.  */
15407 gcc_assert (insn_data[icode].operand[k].mode == SImode);

switch (1 << j)
{
case T_V8QI:
case T_V16QI:
15413 eltype = intQI_pointer_node;
break;

case T_V4HI:
case T_V8HI:
15418 eltype = intHI_pointer_node;
break;

case T_V2SI:
case T_V4SI:
15423 eltype = intSI_pointer_node;
break;

case T_V2SF:
case T_V4SF:
15428 eltype = float_pointer_node;
break;

case T_DI:
case T_V2DI:
15433 eltype = intDI_pointer_node;
break;

15436 default: gcc_unreachable ();
}
}
else
{
15441 switch (insn_data[icode].operand[k].mode)
{
15443 case VOIDmode: eltype = void_type_node; break;
/* Scalars. */
15445 case QImode: eltype = neon_intQI_type_node; break;
15446 case HImode: eltype = neon_intHI_type_node; break;
15447 case SImode: eltype = neon_intSI_type_node; break;
15448 case SFmode: eltype = neon_float_type_node; break;
15449 case DImode: eltype = neon_intDI_type_node; break;
15450 case TImode: eltype = intTI_type_node; break;
15451 case EImode: eltype = intEI_type_node; break;
15452 case OImode: eltype = intOI_type_node; break;
15453 case CImode: eltype = intCI_type_node; break;
15454 case XImode: eltype = intXI_type_node; break;
15455 /* 64-bit vectors. */
15456 case V8QImode: eltype = V8QI_type_node; break;
15457 case V4HImode: eltype = V4HI_type_node; break;
15458 case V2SImode: eltype = V2SI_type_node; break;
15459 case V2SFmode: eltype = V2SF_type_node; break;
15460 /* 128-bit vectors. */
15461 case V16QImode: eltype = V16QI_type_node; break;
15462 case V8HImode: eltype = V8HI_type_node; break;
15463 case V4SImode: eltype = V4SI_type_node; break;
15464 case V4SFmode: eltype = V4SF_type_node; break;
15465 case V2DImode: eltype = V2DI_type_node; break;
15466 default: gcc_unreachable ();
}
}

15470 if (k == 0 && !is_store)
15471 return_type = eltype;
else
15473 args = tree_cons (NULL_TREE, eltype, args);
}

15476 ftype = build_function_type (return_type, args);
}
break;
15480 case NEON_RESULTPAIR:
{
15482 switch (insn_data[icode].operand[1].mode)
{
15484 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15485 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15486 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15487 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15488 case DImode: ftype = void_ftype_pdi_di_di; break;
15489 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15490 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15491 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15492 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15493 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15494 default: gcc_unreachable ();
}
}
break;

15499 case NEON_REINTERP:
{
15501 /* We iterate over 5 doubleword types, then 5 quadword
types.  */
int rhs = j % 5;
15504 switch (insn_data[icode].operand[0].mode)
{
15506 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15507 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15508 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15509 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15510 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15511 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15512 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15513 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15514 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15515 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15516 default: gcc_unreachable ();
}
}
break;

default:
15522 gcc_unreachable ();
}
15525 gcc_assert (ftype != NULL);
15527 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15529 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
NULL_TREE);
}
}
}
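/* Illustrative example: a table entry named "vadd" with its V8QI
   variant bit set is registered above as "__builtin_neon_vaddv8qi",
   with function codes allocated sequentially from the entry's
   base_fcode; locate_neon_builtin_icode below relies on that
   sequential numbering.  */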
static void
15536 arm_init_builtins (void)
{
15538 arm_init_tls_builtins ();

15540 if (TARGET_REALLY_IWMMXT)
15541 arm_init_iwmmxt_builtins ();

if (TARGET_NEON)
15544 arm_init_neon_builtins ();
}
15547 /* Errors in the source file can cause expand_expr to return const0_rtx
15548 where we expect a vector. To avoid crashing, use one of the vector
15549 clear instructions. */
static rtx
15552 safe_vector_operand (rtx x, enum machine_mode mode)
{
15554 if (x != const0_rtx)
return x;
15556 x = gen_reg_rtx (mode);

15558 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15559 : gen_rtx_SUBREG (DImode, x, 0)));
return x;
}
15563 /* Subroutine of arm_expand_builtin to take care of binop insns. */
static rtx
15566 arm_expand_binop_builtin (enum insn_code icode,
15567 tree exp, rtx target)
{
rtx pat;
15570 tree arg0 = CALL_EXPR_ARG (exp, 0);
15571 tree arg1 = CALL_EXPR_ARG (exp, 1);
15572 rtx op0 = expand_normal (arg0);
15573 rtx op1 = expand_normal (arg1);
15574 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15575 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15576 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15578 if (VECTOR_MODE_P (mode0))
15579 op0 = safe_vector_operand (op0, mode0);
15580 if (VECTOR_MODE_P (mode1))
15581 op1 = safe_vector_operand (op1, mode1);
if (! target
15584 || GET_MODE (target) != tmode
15585 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15586 target = gen_reg_rtx (tmode);
15588 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15590 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15591 op0 = copy_to_mode_reg (mode0, op0);
15592 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15593 op1 = copy_to_mode_reg (mode1, op1);
15595 pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return target;
}
15602 /* Subroutine of arm_expand_builtin to take care of unop insns. */
static rtx
15605 arm_expand_unop_builtin (enum insn_code icode,
15606 tree exp, rtx target, int do_load)
{
rtx pat;
15609 tree arg0 = CALL_EXPR_ARG (exp, 0);
15610 rtx op0 = expand_normal (arg0);
15611 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15612 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
if (! target
15615 || GET_MODE (target) != tmode
15616 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15617 target = gen_reg_rtx (tmode);
if (do_load)
15619 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
else
{
15622 if (VECTOR_MODE_P (mode0))
15623 op0 = safe_vector_operand (op0, mode0);

15625 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15626 op0 = copy_to_mode_reg (mode0, op0);
}
15629 pat = GEN_FCN (icode) (target, op0);
if (! pat)
return 0;
emit_insn (pat);
return target;
}
static int
15637 neon_builtin_compare (const void *a, const void *b)
{
15639 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
15640 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
15641 unsigned int soughtcode = key->base_fcode;
15643 if (soughtcode >= memb->base_fcode
15644 && soughtcode < memb->base_fcode + memb->num_vars)
return 0;
15646 else if (soughtcode < memb->base_fcode)
return -1;
else
return 1;
}
15652 static enum insn_code
15653 locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
15655 neon_builtin_datum key, *found;
int idx;

15658 key.base_fcode = fcode;
15659 found = (neon_builtin_datum *)
15660 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15661 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15662 gcc_assert (found);
15663 idx = fcode - (int) found->base_fcode;
15664 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
if (itype)
15667 *itype = found->itype;
15669 return found->codes[idx];
}
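/* Illustrative example: if a table entry has base_fcode 100 and
   num_vars 4, then function codes 100..103 all bsearch to that entry,
   and the code above returns codes[fcode - 100] for each of them.  */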
typedef enum {
15673 NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT,
NEON_ARG_STOP
} builtin_arg;
15678 #define NEON_MAX_BUILTIN_ARGS 5
15680 /* Expand a Neon builtin. */
static rtx
15682 arm_expand_neon_args (rtx target, int icode, int have_retval,
tree exp, ...)
{
va_list ap;
rtx pat;
15687 tree arg[NEON_MAX_BUILTIN_ARGS];
15688 rtx op[NEON_MAX_BUILTIN_ARGS];
15689 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15690 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
int argc = 0;

if (have_retval
&& (target == 0
15695 || GET_MODE (target) != tmode
15696 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15697 target = gen_reg_rtx (tmode);
15699 va_start (ap, exp);

for (;;)
{
15703 builtin_arg thisarg = va_arg (ap, int);

15705 if (thisarg == NEON_ARG_STOP)
break;
else
{
15709 arg[argc] = CALL_EXPR_ARG (exp, argc);
15710 op[argc] = expand_normal (arg[argc]);
15711 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;

switch (thisarg)
{
15715 case NEON_ARG_COPY_TO_REG:
15716 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15717 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15718 (op[argc], mode[argc]))
15719 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
break;
15722 case NEON_ARG_CONSTANT:
15723 /* FIXME: This error message is somewhat unhelpful. */
15724 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15725 (op[argc], mode[argc]))
15726 error ("argument must be a constant");
break;
15729 case NEON_ARG_STOP:
15730 gcc_unreachable ();
}

argc++;
}
}

va_end (ap);

if (have_retval)
switch (argc)
{
case 1:
15743 pat = GEN_FCN (icode) (target, op[0]);
break;
case 2:
15747 pat = GEN_FCN (icode) (target, op[0], op[1]);
break;
case 3:
15751 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
break;
case 4:
15755 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
break;
case 5:
15759 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
break;
default:
15763 gcc_unreachable ();
}
else
switch (argc)
{
case 1:
15769 pat = GEN_FCN (icode) (op[0]);
break;
case 2:
15773 pat = GEN_FCN (icode) (op[0], op[1]);
break;
case 3:
15777 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
break;
case 4:
15781 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
break;
case 5:
15785 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
break;
default:
15789 gcc_unreachable ();
}

if (!pat)
return 0;

emit_insn (pat);

return target;
}
15800 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15801 constants defined per-instruction or per instruction-variant. Instead, the
15802 required info is looked up in the table neon_builtin_data. */
static rtx
15804 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
neon_itype itype;
15807 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);

switch (itype)
{
case NEON_UNOP:
case NEON_CONVERT:
case NEON_DUPLANE:
15814 return arm_expand_neon_args (target, icode, 1, exp,
15815 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15819 case NEON_SCALARMUL:
15820 case NEON_SCALARMULL:
15821 case NEON_SCALARMULH:
15822 case NEON_SHIFTINSERT:
15823 case NEON_LOGICBINOP:
15824 return arm_expand_neon_args (target, icode, 1, exp,
15825 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);

case NEON_TERNOP:
15829 return arm_expand_neon_args (target, icode, 1, exp,
15830 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15831 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15835 case NEON_SHIFTIMM:
15836 return arm_expand_neon_args (target, icode, 1, exp,
15837 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
NEON_ARG_STOP);

case NEON_CREATE:
15841 return arm_expand_neon_args (target, icode, 1, exp,
15842 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15846 case NEON_REINTERP:
15847 return arm_expand_neon_args (target, icode, 1, exp,
15848 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15852 return arm_expand_neon_args (target, icode, 1, exp,
15853 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15855 case NEON_RESULTPAIR:
15856 return arm_expand_neon_args (target, icode, 0, exp,
15857 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_STOP);

case NEON_LANEMUL:
15861 case NEON_LANEMULL:
15862 case NEON_LANEMULH:
15863 return arm_expand_neon_args (target, icode, 1, exp,
15864 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15865 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15868 return arm_expand_neon_args (target, icode, 1, exp,
15869 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15870 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15872 case NEON_SHIFTACC:
15873 return arm_expand_neon_args (target, icode, 1, exp,
15874 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15875 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15877 case NEON_SCALARMAC:
15878 return arm_expand_neon_args (target, icode, 1, exp,
15879 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15880 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15884 return arm_expand_neon_args (target, icode, 1, exp,
15885 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
NEON_ARG_STOP);

case NEON_LOAD1:
15889 case NEON_LOADSTRUCT:
15890 return arm_expand_neon_args (target, icode, 1, exp,
15891 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15893 case NEON_LOAD1LANE:
15894 case NEON_LOADSTRUCTLANE:
15895 return arm_expand_neon_args (target, icode, 1, exp,
15896 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);

case NEON_STORE1:
15900 case NEON_STORESTRUCT:
15901 return arm_expand_neon_args (target, icode, 0, exp,
15902 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15904 case NEON_STORE1LANE:
15905 case NEON_STORESTRUCTLANE:
15906 return arm_expand_neon_args (target, icode, 0, exp,
15907 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
NEON_ARG_STOP);

default:
15911 gcc_unreachable ();
}
}
15914 /* Emit code to reinterpret one Neon type as another, without altering bits. */
void
15916 neon_reinterpret (rtx dest, rtx src)
{
15918 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
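/* Illustrative note: since gen_lowpart only changes the view of the
   underlying bits, reinterpreting, say, a V4HI value as V8QI compiles
   to a plain register-to-register move, which later passes can remove
   when source and destination end up in the same register.  */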
15921 /* Emit code to place a Neon pair result in memory locations (with equal
registers).  */
void
15924 neon_emit_pair_result_insn (enum machine_mode mode,
15925 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
rtx op1, rtx op2)
{
15928 rtx mem = gen_rtx_MEM (mode, destaddr);
15929 rtx tmp1 = gen_reg_rtx (mode);
15930 rtx tmp2 = gen_reg_rtx (mode);
15932 emit_insn (intfn (tmp1, op1, tmp2, op2));
15934 emit_move_insn (mem, tmp1);
15935 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15936 emit_move_insn (mem, tmp2);
}
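/* Illustrative note: for a pair-producing operation such as vtrn, the
   two result vectors are written to consecutive memory locations,
   DESTADDR and DESTADDR + GET_MODE_SIZE (MODE), matching the
   void (T *, T, T) signatures built in arm_init_neon_builtins.  */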
15939 /* Set up operands for a register copy from src to dest, taking care not to
15940 clobber registers in the process.
15941 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15942 be called with a large N, so that should be OK. */
void
15945 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
15947 unsigned int copied = 0, opctr = 0;
15948 unsigned int done = (1 << count) - 1;
unsigned int i, j;

15951 while (copied != done)
{
15953 for (i = 0; i < count; i++)
{
int good = 1;

15957 for (j = 0; good && j < count; j++)
15958 if (i != j && (copied & (1 << j)) == 0
15959 && reg_overlap_mentioned_p (src[j], dest[i]))
good = 0;

if (good && (copied & (1 << i)) == 0)
{
15964 operands[opctr++] = dest[i];
15965 operands[opctr++] = src[i];
copied |= 1 << i;
}
}
}

15971 gcc_assert (opctr == count * 2);
}
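/* Illustrative example: with COUNT == 2, DEST = {d0, d1} and
   SRC = {d1, d2}, the copy d1 := d2 would clobber the source of
   d0 := d1, so the loop above orders the operand pairs as d0 := d1
   first and d1 := d2 second.  */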
15974 /* Expand an expression EXP that calls a built-in function,
15975 with result going to TARGET if that's convenient
15976 (and in mode MODE if that's convenient).
15977 SUBTARGET may be used as the target for computing one of EXP's operands.
15978 IGNORE is nonzero if the value is to be ignored. */
static rtx
15981 arm_expand_builtin (tree exp,
rtx target,
15983 rtx subtarget ATTRIBUTE_UNUSED,
15984 enum machine_mode mode ATTRIBUTE_UNUSED,
15985 int ignore ATTRIBUTE_UNUSED)
{
15987 const struct builtin_description * d;
15988 enum insn_code icode;
15989 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
15997 int fcode = DECL_FUNCTION_CODE (fndecl);
size_t i;
15999 enum machine_mode tmode;
16000 enum machine_mode mode0;
16001 enum machine_mode mode1;
16002 enum machine_mode mode2;
16004 if (fcode >= ARM_BUILTIN_NEON_BASE)
16005 return arm_expand_neon_builtin (fcode, exp, target);

switch (fcode)
{
16009 case ARM_BUILTIN_TEXTRMSB:
16010 case ARM_BUILTIN_TEXTRMUB:
16011 case ARM_BUILTIN_TEXTRMSH:
16012 case ARM_BUILTIN_TEXTRMUH:
16013 case ARM_BUILTIN_TEXTRMSW:
16014 case ARM_BUILTIN_TEXTRMUW:
16015 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16016 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16017 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16018 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16019 : CODE_FOR_iwmmxt_textrmw);
16021 arg0 = CALL_EXPR_ARG (exp, 0);
16022 arg1 = CALL_EXPR_ARG (exp, 1);
16023 op0 = expand_normal (arg0);
16024 op1 = expand_normal (arg1);
16025 tmode = insn_data[icode].operand[0].mode;
16026 mode0 = insn_data[icode].operand[1].mode;
16027 mode1 = insn_data[icode].operand[2].mode;
16029 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16030 op0 = copy_to_mode_reg (mode0, op0);
16031 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
{
16033 /* @@@ better error message */
16034 error ("selector must be an immediate");
16035 return gen_reg_rtx (tmode);
}
if (target == 0
16038 || GET_MODE (target) != tmode
16039 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16040 target = gen_reg_rtx (tmode);
16041 pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return target;
16047 case ARM_BUILTIN_TINSRB:
16048 case ARM_BUILTIN_TINSRH:
16049 case ARM_BUILTIN_TINSRW:
16050 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16051 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16052 : CODE_FOR_iwmmxt_tinsrw);
16053 arg0 = CALL_EXPR_ARG (exp, 0);
16054 arg1 = CALL_EXPR_ARG (exp, 1);
16055 arg2 = CALL_EXPR_ARG (exp, 2);
16056 op0 = expand_normal (arg0);
16057 op1 = expand_normal (arg1);
16058 op2 = expand_normal (arg2);
16059 tmode = insn_data[icode].operand[0].mode;
16060 mode0 = insn_data[icode].operand[1].mode;
16061 mode1 = insn_data[icode].operand[2].mode;
16062 mode2 = insn_data[icode].operand[3].mode;
16064 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16065 op0 = copy_to_mode_reg (mode0, op0);
16066 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16067 op1 = copy_to_mode_reg (mode1, op1);
16068 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
{
16070 /* @@@ better error message */
16071 error ("selector must be an immediate");
return const0_rtx;
}
if (target == 0
16075 || GET_MODE (target) != tmode
16076 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16077 target = gen_reg_rtx (tmode);
16078 pat = GEN_FCN (icode) (target, op0, op1, op2);
if (! pat)
return 0;
emit_insn (pat);
return target;
16084 case ARM_BUILTIN_SETWCX:
16085 arg0 = CALL_EXPR_ARG (exp, 0);
16086 arg1 = CALL_EXPR_ARG (exp, 1);
16087 op0 = force_reg (SImode, expand_normal (arg0));
16088 op1 = expand_normal (arg1);
16089 emit_insn (gen_iwmmxt_tmcr (op1, op0));
return 0;
16092 case ARM_BUILTIN_GETWCX:
16093 arg0 = CALL_EXPR_ARG (exp, 0);
16094 op0 = expand_normal (arg0);
16095 target = gen_reg_rtx (SImode);
16096 emit_insn (gen_iwmmxt_tmrc (target, op0));
return target;
16099 case ARM_BUILTIN_WSHUFH:
16100 icode = CODE_FOR_iwmmxt_wshufh;
16101 arg0 = CALL_EXPR_ARG (exp, 0);
16102 arg1 = CALL_EXPR_ARG (exp, 1);
16103 op0 = expand_normal (arg0);
16104 op1 = expand_normal (arg1);
16105 tmode = insn_data[icode].operand[0].mode;
16106 mode1 = insn_data[icode].operand[1].mode;
16107 mode2 = insn_data[icode].operand[2].mode;
16109 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16110 op0 = copy_to_mode_reg (mode1, op0);
16111 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
{
16113 /* @@@ better error message */
16114 error ("mask must be an immediate");
return const0_rtx;
}
if (target == 0
16118 || GET_MODE (target) != tmode
16119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16120 target = gen_reg_rtx (tmode);
16121 pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return target;
16127 case ARM_BUILTIN_WSADB:
16128 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16129 case ARM_BUILTIN_WSADH:
16130 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16131 case ARM_BUILTIN_WSADBZ:
16132 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16133 case ARM_BUILTIN_WSADHZ:
16134 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16136 /* Several three-argument builtins. */
16137 case ARM_BUILTIN_WMACS:
16138 case ARM_BUILTIN_WMACU:
16139 case ARM_BUILTIN_WALIGN:
16140 case ARM_BUILTIN_TMIA:
16141 case ARM_BUILTIN_TMIAPH:
16142 case ARM_BUILTIN_TMIATT:
16143 case ARM_BUILTIN_TMIATB:
16144 case ARM_BUILTIN_TMIABT:
16145 case ARM_BUILTIN_TMIABB:
16146 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16147 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16148 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16149 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16150 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16151 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16152 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16153 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16154 : CODE_FOR_iwmmxt_walign);
16155 arg0 = CALL_EXPR_ARG (exp, 0);
16156 arg1 = CALL_EXPR_ARG (exp, 1);
16157 arg2 = CALL_EXPR_ARG (exp, 2);
16158 op0 = expand_normal (arg0);
16159 op1 = expand_normal (arg1);
16160 op2 = expand_normal (arg2);
16161 tmode = insn_data[icode].operand[0].mode;
16162 mode0 = insn_data[icode].operand[1].mode;
16163 mode1 = insn_data[icode].operand[2].mode;
16164 mode2 = insn_data[icode].operand[3].mode;
16166 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16167 op0 = copy_to_mode_reg (mode0, op0);
16168 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16169 op1 = copy_to_mode_reg (mode1, op1);
16170 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16171 op2 = copy_to_mode_reg (mode2, op2);
if (target == 0
16173 || GET_MODE (target) != tmode
16174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16175 target = gen_reg_rtx (tmode);
16176 pat = GEN_FCN (icode) (target, op0, op1, op2);
if (! pat)
return 0;
emit_insn (pat);
return target;
16182 case ARM_BUILTIN_WZERO:
16183 target = gen_reg_rtx (DImode);
16184 emit_insn (gen_iwmmxt_clrdi (target));
return target;
16187 case ARM_BUILTIN_THREAD_POINTER:
16188 return arm_load_tp (target);

default:
break;
}
16194 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16195 if (d->code == (const enum arm_builtins) fcode)
16196 return arm_expand_binop_builtin (d->icode, exp, target);
16198 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16199 if (d->code == (const enum arm_builtins) fcode)
16200 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16202 /* @@@ Should really do something sensible here. */
return NULL_RTX;
}
16206 /* Return the number (counting from 0) of
16207 the least significant set bit in MASK.  */
inline static int
16210 number_of_first_bit_set (unsigned mask)
{
int bit;

for (bit = 0;
16215 (mask & (1 << bit)) == 0;
++bit)
continue;

return bit;
}
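/* For example, number_of_first_bit_set (0x28) is 3, since bit 3 is the
   lowest bit set in binary 101000.  Note the loop does not terminate
   for MASK == 0, so callers must pass a nonzero mask.  */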
16222 /* Emit code to push or pop registers to or from the stack. F is the
16223 assembly file. MASK is the registers to push or pop. PUSH is
16224 nonzero if we should push, and zero if we should pop. For debugging
16225 output, if pushing, adjust CFA_OFFSET by the amount of space added
16226 to the stack. REAL_REGS should have the same number of bits set as
16227 MASK, and will be used instead (in the same order) to describe which
16228 registers were saved - this is used to mark the save slots when we
16229 push high registers after moving them to low registers. */
static void
16231 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16232 unsigned long real_regs)
{
int regno;
16235 int lo_mask = mask & 0xFF;
16236 int pushed_words = 0;
16240 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16242 /* Special case.  Do not generate a POP PC statement here, do it in
thumb_exit.  */
16244 thumb_exit (f, -1);
return;
}
16248 if (ARM_EABI_UNWIND_TABLES && push)
16250 fprintf (f, "\t.save\t{");
16251 for (regno = 0; regno < 15; regno++)
{
16253 if (real_regs & (1 << regno))
{
16255 if (real_regs & ((1 << regno) - 1))
fprintf (f, ", ");
16257 asm_fprintf (f, "%r", regno);
}
}
16260 fprintf (f, "}\n");
16263 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16265 /* Look at the low registers first. */
16266 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
{
if (lo_mask & 1)
{
16270 asm_fprintf (f, "%r", regno);

16272 if ((lo_mask & ~1) != 0)
fprintf (f, ", ");

pushed_words++;
}
}

16279 if (push && (mask & (1 << LR_REGNUM)))
{
16281 /* Catch pushing the LR.  */
if (mask & 0xFF)
fprintf (f, ", ");

16285 asm_fprintf (f, "%r", LR_REGNUM);

pushed_words++;
}
16289 else if (!push && (mask & (1 << PC_REGNUM)))
{
16291 /* Catch popping the PC.  */
16292 if (TARGET_INTERWORK || TARGET_BACKTRACE
16293 || crtl->calls_eh_return)
{
16295 /* The PC is never popped directly; instead
16296 it is popped into r3 and then BX is used.  */
16297 fprintf (f, "}\n");

16299 thumb_exit (f, -1);

return;
}
else
{
if (mask & 0xFF)
fprintf (f, ", ");

16308 asm_fprintf (f, "%r", PC_REGNUM);
}
}

16312 fprintf (f, "}\n");
16314 if (push && pushed_words && dwarf2out_do_frame ())
{
16316 char *l = dwarf2out_cfi_label ();
16317 int pushed_mask = real_regs;

16319 *cfa_offset += pushed_words * 4;
16320 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

pushed_words = 0;
16323 pushed_mask = real_regs;
16324 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
{
16326 if (pushed_mask & 1)
16327 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
}
}
}
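/* Illustrative output: for MASK = 0x40f0 (r4-r7 and lr) with PUSH
   nonzero, the code above emits roughly
	.save	{r4, r5, r6, r7, lr}
	push	{r4, r5, r6, r7, lr}
   where the .save directive only appears when EABI unwind tables are
   enabled.  */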
16332 /* Generate code to return from a thumb function.
16333 If 'reg_containing_return_addr' is -1, then the return address is
16334 actually on the stack, at the stack pointer. */
static void
16336 thumb_exit (FILE *f, int reg_containing_return_addr)
{
16338 unsigned regs_available_for_popping;
16339 unsigned regs_to_pop;
int pops_needed;
16341 unsigned available;
unsigned required;
int mode;
int size;
16345 int restore_a4 = FALSE;
16347 /* Compute the registers we need to pop.  */
regs_to_pop = 0;
pops_needed = 0;

16351 if (reg_containing_return_addr == -1)
{
16353 regs_to_pop |= 1 << LR_REGNUM;
++pops_needed;
}

16357 if (TARGET_BACKTRACE)
{
16359 /* Restore the (ARM) frame pointer and stack pointer.  */
16360 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
pops_needed += 2;
}
16364 /* If there is nothing to pop then just emit the BX instruction and
return.  */
16366 if (pops_needed == 0)
{
16368 if (crtl->calls_eh_return)
16369 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

16371 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
return;
}
16374 /* Otherwise if we are not supporting interworking and we have not created
16375 a backtrace structure and the function was not entered in ARM mode then
16376 just pop the return address straight into the PC. */
16377 else if (!TARGET_INTERWORK
16378 && !TARGET_BACKTRACE
16379 && !is_called_in_ARM_mode (current_function_decl)
16380 && !crtl->calls_eh_return)
{
16382 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
return;
}
16386 /* Find out how many of the (return) argument registers we can corrupt. */
16387 regs_available_for_popping = 0;
16389 /* If returning via __builtin_eh_return, the bottom three registers
16390 all contain information needed for the return. */
16391 if (crtl->calls_eh_return)
size = 12;
else
{
16395 /* We can deduce the registers used from the function's
16396 return value.  This is more reliable than examining
16397 df_regs_ever_live_p () because that will be set if the register is
16398 ever used in the function, not just if the register is used
16399 to hold a return value.  */
16401 if (crtl->return_rtx != 0)
16402 mode = GET_MODE (crtl->return_rtx);
else
16404 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16406 size = GET_MODE_SIZE (mode);

if (size == 0)
{
16410 /* In a void function we can use any argument register.
16411 In a function that returns a structure on the stack
16412 we can use the second and third argument registers.  */
16413 if (mode == VOIDmode)
16414 regs_available_for_popping =
16415 (1 << ARG_REGISTER (1))
16416 | (1 << ARG_REGISTER (2))
16417 | (1 << ARG_REGISTER (3));
else
16419 regs_available_for_popping =
16420 (1 << ARG_REGISTER (2))
16421 | (1 << ARG_REGISTER (3));
}
16423 else if (size <= 4)
16424 regs_available_for_popping =
16425 (1 << ARG_REGISTER (2))
16426 | (1 << ARG_REGISTER (3));
16427 else if (size <= 8)
16428 regs_available_for_popping =
16429 (1 << ARG_REGISTER (3));
}
16432 /* Match registers to be popped with registers into which we pop them. */
16433 for (available = regs_available_for_popping,
16434 required = regs_to_pop;
16435 required != 0 && available != 0;
16436 available &= ~(available & - available),
16437 required &= ~(required & - required))
-- pops_needed;
16440 /* If we have any popping registers left over, remove them.  */
if (available > 0)
16442 regs_available_for_popping &= ~available;
16444 /* Otherwise if we need another popping register we can use
16445 the fourth argument register. */
16446 else if (pops_needed)
{
16448 /* If we have not found any free argument registers and
16449 reg a4 contains the return address, we must move it.  */
16450 if (regs_available_for_popping == 0
16451 && reg_containing_return_addr == LAST_ARG_REGNUM)
{
16453 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16454 reg_containing_return_addr = LR_REGNUM;
}
16456 else if (size > 12)
{
16458 /* Register a4 is being used to hold part of the return value,
16459 but we have dire need of a free, low register.  */
restore_a4 = TRUE;

16462 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
}

16465 if (reg_containing_return_addr != LAST_ARG_REGNUM)
{
16467 /* The fourth argument register is available.  */
16468 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

--pops_needed;
}
}
16474 /* Pop as many registers as we can. */
16475 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16476 regs_available_for_popping);
16478 /* Process the registers we popped. */
16479 if (reg_containing_return_addr == -1)
{
16481 /* The return address was popped into the lowest numbered register.  */
16482 regs_to_pop &= ~(1 << LR_REGNUM);

16484 reg_containing_return_addr =
16485 number_of_first_bit_set (regs_available_for_popping);

16487 /* Remove this register from the mask of available registers, so that
16488 the return address will not be corrupted by further pops.  */
16489 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
}
16492 /* If we popped other registers then handle them here. */
16493 if (regs_available_for_popping)
{
int frame_pointer;

16497 /* Work out which register currently contains the frame pointer.  */
16498 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16500 /* Move it into the correct place. */
16501 asm_fprintf (f, "\tmov\t%r, %r\n",
16502 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16504 /* (Temporarily) remove it from the mask of popped registers. */
16505 regs_available_for_popping &= ~(1 << frame_pointer);
16506 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16508 if (regs_available_for_popping)
{
int stack_pointer;

16512 /* We popped the stack pointer as well,
16513 find the register that contains it.  */
16514 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16516 /* Move it into the stack register. */
16517 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16519 /* At this point we have popped all necessary registers, so
16520 do not worry about restoring regs_available_for_popping
16521 to its correct value:
16523 assert (pops_needed == 0)
16524 assert (regs_available_for_popping == (1 << frame_pointer))
16525 assert (regs_to_pop == (1 << STACK_POINTER)) */
}
else
{
16529 /* Since we have just moved the popped value into the frame
16530 pointer, the popping register is available for reuse, and
16531 we know that we still have the stack pointer left to pop.  */
16532 regs_available_for_popping |= (1 << frame_pointer);
}
}
16536 /* If we still have registers left on the stack, but we no longer have
16537 any registers into which we can pop them, then we must move the return
16538 address into the link register and make available the register that
we used to hold the return address.  */
16540 if (regs_available_for_popping == 0 && pops_needed > 0)
{
16542 regs_available_for_popping |= 1 << reg_containing_return_addr;
16544 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16545 reg_containing_return_addr);
16547 reg_containing_return_addr = LR_REGNUM;
}
16550 /* If we have registers left on the stack then pop some more.
16551 We know that at most we will want to pop FP and SP. */
16552 if (pops_needed > 0)
{
int popped_into;
int move_to;

16557 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16558 regs_available_for_popping);

16560 /* We have popped either FP or SP.
16561 Move whichever one it is into the correct register.  */
16562 popped_into = number_of_first_bit_set (regs_available_for_popping);
16563 move_to = number_of_first_bit_set (regs_to_pop);

16565 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

16567 regs_to_pop &= ~(1 << move_to);

--pops_needed;
}
16572 /* If we still have not popped everything then we must have only
16573 had one register available to us and we are now popping the SP. */
16574 if (pops_needed > 0)
{
int popped_into;

16578 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16579 regs_available_for_popping);

16581 popped_into = number_of_first_bit_set (regs_available_for_popping);

16583 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);

/*
16585 assert (regs_to_pop == (1 << STACK_POINTER))
16586 assert (pops_needed == 1)
*/
}
16590 /* If necessary restore the a4 register.  */
if (restore_a4)
{
16593 if (reg_containing_return_addr != LR_REGNUM)
{
16595 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16596 reg_containing_return_addr = LR_REGNUM;
}

16599 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
}
16602 if (crtl->calls_eh_return)
16603 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16605 /* Return to caller. */
16606 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
static void
16611 thumb1_final_prescan_insn (rtx insn)
{
16613 if (flag_print_asm_name)
16614 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16615 INSN_ADDRESSES (INSN_UID (insn)));
}
int
16619 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
16621 unsigned HOST_WIDE_INT mask = 0xff;
int i;

16624 if (val == 0) /* XXX */
return 0;

16627 for (i = 0; i < 25; i++)
16628 if ((val & (mask << i)) == val)
return 1;

return 0;
}
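/* For example, 0x00ff0000 satisfies this test (0xff shifted left by 16),
   so it can be synthesized as a move of #0xff followed by a left shift,
   whereas 0x00ff00ff does not fit under any single shift of an 8-bit
   value and fails it.  */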
16634 /* Returns nonzero if the current function contains,
16635 or might contain a far jump.  */
static int
16637 thumb_far_jump_used_p (void)
{
rtx insn;
16641 /* This test is only important for leaf functions. */
16642 /* assert (!leaf_function_p ()); */
16644 /* If we have already decided that far jumps may be used,
16645 do not bother checking again, and always return true even if
16646 it turns out that they are not being used. Once we have made
16647 the decision that far jumps are present (and that hence the link
16648 register will be pushed onto the stack) we cannot go back on it. */
16649 if (cfun->machine->far_jump_used)
return 1;
16652 /* If this function is not being called from the prologue/epilogue
16653 generation code then it must be being called from the
16654 INITIAL_ELIMINATION_OFFSET macro. */
16655 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
{
16657 /* In this case we know that we are being asked about the elimination
16658 of the arg pointer register. If that register is not being used,
16659 then there are no arguments on the stack, and we do not have to
16660 worry that a far jump might force the prologue to push the link
16661 register, changing the stack offsets. In this case we can just
16662 return false, since the presence of far jumps in the function will
16663 not affect stack offsets.
16665 If the arg pointer is live (or if it was live, but has now been
16666 eliminated and so set to dead) then we do have to test to see if
16667 the function might contain a far jump. This test can lead to some
16668 false negatives, since before reload is completed the length of
16669 branch instructions is not known, so gcc defaults to returning their
16670 longest length, which in turn sets the far jump attribute to true.
16672 A false negative will not result in bad code being generated, but it
16673 will result in a needless push and pop of the link register. We
16674 hope that this does not occur too often.
16676 If we need doubleword stack alignment this could affect the other
16677 elimination offsets so we can't risk getting it wrong. */
16678 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16679 cfun->machine->arg_pointer_live = 1;
16680 else if (!cfun->machine->arg_pointer_live)
return 0;
}
16684 /* Check to see if the function contains a branch
16685 insn with the far jump attribute set. */
16686 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
16688 if (GET_CODE (insn) == JUMP_INSN
16689 /* Ignore tablejump patterns. */
16690 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16691 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16692 && get_attr_far_jump (insn) == FAR_JUMP_YES)
{
16695 /* Record the fact that we have decided that
16696 the function does use far jumps.  */
16697 cfun->machine->far_jump_used = 1;
return 1;
}
}

return 0;
}
16705 /* Return nonzero if FUNC must be entered in ARM mode. */
int
16707 is_called_in_ARM_mode (tree func)
{
16709 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

16711 /* Ignore the problem about functions whose address is taken.  */
16712 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
return TRUE;

#if defined ARM_PE
16716 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
return FALSE;
#endif
}
16722 /* The bits which aren't usefully expanded as rtl. */
const char *
16724 thumb_unexpanded_epilogue (void)
{
16726 arm_stack_offsets *offsets;
int regno;
16728 unsigned long live_regs_mask = 0;
16729 int high_regs_pushed = 0;
16730 int had_to_push_lr;
int size;

16733 if (return_used_this_function)
return "";

16736 if (IS_NAKED (arm_current_func_type ()))
return "";
16739 offsets = arm_get_frame_offsets ();
16740 live_regs_mask = offsets->saved_regs_mask;
16741 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16743 /* We can deduce the registers used from the function's return value.
16744 This is more reliable than examining df_regs_ever_live_p () because that
16745 will be set if the register is ever used in the function, not just if
16746 the register is used to hold a return value.  */
16747 size = arm_size_return_regs ();
16749 /* The prolog may have pushed some high registers to use as
16750 work registers. e.g. the testsuite file:
16751 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16752 compiles to produce:
16753 push {r4, r5, r6, r7, lr}
mov r7, r9
mov r6, r8
push {r6, r7}
16757 as part of the prolog.  We have to undo that pushing here.  */
16759 if (high_regs_pushed)
{
16761 unsigned long mask = live_regs_mask & 0xff;
int next_hi_reg;

16764 /* The available low registers depend on the size of the value we are
returning.  */
if (size <= 12)
mask |= 1 << 3;
if (size <= 8)
mask |= 1 << 2;

if (mask == 0)
16772 /* Oh dear!  We have no low registers into which we can pop
high registers!  */
internal_error
16775 ("no low registers available for popping high registers");
16777 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16778 if (live_regs_mask & (1 << next_hi_reg))
break;
16781 while (high_regs_pushed)
{
16783 /* Find lo register(s) into which the high register(s) can
16784 be popped.  */
16785 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
{
16787 if (mask & (1 << regno))
16788 high_regs_pushed--;
16789 if (high_regs_pushed == 0)
break;
}

16793 mask &= (2 << regno) - 1; /* A noop if regno == 8 */

16795 /* Pop the values into the low register(s).  */
16796 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

16798 /* Move the value(s) into the high registers.  */
16799 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
{
16801 if (mask & (1 << regno))
{
16803 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
regno);

16806 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16807 if (live_regs_mask & (1 << next_hi_reg))
break;
}
}
}
16812 live_regs_mask &= ~0x0f00;
}
16815 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16816 live_regs_mask &= 0xff;
16818 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
{
16820 /* Pop the return address into the PC.  */
16821 if (had_to_push_lr)
16822 live_regs_mask |= 1 << PC_REGNUM;

16824 /* Either no argument registers were pushed or a backtrace
16825 structure was created which includes an adjusted stack
16826 pointer, so just pop everything.  */
16827 if (live_regs_mask)
16828 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
live_regs_mask);

16831 /* We have either just popped the return address into the
16832 PC or it was kept in LR for the entire function.  */
16833 if (!had_to_push_lr)
16834 thumb_exit (asm_out_file, LR_REGNUM);
}
else
{
16838 /* Pop everything but the return address.  */
16839 if (live_regs_mask)
16840 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
live_regs_mask);
16843 if (had_to_push_lr)
{
if (size > 12)
{
16847 /* We have no free low regs, so save one.  */
16848 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
LAST_ARG_REGNUM);
}

16852 /* Get the return address into a temporary register.  */
16853 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16854 1 << LAST_ARG_REGNUM);

if (size > 12)
{
16858 /* Move the return address to lr.  */
16859 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
LAST_ARG_REGNUM);
16861 /* Restore the low register.  */
16862 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
IP_REGNUM);
regno = LR_REGNUM;
}
else
16867 regno = LAST_ARG_REGNUM;
}
else
regno = LR_REGNUM;
16872 /* Remove the argument registers that were pushed onto the stack. */
16873 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16874 SP_REGNUM, SP_REGNUM,
16875 crtl->args.pretend_args_size);
16877 thumb_exit (asm_out_file, regno);
}

return "";
}
16883 /* Functions to save and restore machine-specific function data. */
16884 static struct machine_function *
16885 arm_init_machine_status (void)
{
16887 struct machine_function *machine;
16888 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

16890 #if ARM_FT_UNKNOWN != 0
16891 machine->func_type = ARM_FT_UNKNOWN;
#endif
return machine;
}
16896 /* Return an RTX indicating where the return address to the
16897 calling function can be found. */
rtx
16899 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
if (count != 0)
return NULL_RTX;

16904 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
16907 /* Do anything needed before RTL is emitted for each function. */
void
16909 arm_init_expanders (void)
{
16911 /* Arrange to initialize and mark the machine per-function status. */
16912 init_machine_status = arm_init_machine_status;
16914 /* This is to stop the combine pass optimizing away the alignment
16915 adjustment of va_arg. */
16916 /* ??? It is claimed that this should not be necessary. */
if (cfun)
16918 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
16922 /* Like arm_compute_initial_elimination offset. Simpler because there
16923 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16924 to point at the base of the local variables after static stack
16925 space for a function has been allocated. */
HOST_WIDE_INT
16928 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
16930 arm_stack_offsets *offsets;

16932 offsets = arm_get_frame_offsets ();

switch (from)
{
16936 case ARG_POINTER_REGNUM:
switch (to)
{
16939 case STACK_POINTER_REGNUM:
16940 return offsets->outgoing_args - offsets->saved_args;
16942 case FRAME_POINTER_REGNUM:
16943 return offsets->soft_frame - offsets->saved_args;
16945 case ARM_HARD_FRAME_POINTER_REGNUM:
16946 return offsets->saved_regs - offsets->saved_args;
16948 case THUMB_HARD_FRAME_POINTER_REGNUM:
16949 return offsets->locals_base - offsets->saved_args;
default:
16952 gcc_unreachable ();
}
break;

16956 case FRAME_POINTER_REGNUM:
switch (to)
{
16959 case STACK_POINTER_REGNUM:
16960 return offsets->outgoing_args - offsets->soft_frame;
16962 case ARM_HARD_FRAME_POINTER_REGNUM:
16963 return offsets->saved_regs - offsets->soft_frame;
16965 case THUMB_HARD_FRAME_POINTER_REGNUM:
16966 return offsets->locals_base - offsets->soft_frame;
default:
16969 gcc_unreachable ();
}
break;

default:
16974 gcc_unreachable ();
}
}
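/* Illustrative example: eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields offsets->outgoing_args - offsets->saved_args,
   i.e. the whole distance from the incoming-argument base down to the
   bottom of the outgoing-argument area; the other cases are the partial
   distances between intermediate frame layout points.  */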
16978 /* Generate the rest of a function's prologue. */
void
16980 thumb1_expand_prologue (void)
{
rtx insn, dwarf;

16984 HOST_WIDE_INT amount;
16985 arm_stack_offsets *offsets;
16986 unsigned long func_type;
int regno;
16988 unsigned long live_regs_mask;
16990 func_type = arm_current_func_type ();
16992 /* Naked functions don't have prologues.  */
16993 if (IS_NAKED (func_type))
return;

16996 if (IS_INTERRUPT (func_type))
{
16998 error ("interrupt service routines cannot be coded in Thumb mode");
return;
}
17002 offsets = arm_get_frame_offsets ();
17003 live_regs_mask = offsets->saved_regs_mask;
17004 /* Load the pic register before setting the frame pointer,
17005 so we can use r7 as a temporary work register. */
17006 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17007 arm_load_pic_register (live_regs_mask);
17009 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17010 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17011 stack_pointer_rtx);
17013 amount = offsets->outgoing_args - offsets->saved_regs;
if (amount)
{
if (amount < 512)
{
17018 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17019 GEN_INT (- amount)));
17020 RTX_FRAME_RELATED_P (insn) = 1;
}
else
{
rtx reg;
17026 /* The stack decrement is too big for an immediate value in a single
17027 insn. In theory we could issue multiple subtracts, but after
17028 three of them it becomes more space efficient to place the full
17029 value in the constant pool and load into a register. (Also the
17030 ARM debugger really likes to see only one stack decrement per
17031 function). So instead we look for a scratch register into which
17032 we can load the decrement, and then we subtract this from the
17033 stack pointer. Unfortunately on the thumb the only available
17034 scratch registers are the argument registers, and we cannot use
17035 these as they may hold arguments to the function. Instead we
17036 attempt to locate a call preserved register which is used by this
17037 function. If we can find one, then we know that it will have
17038 been pushed at the start of the prologue and so we can corrupt
it now.  */
17040 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17041 if (live_regs_mask & (1 << regno))
break;

17044 gcc_assert (regno <= LAST_LO_REGNUM);
17046 reg = gen_rtx_REG (SImode, regno);
17048 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17050 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17051 stack_pointer_rtx, reg));
17052 RTX_FRAME_RELATED_P (insn) = 1;
17053 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17054 plus_constant (stack_pointer_rtx,
-amount));
17056 RTX_FRAME_RELATED_P (dwarf) = 1;
REG_NOTES (insn)
17058 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
REG_NOTES (insn));
}
}
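/* Illustrative example: a 2048-byte stack decrement fails the
   amount < 512 test above, so the (negated) constant is loaded into a
   call-saved low register, which the prologue has already pushed, and
   then added to the stack pointer as a register operand.  */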
17063 if (frame_pointer_needed)
17064 thumb_set_frame_pointer (offsets);
17066 /* If we are profiling, make sure no instructions are scheduled before
17067 the call to mcount. Similarly if the user has requested no
17068 scheduling in the prolog. Similarly if we want non-call exceptions
17069 using the EABI unwinder, to prevent faulting instructions from being
17070 swapped with a stack adjustment. */
17071 if (crtl->profile || !TARGET_SCHED_PROLOG
17072 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17073 emit_insn (gen_blockage ());
17075 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17076 if (live_regs_mask & 0xff)
17077 cfun->machine->lr_save_eliminated = 0;
}
void
17082 thumb1_expand_epilogue (void)
{
17084 HOST_WIDE_INT amount;
17085 arm_stack_offsets *offsets;
int regno;
17088 /* Naked functions don't have prologues. */
17089 if (IS_NAKED (arm_current_func_type ()))
return;
17092 offsets = arm_get_frame_offsets ();
17093 amount = offsets->outgoing_args - offsets->saved_regs;
17095 if (frame_pointer_needed)
{
17097 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17098 amount = offsets->locals_base - offsets->saved_regs;
}
17101 gcc_assert (amount >= 0);
if (amount)
{
if (amount < 512)
17105 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17106 GEN_INT (amount)));
else
{
17109 /* r3 is always free in the epilogue.  */
17110 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

17112 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17113 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
}
}
17117 /* Emit a USE (stack_pointer_rtx), so that
17118 the stack adjustment will not be deleted. */
17119 emit_insn (gen_prologue_use (stack_pointer_rtx));
17121 if (crtl->profile || !TARGET_SCHED_PROLOG)
17122 emit_insn (gen_blockage ());
17124 /* Emit a clobber for each insn that will be restored in the epilogue,
17125 so that flow2 will get register lifetimes correct. */
17126 for (regno = 0; regno < 13; regno++)
17127 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17128 emit_clobber (gen_rtx_REG (SImode, regno));
17130 if (! df_regs_ever_live_p (LR_REGNUM))
17131 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
static void
17135 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
17137 arm_stack_offsets *offsets;
17138 unsigned long live_regs_mask = 0;
17139 unsigned long l_mask;
17140 unsigned high_regs_pushed = 0;
17141 int cfa_offset = 0;
int regno;

17144 if (IS_NAKED (arm_current_func_type ()))
return;
17147 if (is_called_in_ARM_mode (current_function_decl))
{
const char * name;

17151 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17152 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
== SYMBOL_REF);
17154 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17156 /* Generate code sequence to switch us into Thumb mode. */
17157 /* The .code 32 directive has already been emitted by
17158 ASM_DECLARE_FUNCTION_NAME. */
17159 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17160 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17162 /* Generate a label, so that the debugger will notice the
17163 change in instruction sets. This label is also used by
17164 the assembler to bypass the ARM code when this function
17165 is called from a Thumb encoded function elsewhere in the
17166 same file. Hence the definition of STUB_NAME here must
17167 agree with the definition in gas/config/tc-arm.c. */
17169 #define STUB_NAME ".real_start_of"
17171 fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
17173 if (arm_dllexport_name_p (name))
17174 name = arm_strip_name_encoding (name);
#endif
17176 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17177 fprintf (f, "\t.thumb_func\n");
17178 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
}

17181 if (crtl->args.pretend_args_size)
{
17183 /* Output unwind directive for the stack adjustment. */
17184 if (ARM_EABI_UNWIND_TABLES)
17185 fprintf (f, "\t.pad #%d\n",
17186 crtl->args.pretend_args_size);
17188 if (cfun->machine->uses_anonymous_args)
{
int num_pushes;

17192 fprintf (f, "\tpush\t{");

17194 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17196 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17197 regno <= LAST_ARG_REGNUM;
regno++)
17199 asm_fprintf (f, "%r%s", regno,
17200 regno == LAST_ARG_REGNUM ? "" : ", ");

17202 fprintf (f, "}\n");
}
else
17205 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17206 SP_REGNUM, SP_REGNUM,
17207 crtl->args.pretend_args_size);
17209 /* We don't need to record the stores for unwinding (would it
17210 help the debugger any if we did?), but record the change in
17211 the stack pointer. */
17212 if (dwarf2out_do_frame ())
{
17214 char *l = dwarf2out_cfi_label ();

17216 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17217 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
}
}
17221 /* Get the registers we are going to push. */
17222 offsets = arm_get_frame_offsets ();
17223 live_regs_mask = offsets->saved_regs_mask;
17224 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17225 l_mask = live_regs_mask & 0x40ff;
17226 /* Then count how many other high registers will need to be pushed. */
17227 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17229 if (TARGET_BACKTRACE)
{
unsigned offset;
17232 unsigned work_register;
17234 /* We have been asked to create a stack backtrace structure.
17235 The code looks like this:
17239 0 sub SP, #16 Reserve space for 4 registers.
17240 2 push {R7} Push low registers.
17241 4 add R7, SP, #20 Get the stack pointer before the push.
17242 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17243 8 mov R7, PC Get hold of the start of this code plus 12.
17244 10 str R7, [SP, #16] Store it.
17245 12 mov R7, FP Get hold of the current frame pointer.
17246 14 str R7, [SP, #4] Store it.
17247 16 mov R7, LR Get hold of the current return address.
17248 18 str R7, [SP, #12] Store it.
17249 20 add R7, SP, #16 Point at the start of the backtrace structure.
17250 22 mov FP, R7 Put this value into the frame pointer. */
17252 work_register = thumb_find_work_register (live_regs_mask);
17254 if (ARM_EABI_UNWIND_TABLES)
17255 asm_fprintf (f, "\t.pad #16\n");
asm_fprintf
17258 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17259 SP_REGNUM, SP_REGNUM);
17261 if (dwarf2out_do_frame ())
{
17263 char *l = dwarf2out_cfi_label ();

17265 cfa_offset = cfa_offset + 16;
17266 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
}

if (l_mask)
{
17271 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17272 offset = bit_count (l_mask) * UNITS_PER_WORD;
}
else
offset = 0;
17277 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17278 offset + 16 + crtl->args.pretend_args_size);
17280 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
offset + 4);
17283 /* Make sure that the instruction fetching the PC is in the right place
17284 to calculate "start of backtrace creation code + 12".  */
if (l_mask)
{
17287 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17288 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
offset + 12);
17290 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17291 ARM_HARD_FRAME_POINTER_REGNUM);
17292 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
offset);
}
else
{
17297 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17298 ARM_HARD_FRAME_POINTER_REGNUM);
17299 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
offset);
17301 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17302 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
offset + 12);
}
17306 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17307 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17309 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17311 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17312 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
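      /* Sketch of the record the sequence above builds (our own reading,
	 with offsets relative to the 16-byte block reserved at SP +
	 offset): +0 holds the caller's frame pointer, +4 the original
	 stack pointer, +8 the return address and +12 the PC value
	 identifying this code; the new frame pointer is then left
	 pointing at the +12 slot.  */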
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      pushable_regs = l_mask & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);

		  high_regs_pushed --;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg --)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
			     1, &cfa_offset,
			     real_regs_mask | (1 << LR_REGNUM));
	      l_mask = 0;
	    }
	  else
	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
	}
    }
}
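/* An illustrative, compiler-unused sketch (our own helper, not part of
   the port) of the staging idea used above: high registers r8-r11 have
   no Thumb-1 PUSH encoding, so each one is first copied into a free low
   register and the low registers are pushed instead.  Returns the
   number of "mov low, high" steps a given live-register mask needs.  */
static int ATTRIBUTE_UNUSED
thumb_high_reg_push_steps_example (unsigned long live_regs_mask)
{
  int steps = 0;
  int regno;

  /* One mov per live register in the r8-r11 range.  */
  for (regno = 8; regno <= 11; regno++)
    if (live_regs_mask & (1UL << regno))
      steps++;

  return steps;
}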
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
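/* A minimal sketch (our own, unused helper) of the hazard the function
   above guards against: when the low word of the destination pair is
   the same register as the address register, the first LDR would
   destroy the address, so the high word must be fetched first.
   Returns nonzero when the high-word-first order is required.  */
static int ATTRIBUTE_UNUSED
double_load_high_first_example (int reg_dest_lo, int reg_addr)
{
  /* "ldr rD, [rA]; ldr rD+1, [rA, #4]" is only safe if rD != rA.  */
  return reg_dest_lo == reg_addr;
}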
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	tmp = operands[4], operands[4] = operands[5], operands[5] = tmp;

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	tmp = operands[4], operands[4] = operands[5], operands[5] = tmp;
      if (REGNO (operands[5]) > REGNO (operands[6]))
	tmp = operands[5], operands[5] = operands[6], operands[6] = tmp;
      if (REGNO (operands[4]) > REGNO (operands[5]))
	tmp = operands[4], operands[4] = operands[5], operands[5] = tmp;

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
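/* Unused illustrative sketch: LDM/STM register lists must name the
   registers in ascending order, which is why the operands above are
   sorted with a small compare-and-swap network.  The same three-element
   network on plain ints.  */
static void ATTRIBUTE_UNUSED
sort3_regnos_example (int *a, int *b, int *c)
{
  int tmp;

  if (*a > *b) tmp = *a, *a = *b, *b = tmp;
  if (*b > *c) tmp = *b, *b = *c, *c = tmp;
  if (*a > *b) tmp = *a, *a = *b, *b = tmp;	/* now *a <= *b <= *c */
}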
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }
  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }
  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4, offset += 4;
    }
  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
			    reg));
      len -= 2, offset += 2;
    }
  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
			    reg));
    }
}
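/* Unused illustrative sketch of the block-size schedule above: a copy
   of LEN bytes is split greedily into 12- and 8-byte LDM/STM blocks,
   then a word, a half-word and a byte.  Returns the number of moves a
   given length would emit.  */
static int ATTRIBUTE_UNUSED
movmem_schedule_example (int len)
{
  int moves = 0;

  while (len >= 12)
    len -= 12, moves++;		/* 12-byte ldmia/stmia block */
  if (len >= 8)
    len -= 8, moves++;		/* 8-byte ldmia/stmia block */
  if (len >= 4)
    len -= 4, moves++;		/* SImode move */
  if (len >= 2)
    len -= 2, moves++;		/* HImode move */
  if (len)
    moves++;			/* QImode move */

  return moves;
}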
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
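/* Unused sketch of the stripping loop above on a plain string: each
   recognized prefix character contributes a fixed skip, and a '*' seen
   anywhere in the prefix forces verbatim output.  The one-character
   table here is purely hypothetical.  */
static const char * ATTRIBUTE_UNUSED
strip_prefix_example (const char *name, int *verbatim)
{
  *verbatim = 0;
  /* Pretend '*' and '@' are the only encoding characters.  */
  while (*name == '*' || *name == '@')
    {
      *verbatim |= (*name == '*');
      name++;		/* each prefix is one character long here */
    }
  return name;
}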
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_select[0].string)
	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
      else if (arm_select[1].string)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
      else
	asm_fprintf (asm_out_file, "\t.cpu %s\n",
		     all_cores[arm_default_cpu].name);

      if (TARGET_SOFT_FLOAT)
	{
	  if (TARGET_VFP)
	    fpu_name = "softvfp";
	  else
	    fpu_name = "softfpa";
	}
      else
	{
	  int set_float_abi_attributes = 0;
	  switch (arm_fpu_arch)
	    {
	    case FPUTYPE_FPA:       fpu_name = "fpa";      break;
	    case FPUTYPE_FPA_EMU2:  fpu_name = "fpe2";     break;
	    case FPUTYPE_FPA_EMU3:  fpu_name = "fpe3";     break;
	    case FPUTYPE_MAVERICK:  fpu_name = "maverick"; break;
	    case FPUTYPE_VFP:
	      fpu_name = "vfp";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_VFP3D16:
	      fpu_name = "vfpv3-d16";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_VFP3:
	      fpu_name = "vfpv3";
	      set_float_abi_attributes = 1;
	      break;
	    case FPUTYPE_NEON:
	      fpu_name = "neon";
	      set_float_abi_attributes = 1;
	      break;
	    default:
	      abort ();
	    }
	  if (set_float_abi_attributes)
	    {
	      if (TARGET_HARD_FLOAT)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
	      if (TARGET_HARD_FLOAT_ABI)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      /* Tag_ABI_FP_rounding.  */
      if (flag_rounding_math)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
	{
	  /* Tag_ABI_FP_denormal.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
	  /* Tag_ABI_FP_exceptions.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
	}
      /* Tag_ABI_FP_user_exceptions.  */
      if (flag_signaling_nans)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
		   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
      /* Tag_ABI_enum_size.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
		   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start();
}
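/* For instance (an illustrative sample of ours, not verified compiler
   output), building with -mfpu=neon -mfloat-abi=softfp -O2 and
   -fshort-enums would put something along these lines at the top of
   the assembly file:

	.cpu cortex-a8
	.eabi_attribute 27, 3
	.fpu neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 1
	.eabi_attribute 30, 2  */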
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		     mi_op, this_regno, this_regno,
		     mi_delta);
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands [1]) != MEM
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  int nregs = cum->nregs;
  if (nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    nregs++;

  cfun->machine->uses_anonymous_args = 1;
  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
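/* Unused illustrative sketch of the computation above: with the four
   ARM argument registers, a variadic function whose named arguments
   consume NREGS of them asks the prologue to push the rest, so the
   anonymous arguments end up contiguous with any stack arguments.  */
static int ATTRIBUTE_UNUSED
varargs_pretend_bytes_example (int nregs)
{
  /* e.g. int f (int fmt, ...): one named register used, so 3 * 4 = 12
     bytes (r1-r3) are pushed by the prologue.  */
  return nregs < 4 ? (4 - nregs) * 4 : 0;
}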
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}
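/* For example (ours, illustrative): after "add r1, r2, r3", the store
   "str r4, [r1, #4]" needs r1 to form its address, so the function
   returns 0 for that pair; "str r1, [r5]" only stores the value of r1
   and would return 1.  */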
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}
/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  return (GET_CODE (op) == PLUS
	  && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
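/* For example (illustrative, ours): for the pair
	mul  r0, r1, r2
	mla  r3, r4, r5, r0
   the mac reads the mul result only through its accumulator operand,
   so the function returns nonzero; "mla r3, r0, r5, r6" would return 0
   because r0 also feeds a multiply operand.  */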
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant(hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (addr, delta);
	}
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((mode == V2SImode)
      || (mode == V4HImode)
      || (mode == V8QImode))
    return (TARGET_REALLY_IWMMXT ? true : false);

  return false;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
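/* Unused illustrative sketch: a truncation mask of 255 tells the
   middle end that the hardware already reduces variable shift counts
   modulo 256, so an explicit "& 255" on the count is redundant.  */
static unsigned ATTRIBUTE_UNUSED
shift_truncation_example (unsigned x, unsigned count)
{
  count &= 255;				/* what the hardware does for free */
  return count < 32 ? x << count : 0;	/* LSL by 32..255 clears the value */
}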
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  /* FIXME: VFPv3 register numbering.  */
  if (IS_VFP_REGNUM (regno))
    return 64 + regno - FIRST_VFP_REGNUM;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	  nregs--;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));
      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   INTVAL (XEXP (e1, 1)));
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
	{
	  /* Stack pointer save before alignment.  */
	  reg = REGNO (e0);
	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
		       reg + 0x90, reg);
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return true;
}
#endif /* TARGET_UNWIND_INFO */
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
	 put anything on the stack, so hopefully it won't matter.
	 CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;

    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:  fputs ("(tlsgd)", fp);    break;
    case TLS_LDM32: fputs ("(tlsldm)", fp);   break;
    case TLS_LDO32: fputs ("(tlsldo)", fp);   break;
    case TLS_IE32:  fputs ("(gottpoff)", fp); break;
    case TLS_LE32:  fputs ("(tpoff)", fp);    break;
    default: gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return true;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return true;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return false;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";

    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";

    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}

    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },

  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
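/* Worked example (ours): "int8x8_t" from arm_neon.h has element type
   __builtin_neon_qi and mode V8QImode, so the table above maps it to
   "15__simd64_int8_t"; a function "void f (int8x8_t)" therefore
   mangles as "_Z1f15__simd64_int8_t" rather than using the generic
   vector mangling.  */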
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}

#include "gt-arm.h"