1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
57 /* Forward definitions of types. */
58 typedef struct minipool_node Mnode;
59 typedef struct minipool_fixup Mfix;
61 const struct attribute_spec arm_attribute_table[];
63 /* Forward function declarations. */
64 static arm_stack_offsets *arm_get_frame_offsets (void);
65 static void arm_add_gc_roots (void);
66 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
67 HOST_WIDE_INT, rtx, rtx, int, int);
68 static unsigned bit_count (unsigned long);
69 static int arm_address_register_rtx_p (rtx, int);
70 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
71 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
72 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
73 inline static int thumb1_index_register_rtx_p (rtx, int);
74 static int thumb_far_jump_used_p (void);
75 static bool thumb_force_lr_save (void);
76 static unsigned long thumb1_compute_save_reg_mask (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static int arm_size_return_regs (void);
81 static bool arm_assemble_integer (rtx, unsigned int, int);
83 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
84 static arm_cc get_arm_condition_code (rtx);
85 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
86 static rtx is_jump_table (rtx);
87 static const char *output_multi_immediate (rtx *, const char *, const char *,
89 static const char *shift_op (rtx, HOST_WIDE_INT *);
90 static struct machine_function *arm_init_machine_status (void);
91 static void thumb_exit (FILE *, int);
92 static rtx is_jump_table (rtx);
93 static HOST_WIDE_INT get_jump_table_size (rtx);
94 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_forward_ref (Mfix *);
96 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
97 static Mnode *add_minipool_backward_ref (Mfix *);
98 static void assign_minipool_offsets (Mfix *);
99 static void arm_print_value (FILE *, rtx);
100 static void dump_minipool (rtx);
101 static int arm_barrier_cost (rtx);
102 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
103 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
104 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
106 static void arm_reorg (void);
107 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
108 static unsigned long arm_compute_save_reg0_reg12_mask (void);
109 static unsigned long arm_compute_save_reg_mask (void);
110 static unsigned long arm_isr_value (tree);
111 static unsigned long arm_compute_func_type (void);
112 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
113 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
114 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
115 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
117 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
118 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
119 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
120 static int arm_comp_type_attributes (tree, tree);
121 static void arm_set_default_type_attributes (tree);
122 static int arm_adjust_cost (rtx, rtx, rtx, int);
123 static int count_insns_for_constant (HOST_WIDE_INT, int);
124 static int arm_get_strip_length (int);
125 static bool arm_function_ok_for_sibcall (tree, tree);
126 static void arm_internal_label (FILE *, const char *, unsigned long);
127 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
129 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
130 static bool arm_size_rtx_costs (rtx, int, int, int *);
131 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
132 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
133 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
134 static bool arm_9e_rtx_costs (rtx, int, int, int *);
135 static int arm_address_cost (rtx);
136 static bool arm_memory_load_p (rtx);
137 static bool arm_cirrus_insn_p (rtx);
138 static void cirrus_reorg (rtx);
139 static void arm_init_builtins (void);
140 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
141 static void arm_init_iwmmxt_builtins (void);
142 static rtx safe_vector_operand (rtx, enum machine_mode);
143 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
144 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
145 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
146 static void emit_constant_insn (rtx cond, rtx pattern);
147 static rtx emit_set_insn (rtx, rtx);
148 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
151 #ifdef OBJECT_FORMAT_ELF
152 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
153 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
156 static void arm_encode_section_info (tree, rtx, int);
159 static void arm_file_end (void);
160 static void arm_file_start (void);
163 static void aof_globalize_label (FILE *, const char *);
164 static void aof_dump_imports (FILE *);
165 static void aof_dump_pic_table (FILE *);
166 static void aof_file_start (void);
167 static void aof_file_end (void);
168 static void aof_asm_init_sections (void);
170 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
172 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
173 enum machine_mode, tree, bool);
174 static bool arm_promote_prototypes (tree);
175 static bool arm_default_short_enums (void);
176 static bool arm_align_anon_bitfield (void);
177 static bool arm_return_in_msb (tree);
178 static bool arm_must_pass_in_stack (enum machine_mode, tree);
179 #ifdef TARGET_UNWIND_INFO
180 static void arm_unwind_emit (FILE *, rtx);
181 static bool arm_output_ttype (rtx);
183 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
185 static tree arm_cxx_guard_type (void);
186 static bool arm_cxx_guard_mask_bit (void);
187 static tree arm_get_cookie_size (tree);
188 static bool arm_cookie_has_size (void);
189 static bool arm_cxx_cdtor_returns_this (void);
190 static bool arm_cxx_key_method_may_be_inline (void);
191 static void arm_cxx_determine_class_data_visibility (tree);
192 static bool arm_cxx_class_data_always_comdat (void);
193 static bool arm_cxx_use_aeabi_atexit (void);
194 static void arm_init_libfuncs (void);
195 static bool arm_handle_option (size_t, const char *, int);
196 static void arm_target_help (void);
197 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
198 static bool arm_cannot_copy_insn_p (rtx);
199 static bool arm_tls_symbol_p (rtx x);
200 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
203 /* Initialize the GCC target structure. */
204 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
205 #undef TARGET_MERGE_DECL_ATTRIBUTES
206 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
209 #undef TARGET_ATTRIBUTE_TABLE
210 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
212 #undef TARGET_ASM_FILE_START
213 #define TARGET_ASM_FILE_START arm_file_start
214 #undef TARGET_ASM_FILE_END
215 #define TARGET_ASM_FILE_END arm_file_end
218 #undef TARGET_ASM_BYTE_OP
219 #define TARGET_ASM_BYTE_OP "\tDCB\t"
220 #undef TARGET_ASM_ALIGNED_HI_OP
221 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
222 #undef TARGET_ASM_ALIGNED_SI_OP
223 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
224 #undef TARGET_ASM_GLOBALIZE_LABEL
225 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
226 #undef TARGET_ASM_FILE_START
227 #define TARGET_ASM_FILE_START aof_file_start
228 #undef TARGET_ASM_FILE_END
229 #define TARGET_ASM_FILE_END aof_file_end
231 #undef TARGET_ASM_ALIGNED_SI_OP
232 #define TARGET_ASM_ALIGNED_SI_OP NULL
233 #undef TARGET_ASM_INTEGER
234 #define TARGET_ASM_INTEGER arm_assemble_integer
237 #undef TARGET_ASM_FUNCTION_PROLOGUE
238 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
240 #undef TARGET_ASM_FUNCTION_EPILOGUE
241 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
243 #undef TARGET_DEFAULT_TARGET_FLAGS
244 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
245 #undef TARGET_HANDLE_OPTION
246 #define TARGET_HANDLE_OPTION arm_handle_option
248 #define TARGET_HELP arm_target_help
250 #undef TARGET_COMP_TYPE_ATTRIBUTES
251 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
253 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
254 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
256 #undef TARGET_SCHED_ADJUST_COST
257 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
259 #undef TARGET_ENCODE_SECTION_INFO
261 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
263 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
266 #undef TARGET_STRIP_NAME_ENCODING
267 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
269 #undef TARGET_ASM_INTERNAL_LABEL
270 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
272 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
273 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
275 #undef TARGET_ASM_OUTPUT_MI_THUNK
276 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
277 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
278 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
280 /* This will be overridden in arm_override_options. */
281 #undef TARGET_RTX_COSTS
282 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
283 #undef TARGET_ADDRESS_COST
284 #define TARGET_ADDRESS_COST arm_address_cost
286 #undef TARGET_SHIFT_TRUNCATION_MASK
287 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
288 #undef TARGET_VECTOR_MODE_SUPPORTED_P
289 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
291 #undef TARGET_MACHINE_DEPENDENT_REORG
292 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS arm_init_builtins
296 #undef TARGET_EXPAND_BUILTIN
297 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
299 #undef TARGET_INIT_LIBFUNCS
300 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
302 #undef TARGET_PROMOTE_FUNCTION_ARGS
303 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
304 #undef TARGET_PROMOTE_FUNCTION_RETURN
305 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
306 #undef TARGET_PROMOTE_PROTOTYPES
307 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
308 #undef TARGET_PASS_BY_REFERENCE
309 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
310 #undef TARGET_ARG_PARTIAL_BYTES
311 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
313 #undef TARGET_SETUP_INCOMING_VARARGS
314 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
316 #undef TARGET_DEFAULT_SHORT_ENUMS
317 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
319 #undef TARGET_ALIGN_ANON_BITFIELD
320 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
322 #undef TARGET_NARROW_VOLATILE_BITFIELD
323 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
325 #undef TARGET_CXX_GUARD_TYPE
326 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
328 #undef TARGET_CXX_GUARD_MASK_BIT
329 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
331 #undef TARGET_CXX_GET_COOKIE_SIZE
332 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
334 #undef TARGET_CXX_COOKIE_HAS_SIZE
335 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
337 #undef TARGET_CXX_CDTOR_RETURNS_THIS
338 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
340 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
341 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
343 #undef TARGET_CXX_USE_AEABI_ATEXIT
344 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
346 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
347 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
348 arm_cxx_determine_class_data_visibility
350 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
351 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
353 #undef TARGET_RETURN_IN_MSB
354 #define TARGET_RETURN_IN_MSB arm_return_in_msb
356 #undef TARGET_MUST_PASS_IN_STACK
357 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
359 #ifdef TARGET_UNWIND_INFO
360 #undef TARGET_UNWIND_EMIT
361 #define TARGET_UNWIND_EMIT arm_unwind_emit
363 /* EABI unwinding tables use a different format for the typeinfo tables. */
364 #undef TARGET_ASM_TTYPE
365 #define TARGET_ASM_TTYPE arm_output_ttype
367 #undef TARGET_ARM_EABI_UNWINDER
368 #define TARGET_ARM_EABI_UNWINDER true
369 #endif /* TARGET_UNWIND_INFO */
371 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
372 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
374 #undef TARGET_CANNOT_COPY_INSN_P
375 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
378 #undef TARGET_HAVE_TLS
379 #define TARGET_HAVE_TLS true
382 #undef TARGET_CANNOT_FORCE_CONST_MEM
383 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
385 #undef TARGET_MANGLE_TYPE
386 #define TARGET_MANGLE_TYPE arm_mangle_type
389 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
390 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
393 struct gcc_target targetm = TARGET_INITIALIZER;
395 /* Obstack for minipool constant handling. */
396 static struct obstack minipool_obstack;
397 static char * minipool_startobj;
399 /* The maximum number of insns skipped which
400 will be conditionalised if possible. */
401 static int max_insns_skipped = 5;
403 extern FILE * asm_out_file;
405 /* True if we are currently building a constant table. */
406 int making_const_table;
408 /* Define the information needed to generate branch insns. This is
409 stored from the compare operation. */
410 rtx arm_compare_op0, arm_compare_op1;
412 /* The processor for which instructions should be scheduled. */
413 enum processor_type arm_tune = arm_none;
415 /* The default processor used if not overridden by commandline. */
416 static enum processor_type arm_default_cpu = arm_none;
418 /* Which floating point model to use. */
419 enum arm_fp_model arm_fp_model;
421 /* Which floating point hardware is available. */
422 enum fputype arm_fpu_arch;
424 /* Which floating point hardware to schedule for. */
425 enum fputype arm_fpu_tune;
427 /* Whether to use floating point hardware. */
428 enum float_abi_type arm_float_abi;
430 /* Which ABI to use. */
431 enum arm_abi_type arm_abi;
433 /* Which thread pointer model to use. */
434 enum arm_tp_type target_thread_pointer = TP_AUTO;
436 /* Used to parse -mstructure_size_boundary command line option. */
437 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
439 /* Used for Thumb call_via trampolines. */
440 rtx thumb_call_via_label[14];
441 static int thumb_call_reg_needed;
443 /* Bit values used to identify processor capabilities. */
444 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
445 #define FL_ARCH3M (1 << 1) /* Extended multiply */
446 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
447 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
448 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
449 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
450 #define FL_THUMB (1 << 6) /* Thumb aware */
451 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
452 #define FL_STRONG (1 << 8) /* StrongARM */
453 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
454 #define FL_XSCALE (1 << 10) /* XScale */
455 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
456 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
457 media instructions. */
458 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
459 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
460 Note: ARM6 & 7 derivatives only. */
461 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
462 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
463 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
465 #define FL_DIV (1 << 18) /* Hardware divide. */
466 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
467 #define FL_NEON (1 << 20) /* Neon instructions. */
469 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
471 #define FL_FOR_ARCH2 FL_NOTM
472 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
473 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
474 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
475 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
476 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
477 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
478 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
479 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
480 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
481 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
482 #define FL_FOR_ARCH6J FL_FOR_ARCH6
483 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
484 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
485 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
486 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
487 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
488 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
489 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
490 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
492 /* The bits in this mask specify which
493 instructions we are allowed to generate. */
494 static unsigned long insn_flags = 0;
496 /* The bits in this mask specify which instruction scheduling options should
498 static unsigned long tune_flags = 0;
500 /* The following are used in the arm.md file as equivalents to bits
501 in the above two flag variables. */
503 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
506 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
509 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
512 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
515 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
518 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
521 /* Nonzero if this chip supports the ARM 6K extensions. */
524 /* Nonzero if instructions not present in the 'M' profile can be used. */
525 int arm_arch_notm = 0;
527 /* Nonzero if this chip can benefit from load scheduling. */
528 int arm_ld_sched = 0;
530 /* Nonzero if this chip is a StrongARM. */
531 int arm_tune_strongarm = 0;
533 /* Nonzero if this chip is a Cirrus variant. */
534 int arm_arch_cirrus = 0;
536 /* Nonzero if this chip supports Intel Wireless MMX technology. */
537 int arm_arch_iwmmxt = 0;
539 /* Nonzero if this chip is an XScale. */
540 int arm_arch_xscale = 0;
542 /* Nonzero if tuning for XScale */
543 int arm_tune_xscale = 0;
545 /* Nonzero if we want to tune for stores that access the write-buffer.
546 This typically means an ARM6 or ARM7 with MMU or MPU. */
547 int arm_tune_wbuf = 0;
549 /* Nonzero if generating Thumb instructions. */
552 /* Nonzero if we should define __THUMB_INTERWORK__ in the
554 XXX This is a bit of a hack, it's intended to help work around
555 problems in GLD which doesn't understand that armv5t code is
556 interworking clean. */
557 int arm_cpp_interwork = 0;
559 /* Nonzero if chip supports Thumb 2. */
562 /* Nonzero if chip supports integer division instruction. */
565 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
566 must report the mode of the memory reference from PRINT_OPERAND to
567 PRINT_OPERAND_ADDRESS. */
568 enum machine_mode output_memory_reference_mode;
570 /* The register number to be used for the PIC offset register. */
571 unsigned arm_pic_register = INVALID_REGNUM;
573 /* Set to 1 when a return insn is output, this means that the epilogue
575 int return_used_this_function;
577 /* Set to 1 after arm_reorg has started. Reset to start at the start of
578 the next function. */
579 static int after_arm_reorg = 0;
581 /* The maximum number of insns to be used when loading a constant. */
582 static int arm_constant_limit = 3;
584 /* For an explanation of these variables, see final_prescan_insn below. */
586 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
587 enum arm_cond_code arm_current_cc;
589 int arm_target_label;
590 /* The number of conditionally executed insns, including the current insn. */
591 int arm_condexec_count = 0;
592 /* A bitmask specifying the patterns for the IT block.
593 Zero means do not output an IT block before this insn. */
594 int arm_condexec_mask = 0;
595 /* The number of bits used in arm_condexec_mask. */
596 int arm_condexec_masklen = 0;
598 /* The condition codes of the ARM, and the inverse function. */
599 static const char * const arm_condition_codes[] =
601 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
602 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
605 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
606 #define streq(string1, string2) (strcmp (string1, string2) == 0)
608 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
609 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
610 | (1 << PIC_OFFSET_TABLE_REGNUM)))
612 /* Initialization code. */
616 const char *const name;
617 enum processor_type core;
619 const unsigned long flags;
620 bool (* rtx_costs) (rtx, int, int, int *);
623 /* Not all of these give usefully different compilation alternatives,
624 but there is no simple way of generalizing them. */
625 static const struct processors all_cores[] =
628 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
629 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
630 #include "arm-cores.def"
632 {NULL, arm_none, NULL, 0, NULL}
635 static const struct processors all_architectures[] =
637 /* ARM Architectures */
638 /* We don't specify rtx_costs here as it will be figured out
641 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
642 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
643 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
644 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
645 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
646 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
647 implementations that support it, so we will leave it out for now. */
648 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
649 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
650 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
651 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
652 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
653 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
654 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
655 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
656 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
657 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
658 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
659 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
660 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
661 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
662 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
663 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
664 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
665 {NULL, arm_none, NULL, 0 , NULL}
668 struct arm_cpu_select
672 const struct processors * processors;
675 /* This is a magic structure. The 'string' field is magically filled in
676 with a pointer to the value specified by the user on the command line
677 assuming that the user has specified such a value. */
679 static struct arm_cpu_select arm_select[] =
681 /* string name processors */
682 { NULL, "-mcpu=", all_cores },
683 { NULL, "-march=", all_architectures },
684 { NULL, "-mtune=", all_cores }
687 /* Defines representing the indexes into the above table. */
688 #define ARM_OPT_SET_CPU 0
689 #define ARM_OPT_SET_ARCH 1
690 #define ARM_OPT_SET_TUNE 2
692 /* The name of the preprocessor macro to define for this architecture. */
694 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
703 /* Available values for -mfpu=. */
705 static const struct fpu_desc all_fpus[] =
707 {"fpa", FPUTYPE_FPA},
708 {"fpe2", FPUTYPE_FPA_EMU2},
709 {"fpe3", FPUTYPE_FPA_EMU2},
710 {"maverick", FPUTYPE_MAVERICK},
711 {"vfp", FPUTYPE_VFP},
712 {"vfp3", FPUTYPE_VFP3},
713 {"neon", FPUTYPE_NEON}
717 /* Floating point models used by the different hardware.
718 See fputype in arm.h. */
720 static const enum fputype fp_model_for_fpu[] =
722 /* No FP hardware. */
723 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
724 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
725 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
726 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
727 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
728 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
729 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
730 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
737 enum float_abi_type abi_type;
741 /* Available values for -mfloat-abi=. */
743 static const struct float_abi all_float_abis[] =
745 {"soft", ARM_FLOAT_ABI_SOFT},
746 {"softfp", ARM_FLOAT_ABI_SOFTFP},
747 {"hard", ARM_FLOAT_ABI_HARD}
754 enum arm_abi_type abi_type;
758 /* Available values for -mabi=. */
760 static const struct abi_name arm_all_abis[] =
762 {"apcs-gnu", ARM_ABI_APCS},
763 {"atpcs", ARM_ABI_ATPCS},
764 {"aapcs", ARM_ABI_AAPCS},
765 {"iwmmxt", ARM_ABI_IWMMXT},
766 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
769 /* Supported TLS relocations. */
779 /* Emit an insn that's a simple single-set. Both the operands must be known
782 emit_set_insn (rtx x, rtx y)
784 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
787 /* Return the number of bits set in VALUE. */
789 bit_count (unsigned long value)
791 unsigned long count = 0;
796 value &= value - 1; /* Clear the least-significant set bit. */
802 /* Set up library functions unique to ARM. */
805 arm_init_libfuncs (void)
807 /* There are no special library functions unless we are using the
812 /* The functions below are described in Section 4 of the "Run-Time
813 ABI for the ARM architecture", Version 1.0. */
815 /* Double-precision floating-point arithmetic. Table 2. */
816 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
817 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
818 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
819 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
820 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
822 /* Double-precision comparisons. Table 3. */
823 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
824 set_optab_libfunc (ne_optab, DFmode, NULL);
825 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
826 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
827 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
828 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
829 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
831 /* Single-precision floating-point arithmetic. Table 4. */
832 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
833 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
834 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
835 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
836 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
838 /* Single-precision comparisons. Table 5. */
839 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
840 set_optab_libfunc (ne_optab, SFmode, NULL);
841 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
842 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
843 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
844 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
845 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
847 /* Floating-point to integer conversions. Table 6. */
848 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
849 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
850 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
851 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
852 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
853 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
854 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
855 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
857 /* Conversions between floating types. Table 7. */
858 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
859 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
861 /* Integer to floating-point conversions. Table 8. */
862 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
863 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
864 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
865 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
866 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
867 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
868 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
869 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
871 /* Long long. Table 9. */
872 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
873 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
874 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
875 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
876 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
877 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
878 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
879 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
881 /* Integer (32/32->32) division. \S 4.3.1. */
882 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
883 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
885 /* The divmod functions are designed so that they can be used for
886 plain division, even though they return both the quotient and the
887 remainder. The quotient is returned in the usual location (i.e.,
888 r0 for SImode, {r0, r1} for DImode), just as would be expected
889 for an ordinary division routine. Because the AAPCS calling
890 conventions specify that all of { r0, r1, r2, r3 } are
891 callee-saved registers, there is no need to tell the compiler
892 explicitly that those registers are clobbered by these
894 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
895 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
897 /* For SImode division the ABI provides div-without-mod routines,
899 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
900 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
902 /* We don't have mod libcalls. Fortunately gcc knows how to use the
903 divmod libcalls instead. */
904 set_optab_libfunc (smod_optab, DImode, NULL);
905 set_optab_libfunc (umod_optab, DImode, NULL);
906 set_optab_libfunc (smod_optab, SImode, NULL);
907 set_optab_libfunc (umod_optab, SImode, NULL);
910 /* Implement TARGET_HANDLE_OPTION. */
913 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
918 arm_select[1].string = arg;
922 arm_select[0].string = arg;
925 case OPT_mhard_float:
926 target_float_abi_name = "hard";
929 case OPT_msoft_float:
930 target_float_abi_name = "soft";
934 arm_select[2].string = arg;
943 arm_target_help (void)
946 static int columns = 0;
949 /* If we have not done so already, obtain the desired maximum width of
950 the output. Note - this is a duplication of the code at the start of
951 gcc/opts.c:print_specific_help() - the two copies should probably be
952 replaced by a single function. */
957 GET_ENVIRONMENT (p, "COLUMNS");
960 int value = atoi (p);
967 /* Use a reasonable default. */
971 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
973 /* The - 2 is because we know that the last entry in the array is NULL. */
974 i = ARRAY_SIZE (all_cores) - 2;
976 printf (" %s", all_cores[i].name);
977 remaining = columns - (strlen (all_cores[i].name) + 4);
978 gcc_assert (remaining >= 0);
982 int len = strlen (all_cores[i].name);
984 if (remaining > len + 2)
986 printf (", %s", all_cores[i].name);
987 remaining -= len + 2;
993 printf ("\n %s", all_cores[i].name);
994 remaining = columns - (len + 4);
998 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1000 i = ARRAY_SIZE (all_architectures) - 2;
1003 printf (" %s", all_architectures[i].name);
1004 remaining = columns - (strlen (all_architectures[i].name) + 4);
1005 gcc_assert (remaining >= 0);
1009 int len = strlen (all_architectures[i].name);
1011 if (remaining > len + 2)
1013 printf (", %s", all_architectures[i].name);
1014 remaining -= len + 2;
1020 printf ("\n %s", all_architectures[i].name);
1021 remaining = columns - (len + 4);
1028 /* Fix up any incompatible options that the user has specified.
1029 This has now turned into a maze. */
1031 arm_override_options (void)
1034 enum processor_type target_arch_cpu = arm_none;
1036 /* Set up the flags based on the cpu/architecture selected by the user. */
1037 for (i = ARRAY_SIZE (arm_select); i--;)
1039 struct arm_cpu_select * ptr = arm_select + i;
1041 if (ptr->string != NULL && ptr->string[0] != '\0')
1043 const struct processors * sel;
1045 for (sel = ptr->processors; sel->name != NULL; sel++)
1046 if (streq (ptr->string, sel->name))
1048 /* Set the architecture define. */
1049 if (i != ARM_OPT_SET_TUNE)
1050 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1052 /* Determine the processor core for which we should
1053 tune code-generation. */
1054 if (/* -mcpu= is a sensible default. */
1055 i == ARM_OPT_SET_CPU
1056 /* -mtune= overrides -mcpu= and -march=. */
1057 || i == ARM_OPT_SET_TUNE)
1058 arm_tune = (enum processor_type) (sel - ptr->processors);
1060 /* Remember the CPU associated with this architecture.
1061 If no other option is used to set the CPU type,
1062 we'll use this to guess the most suitable tuning
1064 if (i == ARM_OPT_SET_ARCH)
1065 target_arch_cpu = sel->core;
1067 if (i != ARM_OPT_SET_TUNE)
1069 /* If we have been given an architecture and a processor
1070 make sure that they are compatible. We only generate
1071 a warning though, and we prefer the CPU over the
1073 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1074 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1077 insn_flags = sel->flags;
1083 if (sel->name == NULL)
1084 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1088 /* Guess the tuning options from the architecture if necessary. */
1089 if (arm_tune == arm_none)
1090 arm_tune = target_arch_cpu;
1092 /* If the user did not specify a processor, choose one for them. */
1093 if (insn_flags == 0)
1095 const struct processors * sel;
1096 unsigned int sought;
1097 enum processor_type cpu;
1099 cpu = TARGET_CPU_DEFAULT;
1100 if (cpu == arm_none)
1102 #ifdef SUBTARGET_CPU_DEFAULT
1103 /* Use the subtarget default CPU if none was specified by
1105 cpu = SUBTARGET_CPU_DEFAULT;
1107 /* Default to ARM6. */
1108 if (cpu == arm_none)
1111 sel = &all_cores[cpu];
1113 insn_flags = sel->flags;
1115 /* Now check to see if the user has specified some command line
1116 switch that require certain abilities from the cpu. */
1119 if (TARGET_INTERWORK || TARGET_THUMB)
1121 sought |= (FL_THUMB | FL_MODE32);
1123 /* There are no ARM processors that support both APCS-26 and
1124 interworking. Therefore we force FL_MODE26 to be removed
1125 from insn_flags here (if it was set), so that the search
1126 below will always be able to find a compatible processor. */
1127 insn_flags &= ~FL_MODE26;
1130 if (sought != 0 && ((sought & insn_flags) != sought))
1132 /* Try to locate a CPU type that supports all of the abilities
1133 of the default CPU, plus the extra abilities requested by
1135 for (sel = all_cores; sel->name != NULL; sel++)
1136 if ((sel->flags & sought) == (sought | insn_flags))
1139 if (sel->name == NULL)
1141 unsigned current_bit_count = 0;
1142 const struct processors * best_fit = NULL;
1144 /* Ideally we would like to issue an error message here
1145 saying that it was not possible to find a CPU compatible
1146 with the default CPU, but which also supports the command
1147 line options specified by the programmer, and so they
1148 ought to use the -mcpu=<name> command line option to
1149 override the default CPU type.
1151 If we cannot find a cpu that has both the
1152 characteristics of the default cpu and the given
1153 command line options we scan the array again looking
1154 for a best match. */
1155 for (sel = all_cores; sel->name != NULL; sel++)
1156 if ((sel->flags & sought) == sought)
1160 count = bit_count (sel->flags & insn_flags);
1162 if (count >= current_bit_count)
1165 current_bit_count = count;
1169 gcc_assert (best_fit);
1173 insn_flags = sel->flags;
1175 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1176 arm_default_cpu = (enum processor_type) (sel - all_cores);
1177 if (arm_tune == arm_none)
1178 arm_tune = arm_default_cpu;
1181 /* The processor for which we should tune should now have been
1183 gcc_assert (arm_tune != arm_none);
1185 tune_flags = all_cores[(int)arm_tune].flags;
1187 targetm.rtx_costs = arm_size_rtx_costs;
1189 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1191 /* Make sure that the processor choice does not conflict with any of the
1192 other command line choices. */
1193 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1194 error ("target CPU does not support ARM mode");
1196 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1198 warning (0, "target CPU does not support interworking" );
1199 target_flags &= ~MASK_INTERWORK;
1202 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1204 warning (0, "target CPU does not support THUMB instructions");
1205 target_flags &= ~MASK_THUMB;
1208 if (TARGET_APCS_FRAME && TARGET_THUMB)
1210 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1211 target_flags &= ~MASK_APCS_FRAME;
1214 /* Callee super interworking implies thumb interworking. Adding
1215 this to the flags here simplifies the logic elsewhere. */
1216 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1217 target_flags |= MASK_INTERWORK;
1219 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1220 from here where no function is being compiled currently. */
1221 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1222 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1224 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1225 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1227 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1228 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1230 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1232 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1233 target_flags |= MASK_APCS_FRAME;
1236 if (TARGET_POKE_FUNCTION_NAME)
1237 target_flags |= MASK_APCS_FRAME;
1239 if (TARGET_APCS_REENT && flag_pic)
1240 error ("-fpic and -mapcs-reent are incompatible");
1242 if (TARGET_APCS_REENT)
1243 warning (0, "APCS reentrant code not supported. Ignored");
1245 /* If this target is normally configured to use APCS frames, warn if they
1246 are turned off and debugging is turned on. */
1248 && write_symbols != NO_DEBUG
1249 && !TARGET_APCS_FRAME
1250 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1251 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1253 if (TARGET_APCS_FLOAT)
1254 warning (0, "passing floating point arguments in fp regs not yet supported");
1256 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1257 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1258 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1259 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1260 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1261 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1262 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1263 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1264 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1265 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1266 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1267 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1269 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1270 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1271 thumb_code = (TARGET_ARM == 0);
1272 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1273 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1274 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1275 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1277 /* V5 code we generate is completely interworking capable, so we turn off
1278 TARGET_INTERWORK here to avoid many tests later on. */
1280 /* XXX However, we must pass the right pre-processor defines to CPP
1281 or GLD can get confused. This is a hack. */
1282 if (TARGET_INTERWORK)
1283 arm_cpp_interwork = 1;
1286 target_flags &= ~MASK_INTERWORK;
1288 if (target_abi_name)
1290 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1292 if (streq (arm_all_abis[i].name, target_abi_name))
1294 arm_abi = arm_all_abis[i].abi_type;
1298 if (i == ARRAY_SIZE (arm_all_abis))
1299 error ("invalid ABI option: -mabi=%s", target_abi_name);
1302 arm_abi = ARM_DEFAULT_ABI;
1304 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1305 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1307 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1308 error ("iwmmxt abi requires an iwmmxt capable cpu");
1310 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1311 if (target_fpu_name == NULL && target_fpe_name != NULL)
1313 if (streq (target_fpe_name, "2"))
1314 target_fpu_name = "fpe2";
1315 else if (streq (target_fpe_name, "3"))
1316 target_fpu_name = "fpe3";
1318 error ("invalid floating point emulation option: -mfpe=%s",
1321 if (target_fpu_name != NULL)
1323 /* The user specified a FPU. */
1324 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1326 if (streq (all_fpus[i].name, target_fpu_name))
1328 arm_fpu_arch = all_fpus[i].fpu;
1329 arm_fpu_tune = arm_fpu_arch;
1330 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1334 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1335 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1339 #ifdef FPUTYPE_DEFAULT
1340 /* Use the default if it is specified for this platform. */
1341 arm_fpu_arch = FPUTYPE_DEFAULT;
1342 arm_fpu_tune = FPUTYPE_DEFAULT;
1344 /* Pick one based on CPU type. */
1345 /* ??? Some targets assume FPA is the default.
1346 if ((insn_flags & FL_VFP) != 0)
1347 arm_fpu_arch = FPUTYPE_VFP;
1350 if (arm_arch_cirrus)
1351 arm_fpu_arch = FPUTYPE_MAVERICK;
1353 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1355 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1356 arm_fpu_tune = FPUTYPE_FPA;
1358 arm_fpu_tune = arm_fpu_arch;
1359 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1360 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1363 if (target_float_abi_name != NULL)
1365 /* The user specified a FP ABI. */
1366 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1368 if (streq (all_float_abis[i].name, target_float_abi_name))
1370 arm_float_abi = all_float_abis[i].abi_type;
1374 if (i == ARRAY_SIZE (all_float_abis))
1375 error ("invalid floating point abi: -mfloat-abi=%s",
1376 target_float_abi_name);
1379 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1381 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1382 sorry ("-mfloat-abi=hard and VFP");
1384 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1385 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1386 will ever exist. GCC makes no attempt to support this combination. */
1387 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1388 sorry ("iWMMXt and hardware floating point");
1390 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1391 if (TARGET_THUMB2 && TARGET_IWMMXT)
1392 sorry ("Thumb-2 iWMMXt");
1394 /* If soft-float is specified then don't use FPU. */
1395 if (TARGET_SOFT_FLOAT)
1396 arm_fpu_arch = FPUTYPE_NONE;
1398 /* For arm2/3 there is no need to do any scheduling if there is only
1399 a floating point emulator, or we are doing software floating-point. */
1400 if ((TARGET_SOFT_FLOAT
1401 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1402 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1403 && (tune_flags & FL_MODE32) == 0)
1404 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1406 if (target_thread_switch)
1408 if (strcmp (target_thread_switch, "soft") == 0)
1409 target_thread_pointer = TP_SOFT;
1410 else if (strcmp (target_thread_switch, "auto") == 0)
1411 target_thread_pointer = TP_AUTO;
1412 else if (strcmp (target_thread_switch, "cp15") == 0)
1413 target_thread_pointer = TP_CP15;
1415 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1418 /* Use the cp15 method if it is available. */
1419 if (target_thread_pointer == TP_AUTO)
1421 if (arm_arch6k && !TARGET_THUMB)
1422 target_thread_pointer = TP_CP15;
1424 target_thread_pointer = TP_SOFT;
1427 if (TARGET_HARD_TP && TARGET_THUMB1)
1428 error ("can not use -mtp=cp15 with 16-bit Thumb");
1430 /* Override the default structure alignment for AAPCS ABI. */
1431 if (TARGET_AAPCS_BASED)
1432 arm_structure_size_boundary = 8;
1434 if (structure_size_string != NULL)
1436 int size = strtol (structure_size_string, NULL, 0);
1438 if (size == 8 || size == 32
1439 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1440 arm_structure_size_boundary = size;
1442 warning (0, "structure size boundary can only be set to %s",
1443 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1446 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1448 error ("RTP PIC is incompatible with Thumb");
1452 /* If stack checking is disabled, we can use r10 as the PIC register,
1453 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1454 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1456 if (TARGET_VXWORKS_RTP)
1457 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1458 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1461 if (flag_pic && TARGET_VXWORKS_RTP)
1462 arm_pic_register = 9;
1464 if (arm_pic_register_string != NULL)
1466 int pic_register = decode_reg_name (arm_pic_register_string);
1469 warning (0, "-mpic-register= is useless without -fpic");
1471 /* Prevent the user from choosing an obviously stupid PIC register. */
1472 else if (pic_register < 0 || call_used_regs[pic_register]
1473 || pic_register == HARD_FRAME_POINTER_REGNUM
1474 || pic_register == STACK_POINTER_REGNUM
1475 || pic_register >= PC_REGNUM
1476 || (TARGET_VXWORKS_RTP
1477 && (unsigned int) pic_register != arm_pic_register))
1478 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1480 arm_pic_register = pic_register;
1483 /* ??? We might want scheduling for thumb2. */
1484 if (TARGET_THUMB && flag_schedule_insns)
1486 /* Don't warn since it's on by default in -O2. */
1487 flag_schedule_insns = 0;
1492 arm_constant_limit = 1;
1494 /* If optimizing for size, bump the number of instructions that we
1495 are prepared to conditionally execute (even on a StrongARM). */
1496 max_insns_skipped = 6;
1500 /* For processors with load scheduling, it never costs more than
1501 2 cycles to load a constant, and the load scheduler may well
1502 reduce that to 1. */
1504 arm_constant_limit = 1;
1506 /* On XScale the longer latency of a load makes it more difficult
1507 to achieve a good schedule, so it's faster to synthesize
1508 constants that can be done in two insns. */
1509 if (arm_tune_xscale)
1510 arm_constant_limit = 2;
1512 /* StrongARM has early execution of branches, so a sequence
1513 that is worth skipping is shorter. */
1514 if (arm_tune_strongarm)
1515 max_insns_skipped = 3;
1518 /* Register global variables with the garbage collector. */
1519 arm_add_gc_roots ();
1523 arm_add_gc_roots (void)
1525 gcc_obstack_init(&minipool_obstack);
1526 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1529 /* A table of known ARM exception types.
1530 For use with the interrupt function attribute. */
1534 const char *const arg;
1535 const unsigned long return_value;
1539 static const isr_attribute_arg isr_attribute_args [] =
1541 { "IRQ", ARM_FT_ISR },
1542 { "irq", ARM_FT_ISR },
1543 { "FIQ", ARM_FT_FIQ },
1544 { "fiq", ARM_FT_FIQ },
1545 { "ABORT", ARM_FT_ISR },
1546 { "abort", ARM_FT_ISR },
1547 { "ABORT", ARM_FT_ISR },
1548 { "abort", ARM_FT_ISR },
1549 { "UNDEF", ARM_FT_EXCEPTION },
1550 { "undef", ARM_FT_EXCEPTION },
1551 { "SWI", ARM_FT_EXCEPTION },
1552 { "swi", ARM_FT_EXCEPTION },
1553 { NULL, ARM_FT_NORMAL }
1556 /* Returns the (interrupt) function type of the current
1557 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1559 static unsigned long
1560 arm_isr_value (tree argument)
1562 const isr_attribute_arg * ptr;
1566 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1568 /* No argument - default to IRQ. */
1569 if (argument == NULL_TREE)
1572 /* Get the value of the argument. */
1573 if (TREE_VALUE (argument) == NULL_TREE
1574 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1575 return ARM_FT_UNKNOWN;
1577 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1579 /* Check it against the list of known arguments. */
1580 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1581 if (streq (arg, ptr->arg))
1582 return ptr->return_value;
1584 /* An unrecognized interrupt type. */
1585 return ARM_FT_UNKNOWN;
1588 /* Computes the type of the current function. */
1590 static unsigned long
1591 arm_compute_func_type (void)
1593 unsigned long type = ARM_FT_UNKNOWN;
1597 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1599 /* Decide if the current function is volatile. Such functions
1600 never return, and many memory cycles can be saved by not storing
1601 register values that will never be needed again. This optimization
1602 was added to speed up context switching in a kernel application. */
1604 && (TREE_NOTHROW (current_function_decl)
1605 || !(flag_unwind_tables
1606 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1607 && TREE_THIS_VOLATILE (current_function_decl))
1608 type |= ARM_FT_VOLATILE;
1610 if (cfun->static_chain_decl != NULL)
1611 type |= ARM_FT_NESTED;
1613 attr = DECL_ATTRIBUTES (current_function_decl);
1615 a = lookup_attribute ("naked", attr);
1617 type |= ARM_FT_NAKED;
1619 a = lookup_attribute ("isr", attr);
1621 a = lookup_attribute ("interrupt", attr);
1624 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1626 type |= arm_isr_value (TREE_VALUE (a));
1631 /* Returns the type of the current function. */
1634 arm_current_func_type (void)
1636 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1637 cfun->machine->func_type = arm_compute_func_type ();
1639 return cfun->machine->func_type;
1642 /* Return 1 if it is possible to return using a single instruction.
1643 If SIBLING is non-null, this is a test for a return before a sibling
1644 call. SIBLING is the call insn, so we can examine its register usage. */
1647 use_return_insn (int iscond, rtx sibling)
1650 unsigned int func_type;
1651 unsigned long saved_int_regs;
1652 unsigned HOST_WIDE_INT stack_adjust;
1653 arm_stack_offsets *offsets;
1655 /* Never use a return instruction before reload has run. */
1656 if (!reload_completed)
1659 func_type = arm_current_func_type ();
1661 /* Naked, volatile and stack alignment functions need special
1663 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1666 /* So do interrupt functions that use the frame pointer and Thumb
1667 interrupt functions. */
1668 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1671 offsets = arm_get_frame_offsets ();
1672 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1674 /* As do variadic functions. */
1675 if (current_function_pretend_args_size
1676 || cfun->machine->uses_anonymous_args
1677 /* Or if the function calls __builtin_eh_return () */
1678 || current_function_calls_eh_return
1679 /* Or if the function calls alloca */
1680 || current_function_calls_alloca
1681 /* Or if there is a stack adjustment. However, if the stack pointer
1682 is saved on the stack, we can use a pre-incrementing stack load. */
1683 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1686 saved_int_regs = arm_compute_save_reg_mask ();
1688 /* Unfortunately, the insn
1690 ldmib sp, {..., sp, ...}
1692 triggers a bug on most SA-110 based devices, such that the stack
1693 pointer won't be correctly restored if the instruction takes a
1694 page fault. We work around this problem by popping r3 along with
1695 the other registers, since that is never slower than executing
1696 another instruction.
1698 We test for !arm_arch5 here, because code for any architecture
1699 less than this could potentially be run on one of the buggy
1701 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1703 /* Validate that r3 is a call-clobbered register (always true in
1704 the default abi) ... */
1705 if (!call_used_regs[3])
1708 /* ... that it isn't being used for a return value ... */
1709 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1712 /* ... or for a tail-call argument ... */
1715 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1717 if (find_regno_fusage (sibling, USE, 3))
1721 /* ... and that there are no call-saved registers in r0-r2
1722 (always true in the default ABI). */
1723 if (saved_int_regs & 0x7)
1727 /* Can't be done if interworking with Thumb, and any registers have been
1729 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1732 /* On StrongARM, conditional returns are expensive if they aren't
1733 taken and multiple registers have been stacked. */
1734 if (iscond && arm_tune_strongarm)
1736 /* Conditional return when just the LR is stored is a simple
1737 conditional-load instruction, that's not expensive. */
1738 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1742 && arm_pic_register != INVALID_REGNUM
1743 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1747 /* If there are saved registers but the LR isn't saved, then we need
1748 two instructions for the return. */
1749 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1752 /* Can't be done if any of the FPA regs are pushed,
1753 since this also requires an insn. */
1754 if (TARGET_HARD_FLOAT && TARGET_FPA)
1755 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1756 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1759 /* Likewise VFP regs. */
1760 if (TARGET_HARD_FLOAT && TARGET_VFP)
1761 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1762 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1765 if (TARGET_REALLY_IWMMXT)
1766 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1767 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1773 /* Return TRUE if int I is a valid immediate ARM constant. */
1776 const_ok_for_arm (HOST_WIDE_INT i)
1780 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1781 be all zero, or all one. */
1782 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1783 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1784 != ((~(unsigned HOST_WIDE_INT) 0)
1785 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1788 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1790 /* Fast return for 0 and small values. We must do this for zero, since
1791 the code below can't handle that one case. */
1792 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1795 /* Get the number of trailing zeros. */
1796 lowbit = ffs((int) i) - 1;
1798 /* Only even shifts are allowed in ARM mode so round down to the
1799 nearest even number. */
1803 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1808 /* Allow rotated constants in ARM mode. */
1810 && ((i & ~0xc000003f) == 0
1811 || (i & ~0xf000000f) == 0
1812 || (i & ~0xfc000003) == 0))
1819 /* Allow repeated pattern. */
1822 if (i == v || i == (v | (v << 8)))
1829 /* Return true if I is a valid constant for the operation CODE. */
1831 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1833 if (const_ok_for_arm (i))
1839 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1841 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1847 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1854 /* Emit a sequence of insns to handle a large constant.
1855 CODE is the code of the operation required, it can be any of SET, PLUS,
1856 IOR, AND, XOR, MINUS;
1857 MODE is the mode in which the operation is being performed;
1858 VAL is the integer to operate on;
1859 SOURCE is the other operand (a register, or a null-pointer for SET);
1860 SUBTARGETS means it is safe to create scratch registers if that will
1861 either produce a simpler sequence, or we will want to cse the values.
1862 Return value is the number of insns emitted. */
1864 /* ??? Tweak this for thumb2. */
1866 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1867 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1871 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1872 cond = COND_EXEC_TEST (PATTERN (insn));
1876 if (subtargets || code == SET
1877 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1878 && REGNO (target) != REGNO (source)))
1880 /* After arm_reorg has been called, we can't fix up expensive
1881 constants by pushing them into memory so we must synthesize
1882 them in-line, regardless of the cost. This is only likely to
1883 be more costly on chips that have load delay slots and we are
1884 compiling without running the scheduler (so no splitting
1885 occurred before the final instruction emission).
1887 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1889 if (!after_arm_reorg
1891 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1893 > arm_constant_limit + (code != SET)))
1897 /* Currently SET is the only monadic value for CODE, all
1898 the rest are diadic. */
1899 emit_set_insn (target, GEN_INT (val));
1904 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1906 emit_set_insn (temp, GEN_INT (val));
1907 /* For MINUS, the value is subtracted from, since we never
1908 have subtraction of a constant. */
1910 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1912 emit_set_insn (target,
1913 gen_rtx_fmt_ee (code, mode, source, temp));
1919 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1923 /* Return the number of ARM instructions required to synthesize the given
1926 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1928 HOST_WIDE_INT temp1;
1936 if (remainder & (3 << (i - 2)))
1941 temp1 = remainder & ((0x0ff << end)
1942 | ((i < end) ? (0xff >> (32 - end)) : 0));
1943 remainder &= ~temp1;
1948 } while (remainder);
1952 /* Emit an instruction with the indicated PATTERN. If COND is
1953 non-NULL, conditionalize the execution of the instruction on COND
1957 emit_constant_insn (rtx cond, rtx pattern)
1960 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1961 emit_insn (pattern);
1964 /* As above, but extra parameter GENERATE which, if clear, suppresses
1966 /* ??? This needs more work for thumb2. */
1969 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1970 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1975 int can_negate_initial = 0;
1978 int num_bits_set = 0;
1979 int set_sign_bit_copies = 0;
1980 int clear_sign_bit_copies = 0;
1981 int clear_zero_bit_copies = 0;
1982 int set_zero_bit_copies = 0;
1984 unsigned HOST_WIDE_INT temp1, temp2;
1985 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1987 /* Find out which operations are safe for a given CODE. Also do a quick
1988 check for degenerate cases; these can occur when DImode operations
2000 can_negate_initial = 1;
2004 if (remainder == 0xffffffff)
2007 emit_constant_insn (cond,
2008 gen_rtx_SET (VOIDmode, target,
2009 GEN_INT (ARM_SIGN_EXTEND (val))));
2014 if (reload_completed && rtx_equal_p (target, source))
2017 emit_constant_insn (cond,
2018 gen_rtx_SET (VOIDmode, target, source));
2027 emit_constant_insn (cond,
2028 gen_rtx_SET (VOIDmode, target, const0_rtx));
2031 if (remainder == 0xffffffff)
2033 if (reload_completed && rtx_equal_p (target, source))
2036 emit_constant_insn (cond,
2037 gen_rtx_SET (VOIDmode, target, source));
2046 if (reload_completed && rtx_equal_p (target, source))
2049 emit_constant_insn (cond,
2050 gen_rtx_SET (VOIDmode, target, source));
2054 /* We don't know how to handle other cases yet. */
2055 gcc_assert (remainder == 0xffffffff);
2058 emit_constant_insn (cond,
2059 gen_rtx_SET (VOIDmode, target,
2060 gen_rtx_NOT (mode, source)));
2064 /* We treat MINUS as (val - source), since (source - val) is always
2065 passed as (source + (-val)). */
2069 emit_constant_insn (cond,
2070 gen_rtx_SET (VOIDmode, target,
2071 gen_rtx_NEG (mode, source)));
2074 if (const_ok_for_arm (val))
2077 emit_constant_insn (cond,
2078 gen_rtx_SET (VOIDmode, target,
2079 gen_rtx_MINUS (mode, GEN_INT (val),
2091 /* If we can do it in one insn get out quickly. */
2092 if (const_ok_for_arm (val)
2093 || (can_negate_initial && const_ok_for_arm (-val))
2094 || (can_invert && const_ok_for_arm (~val)))
2097 emit_constant_insn (cond,
2098 gen_rtx_SET (VOIDmode, target,
2100 ? gen_rtx_fmt_ee (code, mode, source,
2106 /* Calculate a few attributes that may be useful for specific
2108 for (i = 31; i >= 0; i--)
2110 if ((remainder & (1 << i)) == 0)
2111 clear_sign_bit_copies++;
2116 for (i = 31; i >= 0; i--)
2118 if ((remainder & (1 << i)) != 0)
2119 set_sign_bit_copies++;
2124 for (i = 0; i <= 31; i++)
2126 if ((remainder & (1 << i)) == 0)
2127 clear_zero_bit_copies++;
2132 for (i = 0; i <= 31; i++)
2134 if ((remainder & (1 << i)) != 0)
2135 set_zero_bit_copies++;
2143 /* See if we can use movw. */
2144 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2147 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2152 /* See if we can do this by sign_extending a constant that is known
2153 to be negative. This is a good, way of doing it, since the shift
2154 may well merge into a subsequent insn. */
2155 if (set_sign_bit_copies > 1)
2157 if (const_ok_for_arm
2158 (temp1 = ARM_SIGN_EXTEND (remainder
2159 << (set_sign_bit_copies - 1))))
2163 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2164 emit_constant_insn (cond,
2165 gen_rtx_SET (VOIDmode, new_src,
2167 emit_constant_insn (cond,
2168 gen_ashrsi3 (target, new_src,
2169 GEN_INT (set_sign_bit_copies - 1)));
2173 /* For an inverted constant, we will need to set the low bits,
2174 these will be shifted out of harm's way. */
2175 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2176 if (const_ok_for_arm (~temp1))
2180 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2181 emit_constant_insn (cond,
2182 gen_rtx_SET (VOIDmode, new_src,
2184 emit_constant_insn (cond,
2185 gen_ashrsi3 (target, new_src,
2186 GEN_INT (set_sign_bit_copies - 1)));
2192 /* See if we can calculate the value as the difference between two
2193 valid immediates. */
2194 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2196 int topshift = clear_sign_bit_copies & ~1;
2198 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2199 & (0xff000000 >> topshift));
2201 /* If temp1 is zero, then that means the 9 most significant
2202 bits of remainder were 1 and we've caused it to overflow.
2203 When topshift is 0 we don't need to do anything since we
2204 can borrow from 'bit 32'. */
2205 if (temp1 == 0 && topshift != 0)
2206 temp1 = 0x80000000 >> (topshift - 1);
2208 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2210 if (const_ok_for_arm (temp2))
2214 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2215 emit_constant_insn (cond,
2216 gen_rtx_SET (VOIDmode, new_src,
2218 emit_constant_insn (cond,
2219 gen_addsi3 (target, new_src,
2227 /* See if we can generate this by setting the bottom (or the top)
2228 16 bits, and then shifting these into the other half of the
2229 word. We only look for the simplest cases, to do more would cost
2230 too much. Be careful, however, not to generate this when the
2231 alternative would take fewer insns. */
2232 if (val & 0xffff0000)
2234 temp1 = remainder & 0xffff0000;
2235 temp2 = remainder & 0x0000ffff;
2237 /* Overlaps outside this range are best done using other methods. */
2238 for (i = 9; i < 24; i++)
2240 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2241 && !const_ok_for_arm (temp2))
2243 rtx new_src = (subtargets
2244 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2246 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2247 source, subtargets, generate);
2255 gen_rtx_ASHIFT (mode, source,
2262 /* Don't duplicate cases already considered. */
2263 for (i = 17; i < 24; i++)
2265 if (((temp1 | (temp1 >> i)) == remainder)
2266 && !const_ok_for_arm (temp1))
2268 rtx new_src = (subtargets
2269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2271 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2272 source, subtargets, generate);
2277 gen_rtx_SET (VOIDmode, target,
2280 gen_rtx_LSHIFTRT (mode, source,
2291 /* If we have IOR or XOR, and the constant can be loaded in a
2292 single instruction, and we can find a temporary to put it in,
2293 then this can be done in two instructions instead of 3-4. */
2295 /* TARGET can't be NULL if SUBTARGETS is 0 */
2296 || (reload_completed && !reg_mentioned_p (target, source)))
2298 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2302 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2304 emit_constant_insn (cond,
2305 gen_rtx_SET (VOIDmode, sub,
2307 emit_constant_insn (cond,
2308 gen_rtx_SET (VOIDmode, target,
2309 gen_rtx_fmt_ee (code, mode,
2319 if (set_sign_bit_copies > 8
2320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2325 rtx shift = GEN_INT (set_sign_bit_copies);
2329 gen_rtx_SET (VOIDmode, sub,
2331 gen_rtx_ASHIFT (mode,
2336 gen_rtx_SET (VOIDmode, target,
2338 gen_rtx_LSHIFTRT (mode, sub,
2344 if (set_zero_bit_copies > 8
2345 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2349 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2350 rtx shift = GEN_INT (set_zero_bit_copies);
2354 gen_rtx_SET (VOIDmode, sub,
2356 gen_rtx_LSHIFTRT (mode,
2361 gen_rtx_SET (VOIDmode, target,
2363 gen_rtx_ASHIFT (mode, sub,
2369 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2373 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2374 emit_constant_insn (cond,
2375 gen_rtx_SET (VOIDmode, sub,
2376 gen_rtx_NOT (mode, source)));
2379 sub = gen_reg_rtx (mode);
2380 emit_constant_insn (cond,
2381 gen_rtx_SET (VOIDmode, sub,
2382 gen_rtx_AND (mode, source,
2384 emit_constant_insn (cond,
2385 gen_rtx_SET (VOIDmode, target,
2386 gen_rtx_NOT (mode, sub)));
2393 /* See if two shifts will do 2 or more insn's worth of work. */
2394 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2396 HOST_WIDE_INT shift_mask = ((0xffffffff
2397 << (32 - clear_sign_bit_copies))
2400 if ((remainder | shift_mask) != 0xffffffff)
2404 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2405 insns = arm_gen_constant (AND, mode, cond,
2406 remainder | shift_mask,
2407 new_src, source, subtargets, 1);
2412 rtx targ = subtargets ? NULL_RTX : target;
2413 insns = arm_gen_constant (AND, mode, cond,
2414 remainder | shift_mask,
2415 targ, source, subtargets, 0);
2421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2422 rtx shift = GEN_INT (clear_sign_bit_copies);
2424 emit_insn (gen_ashlsi3 (new_src, source, shift));
2425 emit_insn (gen_lshrsi3 (target, new_src, shift));
2431 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2433 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2435 if ((remainder | shift_mask) != 0xffffffff)
2439 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2441 insns = arm_gen_constant (AND, mode, cond,
2442 remainder | shift_mask,
2443 new_src, source, subtargets, 1);
2448 rtx targ = subtargets ? NULL_RTX : target;
2450 insns = arm_gen_constant (AND, mode, cond,
2451 remainder | shift_mask,
2452 targ, source, subtargets, 0);
2458 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2459 rtx shift = GEN_INT (clear_zero_bit_copies);
2461 emit_insn (gen_lshrsi3 (new_src, source, shift));
2462 emit_insn (gen_ashlsi3 (target, new_src, shift));
2474 for (i = 0; i < 32; i++)
2475 if (remainder & (1 << i))
2478 if (code == AND || (can_invert && num_bits_set > 16))
2479 remainder = (~remainder) & 0xffffffff;
2480 else if (code == PLUS && num_bits_set > 16)
2481 remainder = (-remainder) & 0xffffffff;
2488 /* Now try and find a way of doing the job in either two or three
2490 We start by looking for the largest block of zeros that are aligned on
2491 a 2-bit boundary, we then fill up the temps, wrapping around to the
2492 top of the word when we drop off the bottom.
2493 In the worst case this code should produce no more than four insns.
2494 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2495 best place to start. */
2497 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2503 int best_consecutive_zeros = 0;
2505 for (i = 0; i < 32; i += 2)
2507 int consecutive_zeros = 0;
2509 if (!(remainder & (3 << i)))
2511 while ((i < 32) && !(remainder & (3 << i)))
2513 consecutive_zeros += 2;
2516 if (consecutive_zeros > best_consecutive_zeros)
2518 best_consecutive_zeros = consecutive_zeros;
2519 best_start = i - consecutive_zeros;
2525 /* So long as it won't require any more insns to do so, it's
2526 desirable to emit a small constant (in bits 0...9) in the last
2527 insn. This way there is more chance that it can be combined with
2528 a later addressing insn to form a pre-indexed load or store
2529 operation. Consider:
2531 *((volatile int *)0xe0000100) = 1;
2532 *((volatile int *)0xe0000110) = 2;
2534 We want this to wind up as:
2538 str rB, [rA, #0x100]
2540 str rB, [rA, #0x110]
2542 rather than having to synthesize both large constants from scratch.
2544 Therefore, we calculate how many insns would be required to emit
2545 the constant starting from `best_start', and also starting from
2546 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2547 yield a shorter sequence, we may as well use zero. */
2549 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2550 && (count_insns_for_constant (remainder, 0) <=
2551 count_insns_for_constant (remainder, best_start)))
2555 /* Now start emitting the insns. */
2563 if (remainder & (3 << (i - 2)))
2568 temp1 = remainder & ((0x0ff << end)
2569 | ((i < end) ? (0xff >> (32 - end)) : 0));
2570 remainder &= ~temp1;
2574 rtx new_src, temp1_rtx;
2576 if (code == SET || code == MINUS)
2578 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2579 if (can_invert && code != MINUS)
2584 if (remainder && subtargets)
2585 new_src = gen_reg_rtx (mode);
2590 else if (can_negate)
2594 temp1 = trunc_int_for_mode (temp1, mode);
2595 temp1_rtx = GEN_INT (temp1);
2599 else if (code == MINUS)
2600 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2602 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2604 emit_constant_insn (cond,
2605 gen_rtx_SET (VOIDmode, new_src,
2615 else if (code == MINUS)
2624 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2637 /* Canonicalize a comparison so that we are more likely to recognize it.
2638 This can be done for a few constant compares, where we can make the
2639 immediate value easier to load. */
2642 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2645 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2646 unsigned HOST_WIDE_INT maxval;
2647 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2658 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2660 *op1 = GEN_INT (i + 1);
2661 return code == GT ? GE : LT;
2668 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2670 *op1 = GEN_INT (i - 1);
2671 return code == GE ? GT : LE;
2677 if (i != ~((unsigned HOST_WIDE_INT) 0)
2678 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2680 *op1 = GEN_INT (i + 1);
2681 return code == GTU ? GEU : LTU;
2688 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2690 *op1 = GEN_INT (i - 1);
2691 return code == GEU ? GTU : LEU;
2703 /* Define how to find the value returned by a function. */
2706 arm_function_value(tree type, tree func ATTRIBUTE_UNUSED)
2708 enum machine_mode mode;
2709 int unsignedp ATTRIBUTE_UNUSED;
2710 rtx r ATTRIBUTE_UNUSED;
2712 mode = TYPE_MODE (type);
2713 /* Promote integer types. */
2714 if (INTEGRAL_TYPE_P (type))
2715 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2717 /* Promotes small structs returned in a register to full-word size
2718 for big-endian AAPCS. */
2719 if (arm_return_in_msb (type))
2721 HOST_WIDE_INT size = int_size_in_bytes (type);
2722 if (size % UNITS_PER_WORD != 0)
2724 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2725 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2729 return LIBCALL_VALUE(mode);
2732 /* Determine the amount of memory needed to store the possible return
2733 registers of an untyped call. */
2735 arm_apply_result_size (void)
2741 if (TARGET_HARD_FLOAT_ABI)
2745 if (TARGET_MAVERICK)
2748 if (TARGET_IWMMXT_ABI)
2755 /* Decide whether a type should be returned in memory (true)
2756 or in a register (false). This is called by the macro
2757 RETURN_IN_MEMORY. */
2759 arm_return_in_memory (tree type)
2763 size = int_size_in_bytes (type);
2765 /* Vector values should be returned using ARM registers, not memory (unless
2766 they're over 16 bytes, which will break since we only have four
2767 call-clobbered registers to play with). */
2768 if (TREE_CODE (type) == VECTOR_TYPE)
2769 return (size < 0 || size > (4 * UNITS_PER_WORD));
2771 if (!AGGREGATE_TYPE_P (type) &&
2772 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2773 /* All simple types are returned in registers.
2774 For AAPCS, complex types are treated the same as aggregates. */
2777 if (arm_abi != ARM_ABI_APCS)
2779 /* ATPCS and later return aggregate types in memory only if they are
2780 larger than a word (or are variable size). */
2781 return (size < 0 || size > UNITS_PER_WORD);
2784 /* For the arm-wince targets we choose to be compatible with Microsoft's
2785 ARM and Thumb compilers, which always return aggregates in memory. */
2787 /* All structures/unions bigger than one word are returned in memory.
2788 Also catch the case where int_size_in_bytes returns -1. In this case
2789 the aggregate is either huge or of variable size, and in either case
2790 we will want to return it via memory and not in a register. */
2791 if (size < 0 || size > UNITS_PER_WORD)
2794 if (TREE_CODE (type) == RECORD_TYPE)
2798 /* For a struct the APCS says that we only return in a register
2799 if the type is 'integer like' and every addressable element
2800 has an offset of zero. For practical purposes this means
2801 that the structure can have at most one non bit-field element
2802 and that this element must be the first one in the structure. */
2804 /* Find the first field, ignoring non FIELD_DECL things which will
2805 have been created by C++. */
2806 for (field = TYPE_FIELDS (type);
2807 field && TREE_CODE (field) != FIELD_DECL;
2808 field = TREE_CHAIN (field))
2812 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2814 /* Check that the first field is valid for returning in a register. */
2816 /* ... Floats are not allowed */
2817 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2820 /* ... Aggregates that are not themselves valid for returning in
2821 a register are not allowed. */
2822 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2825 /* Now check the remaining fields, if any. Only bitfields are allowed,
2826 since they are not addressable. */
2827 for (field = TREE_CHAIN (field);
2829 field = TREE_CHAIN (field))
2831 if (TREE_CODE (field) != FIELD_DECL)
2834 if (!DECL_BIT_FIELD_TYPE (field))
2841 if (TREE_CODE (type) == UNION_TYPE)
2845 /* Unions can be returned in registers if every element is
2846 integral, or can be returned in an integer register. */
2847 for (field = TYPE_FIELDS (type);
2849 field = TREE_CHAIN (field))
2851 if (TREE_CODE (field) != FIELD_DECL)
2854 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2857 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2863 #endif /* not ARM_WINCE */
2865 /* Return all other types in memory. */
2869 /* Indicate whether or not words of a double are in big-endian order. */
2872 arm_float_words_big_endian (void)
2874 if (TARGET_MAVERICK)
2877 /* For FPA, float words are always big-endian. For VFP, floats words
2878 follow the memory system mode. */
2886 return (TARGET_BIG_END ? 1 : 0);
2891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2892 for a call to a function whose data type is FNTYPE.
2893 For a library call, FNTYPE is NULL. */
2895 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2896 rtx libname ATTRIBUTE_UNUSED,
2897 tree fndecl ATTRIBUTE_UNUSED)
2899 /* On the ARM, the offset starts at 0. */
2901 pcum->iwmmxt_nregs = 0;
2902 pcum->can_split = true;
2904 /* Varargs vectors are treated the same as long long.
2905 named_count avoids having to change the way arm handles 'named' */
2906 pcum->named_count = 0;
2909 if (TARGET_REALLY_IWMMXT && fntype)
2913 for (fn_arg = TYPE_ARG_TYPES (fntype);
2915 fn_arg = TREE_CHAIN (fn_arg))
2916 pcum->named_count += 1;
2918 if (! pcum->named_count)
2919 pcum->named_count = INT_MAX;
2924 /* Return true if mode/type need doubleword alignment. */
2926 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2928 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2929 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2933 /* Determine where to put an argument to a function.
2934 Value is zero to push the argument on the stack,
2935 or a hard register in which to store the argument.
2937 MODE is the argument's machine mode.
2938 TYPE is the data type of the argument (as a tree).
2939 This is null for libcalls where that information may
2941 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2942 the preceding args and about the function being called.
2943 NAMED is nonzero if this argument is a named parameter
2944 (otherwise it is an extra parameter matching an ellipsis). */
2947 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2948 tree type, int named)
2952 /* Varargs vectors are treated the same as long long.
2953 named_count avoids having to change the way arm handles 'named' */
2954 if (TARGET_IWMMXT_ABI
2955 && arm_vector_mode_supported_p (mode)
2956 && pcum->named_count > pcum->nargs + 1)
2958 if (pcum->iwmmxt_nregs <= 9)
2959 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2962 pcum->can_split = false;
2967 /* Put doubleword aligned quantities in even register pairs. */
2969 && ARM_DOUBLEWORD_ALIGN
2970 && arm_needs_doubleword_align (mode, type))
2973 if (mode == VOIDmode)
2974 /* Pick an arbitrary value for operand 2 of the call insn. */
2977 /* Only allow splitting an arg between regs and memory if all preceding
2978 args were allocated to regs. For args passed by reference we only count
2979 the reference pointer. */
2980 if (pcum->can_split)
2983 nregs = ARM_NUM_REGS2 (mode, type);
2985 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2988 return gen_rtx_REG (mode, pcum->nregs);
2992 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2993 tree type, bool named ATTRIBUTE_UNUSED)
2995 int nregs = pcum->nregs;
2997 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3000 if (NUM_ARG_REGS > nregs
3001 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3003 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3008 /* Variable sized types are passed by reference. This is a GCC
3009 extension to the ARM ABI. */
3012 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3013 enum machine_mode mode ATTRIBUTE_UNUSED,
3014 tree type, bool named ATTRIBUTE_UNUSED)
3016 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3019 /* Encode the current state of the #pragma [no_]long_calls. */
3022 OFF, /* No #pragma [no_]long_calls is in effect. */
3023 LONG, /* #pragma long_calls is in effect. */
3024 SHORT /* #pragma no_long_calls is in effect. */
3027 static arm_pragma_enum arm_pragma_long_calls = OFF;
3030 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3032 arm_pragma_long_calls = LONG;
3036 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3038 arm_pragma_long_calls = SHORT;
3042 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3044 arm_pragma_long_calls = OFF;
3047 /* Table of machine attributes. */
3048 const struct attribute_spec arm_attribute_table[] =
3050 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3051 /* Function calls made to this symbol must be done indirectly, because
3052 it may lie outside of the 26 bit addressing range of a normal function
3054 { "long_call", 0, 0, false, true, true, NULL },
3055 /* Whereas these functions are always known to reside within the 26 bit
3056 addressing range. */
3057 { "short_call", 0, 0, false, true, true, NULL },
3058 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3059 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3060 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3061 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3063 /* ARM/PE has three new attributes:
3065 dllexport - for exporting a function/variable that will live in a dll
3066 dllimport - for importing a function/variable from a dll
3068 Microsoft allows multiple declspecs in one __declspec, separating
3069 them with spaces. We do NOT support this. Instead, use __declspec
3072 { "dllimport", 0, 0, true, false, false, NULL },
3073 { "dllexport", 0, 0, true, false, false, NULL },
3074 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3075 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3076 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3077 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3078 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3080 { NULL, 0, 0, false, false, false, NULL }
3083 /* Handle an attribute requiring a FUNCTION_DECL;
3084 arguments as in struct attribute_spec.handler. */
3086 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3087 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3089 if (TREE_CODE (*node) != FUNCTION_DECL)
3091 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3092 IDENTIFIER_POINTER (name));
3093 *no_add_attrs = true;
3099 /* Handle an "interrupt" or "isr" attribute;
3100 arguments as in struct attribute_spec.handler. */
3102 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3107 if (TREE_CODE (*node) != FUNCTION_DECL)
3109 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3110 IDENTIFIER_POINTER (name));
3111 *no_add_attrs = true;
3113 /* FIXME: the argument if any is checked for type attributes;
3114 should it be checked for decl ones? */
3118 if (TREE_CODE (*node) == FUNCTION_TYPE
3119 || TREE_CODE (*node) == METHOD_TYPE)
3121 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3123 warning (OPT_Wattributes, "%qs attribute ignored",
3124 IDENTIFIER_POINTER (name));
3125 *no_add_attrs = true;
3128 else if (TREE_CODE (*node) == POINTER_TYPE
3129 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3130 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3131 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3133 *node = build_variant_type_copy (*node);
3134 TREE_TYPE (*node) = build_type_attribute_variant
3136 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3137 *no_add_attrs = true;
3141 /* Possibly pass this attribute on from the type to a decl. */
3142 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3143 | (int) ATTR_FLAG_FUNCTION_NEXT
3144 | (int) ATTR_FLAG_ARRAY_NEXT))
3146 *no_add_attrs = true;
3147 return tree_cons (name, args, NULL_TREE);
3151 warning (OPT_Wattributes, "%qs attribute ignored",
3152 IDENTIFIER_POINTER (name));
3160 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3161 /* Handle the "notshared" attribute. This attribute is another way of
3162 requesting hidden visibility. ARM's compiler supports
3163 "__declspec(notshared)"; we support the same thing via an
3167 arm_handle_notshared_attribute (tree *node,
3168 tree name ATTRIBUTE_UNUSED,
3169 tree args ATTRIBUTE_UNUSED,
3170 int flags ATTRIBUTE_UNUSED,
3173 tree decl = TYPE_NAME (*node);
3177 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3178 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3179 *no_add_attrs = false;
3185 /* Return 0 if the attributes for two types are incompatible, 1 if they
3186 are compatible, and 2 if they are nearly compatible (which causes a
3187 warning to be generated). */
3189 arm_comp_type_attributes (tree type1, tree type2)
3193 /* Check for mismatch of non-default calling convention. */
3194 if (TREE_CODE (type1) != FUNCTION_TYPE)
3197 /* Check for mismatched call attributes. */
3198 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3199 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3200 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3201 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3203 /* Only bother to check if an attribute is defined. */
3204 if (l1 | l2 | s1 | s2)
3206 /* If one type has an attribute, the other must have the same attribute. */
3207 if ((l1 != l2) || (s1 != s2))
3210 /* Disallow mixed attributes. */
3211 if ((l1 & s2) || (l2 & s1))
3215 /* Check for mismatched ISR attribute. */
3216 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3218 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3219 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3221 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3228 /* Assigns default attributes to newly defined type. This is used to
3229 set short_call/long_call attributes for function types of
3230 functions defined inside corresponding #pragma scopes. */
3232 arm_set_default_type_attributes (tree type)
3234 /* Add __attribute__ ((long_call)) to all functions, when
3235 inside #pragma long_calls or __attribute__ ((short_call)),
3236 when inside #pragma no_long_calls. */
3237 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3239 tree type_attr_list, attr_name;
3240 type_attr_list = TYPE_ATTRIBUTES (type);
3242 if (arm_pragma_long_calls == LONG)
3243 attr_name = get_identifier ("long_call");
3244 else if (arm_pragma_long_calls == SHORT)
3245 attr_name = get_identifier ("short_call");
3249 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3250 TYPE_ATTRIBUTES (type) = type_attr_list;
3254 /* Return true if DECL is known to be linked into section SECTION. */
3257 arm_function_in_section_p (tree decl, section *section)
3259 /* We can only be certain about functions defined in the same
3260 compilation unit. */
3261 if (!TREE_STATIC (decl))
3264 /* Make sure that SYMBOL always binds to the definition in this
3265 compilation unit. */
3266 if (!targetm.binds_local_p (decl))
3269 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3270 if (!DECL_SECTION_NAME (decl))
3272 /* Only cater for unit-at-a-time mode, where we know that the user
3273 cannot later specify a section for DECL. */
3274 if (!flag_unit_at_a_time)
3277 /* Make sure that we will not create a unique section for DECL. */
3278 if (flag_function_sections || DECL_ONE_ONLY (decl))
3282 return function_section (decl) == section;
3285 /* Return nonzero if a 32-bit "long_call" should be generated for
3286 a call from the current function to DECL. We generate a long_call
3289 a. has an __attribute__((long call))
3290 or b. is within the scope of a #pragma long_calls
3291 or c. the -mlong-calls command line switch has been specified
3293 However we do not generate a long call if the function:
3295 d. has an __attribute__ ((short_call))
3296 or e. is inside the scope of a #pragma no_long_calls
3297 or f. is defined in the same section as the current function. */
3300 arm_is_long_call_p (tree decl)
3305 return TARGET_LONG_CALLS;
3307 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3308 if (lookup_attribute ("short_call", attrs))
3311 /* For "f", be conservative, and only cater for cases in which the
3312 whole of the current function is placed in the same section. */
3313 if (!flag_reorder_blocks_and_partition
3314 && arm_function_in_section_p (decl, current_function_section ()))
3317 if (lookup_attribute ("long_call", attrs))
3320 return TARGET_LONG_CALLS;
3323 /* Return nonzero if it is ok to make a tail-call to DECL. */
3325 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3327 unsigned long func_type;
3329 if (cfun->machine->sibcall_blocked)
3332 /* Never tailcall something for which we have no decl, or if we
3333 are in Thumb mode. */
3334 if (decl == NULL || TARGET_THUMB)
3337 /* The PIC register is live on entry to VxWorks PLT entries, so we
3338 must make the call before restoring the PIC register. */
3339 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3342 /* Cannot tail-call to long calls, since these are out of range of
3343 a branch instruction. */
3344 if (arm_is_long_call_p (decl))
3347 /* If we are interworking and the function is not declared static
3348 then we can't tail-call it unless we know that it exists in this
3349 compilation unit (since it might be a Thumb routine). */
3350 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3353 func_type = arm_current_func_type ();
3354 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3355 if (IS_INTERRUPT (func_type))
3358 /* Never tailcall if function may be called with a misaligned SP. */
3359 if (IS_STACKALIGN (func_type))
3362 /* Everything else is ok. */
3367 /* Addressing mode support functions. */
3369 /* Return nonzero if X is a legitimate immediate operand when compiling
3370 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3372 legitimate_pic_operand_p (rtx x)
3374 if (GET_CODE (x) == SYMBOL_REF
3375 || (GET_CODE (x) == CONST
3376 && GET_CODE (XEXP (x, 0)) == PLUS
3377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3383 /* Record that the current function needs a PIC register. Initialize
3384 cfun->machine->pic_reg if we have not already done so. */
3387 require_pic_register (void)
3389 /* A lot of the logic here is made obscure by the fact that this
3390 routine gets called as part of the rtx cost estimation process.
3391 We don't want those calls to affect any assumptions about the real
3392 function; and further, we can't call entry_of_function() until we
3393 start the real expansion process. */
3394 if (!current_function_uses_pic_offset_table)
3396 gcc_assert (can_create_pseudo_p ());
3397 if (arm_pic_register != INVALID_REGNUM)
3399 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3401 /* Play games to avoid marking the function as needing pic
3402 if we are being called as part of the cost-estimation
3404 if (current_ir_type () != IR_GIMPLE)
3405 current_function_uses_pic_offset_table = 1;
3411 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3413 /* Play games to avoid marking the function as needing pic
3414 if we are being called as part of the cost-estimation
3416 if (current_ir_type () != IR_GIMPLE)
3418 current_function_uses_pic_offset_table = 1;
3421 arm_load_pic_register (0UL);
3425 emit_insn_after (seq, entry_of_function ());
3432 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3434 if (GET_CODE (orig) == SYMBOL_REF
3435 || GET_CODE (orig) == LABEL_REF)
3437 #ifndef AOF_ASSEMBLER
3438 rtx pic_ref, address;
3443 /* If this function doesn't have a pic register, create one now. */
3444 require_pic_register ();
3448 gcc_assert (can_create_pseudo_p ());
3449 reg = gen_reg_rtx (Pmode);
3454 #ifdef AOF_ASSEMBLER
3455 /* The AOF assembler can generate relocations for these directly, and
3456 understands that the PIC register has to be added into the offset. */
3457 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3460 address = gen_reg_rtx (Pmode);
3465 emit_insn (gen_pic_load_addr_arm (address, orig));
3466 else if (TARGET_THUMB2)
3467 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3468 else /* TARGET_THUMB1 */
3469 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3471 /* VxWorks does not impose a fixed gap between segments; the run-time
3472 gap can be different from the object-file gap. We therefore can't
3473 use GOTOFF unless we are absolutely sure that the symbol is in the
3474 same segment as the GOT. Unfortunately, the flexibility of linker
3475 scripts means that we can't be sure of that in general, so assume
3476 that GOTOFF is never valid on VxWorks. */
3477 if ((GET_CODE (orig) == LABEL_REF
3478 || (GET_CODE (orig) == SYMBOL_REF &&
3479 SYMBOL_REF_LOCAL_P (orig)))
3481 && !TARGET_VXWORKS_RTP)
3482 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3485 pic_ref = gen_const_mem (Pmode,
3486 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3490 insn = emit_move_insn (reg, pic_ref);
3492 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3494 set_unique_reg_note (insn, REG_EQUAL, orig);
3498 else if (GET_CODE (orig) == CONST)
3502 if (GET_CODE (XEXP (orig, 0)) == PLUS
3503 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3506 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3507 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3512 gcc_assert (can_create_pseudo_p ());
3513 reg = gen_reg_rtx (Pmode);
3516 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3518 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3519 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3520 base == reg ? 0 : reg);
3522 if (GET_CODE (offset) == CONST_INT)
3524 /* The base register doesn't really matter, we only want to
3525 test the index for the appropriate mode. */
3526 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3528 gcc_assert (can_create_pseudo_p ());
3529 offset = force_reg (Pmode, offset);
3532 if (GET_CODE (offset) == CONST_INT)
3533 return plus_constant (base, INTVAL (offset));
3536 if (GET_MODE_SIZE (mode) > 4
3537 && (GET_MODE_CLASS (mode) == MODE_INT
3538 || TARGET_SOFT_FLOAT))
3540 emit_insn (gen_addsi3 (reg, base, offset));
3544 return gen_rtx_PLUS (Pmode, base, offset);
3551 /* Find a spare register to use during the prolog of a function. */
3554 thumb_find_work_register (unsigned long pushed_regs_mask)
3558 /* Check the argument registers first as these are call-used. The
3559 register allocation order means that sometimes r3 might be used
3560 but earlier argument registers might not, so check them all. */
3561 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3562 if (!df_regs_ever_live_p (reg))
3565 /* Before going on to check the call-saved registers we can try a couple
3566 more ways of deducing that r3 is available. The first is when we are
3567 pushing anonymous arguments onto the stack and we have less than 4
3568 registers worth of fixed arguments(*). In this case r3 will be part of
3569 the variable argument list and so we can be sure that it will be
3570 pushed right at the start of the function. Hence it will be available
3571 for the rest of the prologue.
3572 (*): ie current_function_pretend_args_size is greater than 0. */
3573 if (cfun->machine->uses_anonymous_args
3574 && current_function_pretend_args_size > 0)
3575 return LAST_ARG_REGNUM;
3577 /* The other case is when we have fixed arguments but less than 4 registers
3578 worth. In this case r3 might be used in the body of the function, but
3579 it is not being used to convey an argument into the function. In theory
3580 we could just check current_function_args_size to see how many bytes are
3581 being passed in argument registers, but it seems that it is unreliable.
3582 Sometimes it will have the value 0 when in fact arguments are being
3583 passed. (See testcase execute/20021111-1.c for an example). So we also
3584 check the args_info.nregs field as well. The problem with this field is
3585 that it makes no allowances for arguments that are passed to the
3586 function but which are not used. Hence we could miss an opportunity
3587 when a function has an unused argument in r3. But it is better to be
3588 safe than to be sorry. */
3589 if (! cfun->machine->uses_anonymous_args
3590 && current_function_args_size >= 0
3591 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3592 && cfun->args_info.nregs < 4)
3593 return LAST_ARG_REGNUM;
3595 /* Otherwise look for a call-saved register that is going to be pushed. */
3596 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3597 if (pushed_regs_mask & (1 << reg))
3602 /* Thumb-2 can use high regs. */
3603 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3604 if (pushed_regs_mask & (1 << reg))
3607 /* Something went wrong - thumb_compute_save_reg_mask()
3608 should have arranged for a suitable register to be pushed. */
3612 static GTY(()) int pic_labelno;
3614 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3618 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3620 #ifndef AOF_ASSEMBLER
3621 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3622 rtx global_offset_table;
3624 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3627 gcc_assert (flag_pic);
3629 pic_reg = cfun->machine->pic_reg;
3630 if (TARGET_VXWORKS_RTP)
3632 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3633 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3634 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3636 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3638 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3639 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3643 /* We use an UNSPEC rather than a LABEL_REF because this label
3644 never appears in the code stream. */
3646 labelno = GEN_INT (pic_labelno++);
3647 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3648 l1 = gen_rtx_CONST (VOIDmode, l1);
3651 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3652 /* On the ARM the PC register contains 'dot + 8' at the time of the
3653 addition, on the Thumb it is 'dot + 4'. */
3654 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3657 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3658 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3661 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3663 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3664 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3668 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3669 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3671 else if (TARGET_THUMB2)
3673 /* Thumb-2 only allows very limited access to the PC. Calculate the
3674 address in a temporary register. */
3675 if (arm_pic_register != INVALID_REGNUM)
3677 pic_tmp = gen_rtx_REG (SImode,
3678 thumb_find_work_register (saved_regs));
3682 gcc_assert (can_create_pseudo_p ());
3683 pic_tmp = gen_reg_rtx (Pmode);
3686 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3687 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3688 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3690 else /* TARGET_THUMB1 */
3692 if (arm_pic_register != INVALID_REGNUM
3693 && REGNO (pic_reg) > LAST_LO_REGNUM)
3695 /* We will have pushed the pic register, so we should always be
3696 able to find a work register. */
3697 pic_tmp = gen_rtx_REG (SImode,
3698 thumb_find_work_register (saved_regs));
3699 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3700 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3703 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3704 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3708 /* Need to emit this whether or not we obey regdecls,
3709 since setjmp/longjmp can cause life info to screw up. */
3710 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3711 #endif /* AOF_ASSEMBLER */
3715 /* Return nonzero if X is valid as an ARM state addressing register. */
3717 arm_address_register_rtx_p (rtx x, int strict_p)
3721 if (GET_CODE (x) != REG)
3727 return ARM_REGNO_OK_FOR_BASE_P (regno);
3729 return (regno <= LAST_ARM_REGNUM
3730 || regno >= FIRST_PSEUDO_REGISTER
3731 || regno == FRAME_POINTER_REGNUM
3732 || regno == ARG_POINTER_REGNUM);
3735 /* Return TRUE if this rtx is the difference of a symbol and a label,
3736 and will reduce to a PC-relative relocation in the object file.
3737 Expressions like this can be left alone when generating PIC, rather
3738 than forced through the GOT. */
3740 pcrel_constant_p (rtx x)
3742 if (GET_CODE (x) == MINUS)
3743 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3748 /* Return nonzero if X is a valid ARM state address operand. */
3750 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3754 enum rtx_code code = GET_CODE (x);
3756 if (arm_address_register_rtx_p (x, strict_p))
3759 use_ldrd = (TARGET_LDRD
3761 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3763 if (code == POST_INC || code == PRE_DEC
3764 || ((code == PRE_INC || code == POST_DEC)
3765 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3766 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3768 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3769 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3770 && GET_CODE (XEXP (x, 1)) == PLUS
3771 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3773 rtx addend = XEXP (XEXP (x, 1), 1);
3775 /* Don't allow ldrd post increment by register because it's hard
3776 to fixup invalid register choices. */
3778 && GET_CODE (x) == POST_MODIFY
3779 && GET_CODE (addend) == REG)
3782 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3783 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3786 /* After reload constants split into minipools will have addresses
3787 from a LABEL_REF. */
3788 else if (reload_completed
3789 && (code == LABEL_REF
3791 && GET_CODE (XEXP (x, 0)) == PLUS
3792 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3793 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3796 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3799 else if (code == PLUS)
3801 rtx xop0 = XEXP (x, 0);
3802 rtx xop1 = XEXP (x, 1);
3804 return ((arm_address_register_rtx_p (xop0, strict_p)
3805 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3806 || (arm_address_register_rtx_p (xop1, strict_p)
3807 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3811 /* Reload currently can't handle MINUS, so disable this for now */
3812 else if (GET_CODE (x) == MINUS)
3814 rtx xop0 = XEXP (x, 0);
3815 rtx xop1 = XEXP (x, 1);
3817 return (arm_address_register_rtx_p (xop0, strict_p)
3818 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3822 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3823 && code == SYMBOL_REF
3824 && CONSTANT_POOL_ADDRESS_P (x)
3826 && symbol_mentioned_p (get_pool_constant (x))
3827 && ! pcrel_constant_p (get_pool_constant (x))))
3833 /* Return nonzero if X is a valid Thumb-2 address operand. */
3835 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3838 enum rtx_code code = GET_CODE (x);
3840 if (arm_address_register_rtx_p (x, strict_p))
3843 use_ldrd = (TARGET_LDRD
3845 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3847 if (code == POST_INC || code == PRE_DEC
3848 || ((code == PRE_INC || code == POST_DEC)
3849 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3850 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3852 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3853 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3854 && GET_CODE (XEXP (x, 1)) == PLUS
3855 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3857 /* Thumb-2 only has autoincrement by constant. */
3858 rtx addend = XEXP (XEXP (x, 1), 1);
3859 HOST_WIDE_INT offset;
3861 if (GET_CODE (addend) != CONST_INT)
3864 offset = INTVAL(addend);
3865 if (GET_MODE_SIZE (mode) <= 4)
3866 return (offset > -256 && offset < 256);
3868 return (use_ldrd && offset > -1024 && offset < 1024
3869 && (offset & 3) == 0);
3872 /* After reload constants split into minipools will have addresses
3873 from a LABEL_REF. */
3874 else if (reload_completed
3875 && (code == LABEL_REF
3877 && GET_CODE (XEXP (x, 0)) == PLUS
3878 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3879 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3882 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3885 else if (code == PLUS)
3887 rtx xop0 = XEXP (x, 0);
3888 rtx xop1 = XEXP (x, 1);
3890 return ((arm_address_register_rtx_p (xop0, strict_p)
3891 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3892 || (arm_address_register_rtx_p (xop1, strict_p)
3893 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3896 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3897 && code == SYMBOL_REF
3898 && CONSTANT_POOL_ADDRESS_P (x)
3900 && symbol_mentioned_p (get_pool_constant (x))
3901 && ! pcrel_constant_p (get_pool_constant (x))))
3907 /* Return nonzero if INDEX is valid for an address index operand in
3910 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3913 HOST_WIDE_INT range;
3914 enum rtx_code code = GET_CODE (index);
3916 /* Standard coprocessor addressing modes. */
3917 if (TARGET_HARD_FLOAT
3918 && (TARGET_FPA || TARGET_MAVERICK)
3919 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3920 || (TARGET_MAVERICK && mode == DImode)))
3921 return (code == CONST_INT && INTVAL (index) < 1024
3922 && INTVAL (index) > -1024
3923 && (INTVAL (index) & 3) == 0);
3926 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3927 return (code == CONST_INT
3928 && INTVAL (index) < 1016
3929 && INTVAL (index) > -1024
3930 && (INTVAL (index) & 3) == 0);
3932 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3933 return (code == CONST_INT
3934 && INTVAL (index) < 1024
3935 && INTVAL (index) > -1024
3936 && (INTVAL (index) & 3) == 0);
3938 if (arm_address_register_rtx_p (index, strict_p)
3939 && (GET_MODE_SIZE (mode) <= 4))
3942 if (mode == DImode || mode == DFmode)
3944 if (code == CONST_INT)
3946 HOST_WIDE_INT val = INTVAL (index);
3949 return val > -256 && val < 256;
3951 return val > -4096 && val < 4092;
3954 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3957 if (GET_MODE_SIZE (mode) <= 4
3960 || (mode == QImode && outer == SIGN_EXTEND))))
3964 rtx xiop0 = XEXP (index, 0);
3965 rtx xiop1 = XEXP (index, 1);
3967 return ((arm_address_register_rtx_p (xiop0, strict_p)
3968 && power_of_two_operand (xiop1, SImode))
3969 || (arm_address_register_rtx_p (xiop1, strict_p)
3970 && power_of_two_operand (xiop0, SImode)));
3972 else if (code == LSHIFTRT || code == ASHIFTRT
3973 || code == ASHIFT || code == ROTATERT)
3975 rtx op = XEXP (index, 1);
3977 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3978 && GET_CODE (op) == CONST_INT
3980 && INTVAL (op) <= 31);
3984 /* For ARM v4 we may be doing a sign-extend operation during the
3988 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3994 range = (mode == HImode) ? 4095 : 4096;
3996 return (code == CONST_INT
3997 && INTVAL (index) < range
3998 && INTVAL (index) > -range);
4001 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4002 index operand. i.e. 1, 2, 4 or 8. */
4004 thumb2_index_mul_operand (rtx op)
4008 if (GET_CODE(op) != CONST_INT)
4012 return (val == 1 || val == 2 || val == 4 || val == 8);
4015 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4017 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4019 enum rtx_code code = GET_CODE (index);
4021 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4022 /* Standard coprocessor addressing modes. */
4023 if (TARGET_HARD_FLOAT
4024 && (TARGET_FPA || TARGET_MAVERICK)
4025 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4026 || (TARGET_MAVERICK && mode == DImode)))
4027 return (code == CONST_INT && INTVAL (index) < 1024
4028 && INTVAL (index) > -1024
4029 && (INTVAL (index) & 3) == 0);
4031 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4033 /* For DImode assume values will usually live in core regs
4034 and only allow LDRD addressing modes. */
4035 if (!TARGET_LDRD || mode != DImode)
4036 return (code == CONST_INT
4037 && INTVAL (index) < 1024
4038 && INTVAL (index) > -1024
4039 && (INTVAL (index) & 3) == 0);
4043 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4044 return (code == CONST_INT
4045 && INTVAL (index) < 1016
4046 && INTVAL (index) > -1024
4047 && (INTVAL (index) & 3) == 0);
4049 if (arm_address_register_rtx_p (index, strict_p)
4050 && (GET_MODE_SIZE (mode) <= 4))
4053 if (mode == DImode || mode == DFmode)
4055 HOST_WIDE_INT val = INTVAL (index);
4056 /* ??? Can we assume ldrd for thumb2? */
4057 /* Thumb-2 ldrd only has reg+const addressing modes. */
4058 if (code != CONST_INT)
4061 /* ldrd supports offsets of +-1020.
4062 However the ldr fallback does not. */
4063 return val > -256 && val < 256 && (val & 3) == 0;
4068 rtx xiop0 = XEXP (index, 0);
4069 rtx xiop1 = XEXP (index, 1);
4071 return ((arm_address_register_rtx_p (xiop0, strict_p)
4072 && thumb2_index_mul_operand (xiop1))
4073 || (arm_address_register_rtx_p (xiop1, strict_p)
4074 && thumb2_index_mul_operand (xiop0)));
4076 else if (code == ASHIFT)
4078 rtx op = XEXP (index, 1);
4080 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4081 && GET_CODE (op) == CONST_INT
4083 && INTVAL (op) <= 3);
4086 return (code == CONST_INT
4087 && INTVAL (index) < 4096
4088 && INTVAL (index) > -256);
4091 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4093 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4097 if (GET_CODE (x) != REG)
4103 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4105 return (regno <= LAST_LO_REGNUM
4106 || regno > LAST_VIRTUAL_REGISTER
4107 || regno == FRAME_POINTER_REGNUM
4108 || (GET_MODE_SIZE (mode) >= 4
4109 && (regno == STACK_POINTER_REGNUM
4110 || regno >= FIRST_PSEUDO_REGISTER
4111 || x == hard_frame_pointer_rtx
4112 || x == arg_pointer_rtx)));
4115 /* Return nonzero if x is a legitimate index register. This is the case
4116 for any base register that can access a QImode object. */
4118 thumb1_index_register_rtx_p (rtx x, int strict_p)
4120 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4123 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4125 The AP may be eliminated to either the SP or the FP, so we use the
4126 least common denominator, e.g. SImode, and offsets from 0 to 64.
4128 ??? Verify whether the above is the right approach.
4130 ??? Also, the FP may be eliminated to the SP, so perhaps that
4131 needs special handling also.
4133 ??? Look at how the mips16 port solves this problem. It probably uses
4134 better ways to solve some of these problems.
4136 Although it is not incorrect, we don't accept QImode and HImode
4137 addresses based on the frame pointer or arg pointer until the
4138 reload pass starts. This is so that eliminating such addresses
4139 into stack based ones won't produce impossible code. */
4141 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4143 /* ??? Not clear if this is right. Experiment. */
4144 if (GET_MODE_SIZE (mode) < 4
4145 && !(reload_in_progress || reload_completed)
4146 && (reg_mentioned_p (frame_pointer_rtx, x)
4147 || reg_mentioned_p (arg_pointer_rtx, x)
4148 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4149 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4150 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4151 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4154 /* Accept any base register. SP only in SImode or larger. */
4155 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4158 /* This is PC relative data before arm_reorg runs. */
4159 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4160 && GET_CODE (x) == SYMBOL_REF
4161 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4164 /* This is PC relative data after arm_reorg runs. */
4165 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4166 && (GET_CODE (x) == LABEL_REF
4167 || (GET_CODE (x) == CONST
4168 && GET_CODE (XEXP (x, 0)) == PLUS
4169 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4170 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4173 /* Post-inc indexing only supported for SImode and larger. */
4174 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4175 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4178 else if (GET_CODE (x) == PLUS)
4180 /* REG+REG address can be any two index registers. */
4181 /* We disallow FRAME+REG addressing since we know that FRAME
4182 will be replaced with STACK, and SP relative addressing only
4183 permits SP+OFFSET. */
4184 if (GET_MODE_SIZE (mode) <= 4
4185 && XEXP (x, 0) != frame_pointer_rtx
4186 && XEXP (x, 1) != frame_pointer_rtx
4187 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4188 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4191 /* REG+const has 5-7 bit offset for non-SP registers. */
4192 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4193 || XEXP (x, 0) == arg_pointer_rtx)
4194 && GET_CODE (XEXP (x, 1)) == CONST_INT
4195 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4198 /* REG+const has 10-bit offset for SP, but only SImode and
4199 larger is supported. */
4200 /* ??? Should probably check for DI/DFmode overflow here
4201 just like GO_IF_LEGITIMATE_OFFSET does. */
4202 else if (GET_CODE (XEXP (x, 0)) == REG
4203 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4204 && GET_MODE_SIZE (mode) >= 4
4205 && GET_CODE (XEXP (x, 1)) == CONST_INT
4206 && INTVAL (XEXP (x, 1)) >= 0
4207 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4208 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4211 else if (GET_CODE (XEXP (x, 0)) == REG
4212 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4213 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4214 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4215 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4216 && GET_MODE_SIZE (mode) >= 4
4217 && GET_CODE (XEXP (x, 1)) == CONST_INT
4218 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4222 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4223 && GET_MODE_SIZE (mode) == 4
4224 && GET_CODE (x) == SYMBOL_REF
4225 && CONSTANT_POOL_ADDRESS_P (x)
4227 && symbol_mentioned_p (get_pool_constant (x))
4228 && ! pcrel_constant_p (get_pool_constant (x))))
4234 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4235 instruction of mode MODE. */
4237 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4239 switch (GET_MODE_SIZE (mode))
4242 return val >= 0 && val < 32;
4245 return val >= 0 && val < 64 && (val & 1) == 0;
4249 && (val + GET_MODE_SIZE (mode)) <= 128
4254 /* Build the SYMBOL_REF for __tls_get_addr. */
4256 static GTY(()) rtx tls_get_addr_libfunc;
4259 get_tls_get_addr (void)
4261 if (!tls_get_addr_libfunc)
4262 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4263 return tls_get_addr_libfunc;
4267 arm_load_tp (rtx target)
4270 target = gen_reg_rtx (SImode);
4274 /* Can return in any reg. */
4275 emit_insn (gen_load_tp_hard (target));
4279 /* Always returned in r0. Immediately copy the result into a pseudo,
4280 otherwise other uses of r0 (e.g. setting up function arguments) may
4281 clobber the value. */
4285 emit_insn (gen_load_tp_soft ());
4287 tmp = gen_rtx_REG (SImode, 0);
4288 emit_move_insn (target, tmp);
4294 load_tls_operand (rtx x, rtx reg)
4298 if (reg == NULL_RTX)
4299 reg = gen_reg_rtx (SImode);
4301 tmp = gen_rtx_CONST (SImode, x);
4303 emit_move_insn (reg, tmp);
4309 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4311 rtx insns, label, labelno, sum;
4315 labelno = GEN_INT (pic_labelno++);
4316 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4317 label = gen_rtx_CONST (VOIDmode, label);
4319 sum = gen_rtx_UNSPEC (Pmode,
4320 gen_rtvec (4, x, GEN_INT (reloc), label,
4321 GEN_INT (TARGET_ARM ? 8 : 4)),
4323 reg = load_tls_operand (sum, reg);
4326 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4327 else if (TARGET_THUMB2)
4330 /* Thumb-2 only allows very limited access to the PC. Calculate
4331 the address in a temporary register. */
4332 tmp = gen_reg_rtx (SImode);
4333 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4334 emit_insn (gen_addsi3(reg, reg, tmp));
4336 else /* TARGET_THUMB1 */
4337 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4339 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4340 Pmode, 1, reg, Pmode);
4342 insns = get_insns ();
4349 legitimize_tls_address (rtx x, rtx reg)
4351 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4352 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4356 case TLS_MODEL_GLOBAL_DYNAMIC:
4357 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4358 dest = gen_reg_rtx (Pmode);
4359 emit_libcall_block (insns, dest, ret, x);
4362 case TLS_MODEL_LOCAL_DYNAMIC:
4363 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4365 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4366 share the LDM result with other LD model accesses. */
4367 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4369 dest = gen_reg_rtx (Pmode);
4370 emit_libcall_block (insns, dest, ret, eqv);
4372 /* Load the addend. */
4373 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4375 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4376 return gen_rtx_PLUS (Pmode, dest, addend);
4378 case TLS_MODEL_INITIAL_EXEC:
4379 labelno = GEN_INT (pic_labelno++);
4380 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4381 label = gen_rtx_CONST (VOIDmode, label);
4382 sum = gen_rtx_UNSPEC (Pmode,
4383 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4384 GEN_INT (TARGET_ARM ? 8 : 4)),
4386 reg = load_tls_operand (sum, reg);
4389 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4390 else if (TARGET_THUMB2)
4393 /* Thumb-2 only allows very limited access to the PC. Calculate
4394 the address in a temporary register. */
4395 tmp = gen_reg_rtx (SImode);
4396 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4397 emit_insn (gen_addsi3(reg, reg, tmp));
4398 emit_move_insn (reg, gen_const_mem (SImode, reg));
4402 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4403 emit_move_insn (reg, gen_const_mem (SImode, reg));
4406 tp = arm_load_tp (NULL_RTX);
4408 return gen_rtx_PLUS (Pmode, tp, reg);
4410 case TLS_MODEL_LOCAL_EXEC:
4411 tp = arm_load_tp (NULL_RTX);
4413 reg = gen_rtx_UNSPEC (Pmode,
4414 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4416 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4418 return gen_rtx_PLUS (Pmode, tp, reg);
4425 /* Try machine-dependent ways of modifying an illegitimate address
4426 to be legitimate. If we find one, return the new, valid address. */
4428 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4430 if (arm_tls_symbol_p (x))
4431 return legitimize_tls_address (x, NULL_RTX);
4433 if (GET_CODE (x) == PLUS)
4435 rtx xop0 = XEXP (x, 0);
4436 rtx xop1 = XEXP (x, 1);
4438 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4439 xop0 = force_reg (SImode, xop0);
4441 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4442 xop1 = force_reg (SImode, xop1);
4444 if (ARM_BASE_REGISTER_RTX_P (xop0)
4445 && GET_CODE (xop1) == CONST_INT)
4447 HOST_WIDE_INT n, low_n;
4451 /* VFP addressing modes actually allow greater offsets, but for
4452 now we just stick with the lowest common denominator. */
4454 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4466 low_n = ((mode) == TImode ? 0
4467 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4471 base_reg = gen_reg_rtx (SImode);
4472 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4473 emit_move_insn (base_reg, val);
4474 x = plus_constant (base_reg, low_n);
4476 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4477 x = gen_rtx_PLUS (SImode, xop0, xop1);
4480 /* XXX We don't allow MINUS any more -- see comment in
4481 arm_legitimate_address_p (). */
4482 else if (GET_CODE (x) == MINUS)
4484 rtx xop0 = XEXP (x, 0);
4485 rtx xop1 = XEXP (x, 1);
4487 if (CONSTANT_P (xop0))
4488 xop0 = force_reg (SImode, xop0);
4490 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4491 xop1 = force_reg (SImode, xop1);
4493 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4494 x = gen_rtx_MINUS (SImode, xop0, xop1);
4497 /* Make sure to take full advantage of the pre-indexed addressing mode
4498 with absolute addresses which often allows for the base register to
4499 be factorized for multiple adjacent memory references, and it might
4500 even allows for the mini pool to be avoided entirely. */
4501 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4504 HOST_WIDE_INT mask, base, index;
4507 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4508 use a 8-bit index. So let's use a 12-bit index for SImode only and
4509 hope that arm_gen_constant will enable ldrb to use more bits. */
4510 bits = (mode == SImode) ? 12 : 8;
4511 mask = (1 << bits) - 1;
4512 base = INTVAL (x) & ~mask;
4513 index = INTVAL (x) & mask;
4514 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4516 /* It'll most probably be more efficient to generate the base
4517 with more bits set and use a negative index instead. */
4521 base_reg = force_reg (SImode, GEN_INT (base));
4522 x = plus_constant (base_reg, index);
4527 /* We need to find and carefully transform any SYMBOL and LABEL
4528 references; so go back to the original address expression. */
4529 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4531 if (new_x != orig_x)
4539 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4540 to be legitimate. If we find one, return the new, valid address. */
4542 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4544 if (arm_tls_symbol_p (x))
4545 return legitimize_tls_address (x, NULL_RTX);
4547 if (GET_CODE (x) == PLUS
4548 && GET_CODE (XEXP (x, 1)) == CONST_INT
4549 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4550 || INTVAL (XEXP (x, 1)) < 0))
4552 rtx xop0 = XEXP (x, 0);
4553 rtx xop1 = XEXP (x, 1);
4554 HOST_WIDE_INT offset = INTVAL (xop1);
4556 /* Try and fold the offset into a biasing of the base register and
4557 then offsetting that. Don't do this when optimizing for space
4558 since it can cause too many CSEs. */
4559 if (optimize_size && offset >= 0
4560 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4562 HOST_WIDE_INT delta;
4565 delta = offset - (256 - GET_MODE_SIZE (mode));
4566 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4567 delta = 31 * GET_MODE_SIZE (mode);
4569 delta = offset & (~31 * GET_MODE_SIZE (mode));
4571 xop0 = force_operand (plus_constant (xop0, offset - delta),
4573 x = plus_constant (xop0, delta);
4575 else if (offset < 0 && offset > -256)
4576 /* Small negative offsets are best done with a subtract before the
4577 dereference, forcing these into a register normally takes two
4579 x = force_operand (x, NULL_RTX);
4582 /* For the remaining cases, force the constant into a register. */
4583 xop1 = force_reg (SImode, xop1);
4584 x = gen_rtx_PLUS (SImode, xop0, xop1);
4587 else if (GET_CODE (x) == PLUS
4588 && s_register_operand (XEXP (x, 1), SImode)
4589 && !s_register_operand (XEXP (x, 0), SImode))
4591 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4593 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4598 /* We need to find and carefully transform any SYMBOL and LABEL
4599 references; so go back to the original address expression. */
4600 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4602 if (new_x != orig_x)
4610 thumb_legitimize_reload_address (rtx *x_p,
4611 enum machine_mode mode,
4612 int opnum, int type,
4613 int ind_levels ATTRIBUTE_UNUSED)
4617 if (GET_CODE (x) == PLUS
4618 && GET_MODE_SIZE (mode) < 4
4619 && REG_P (XEXP (x, 0))
4620 && XEXP (x, 0) == stack_pointer_rtx
4621 && GET_CODE (XEXP (x, 1)) == CONST_INT
4622 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4627 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4628 Pmode, VOIDmode, 0, 0, opnum, type);
4632 /* If both registers are hi-regs, then it's better to reload the
4633 entire expression rather than each register individually. That
4634 only requires one reload register rather than two. */
4635 if (GET_CODE (x) == PLUS
4636 && REG_P (XEXP (x, 0))
4637 && REG_P (XEXP (x, 1))
4638 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4639 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4644 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4645 Pmode, VOIDmode, 0, 0, opnum, type);
4652 /* Test for various thread-local symbols. */
4654 /* Return TRUE if X is a thread-local symbol. */
4657 arm_tls_symbol_p (rtx x)
4659 if (! TARGET_HAVE_TLS)
4662 if (GET_CODE (x) != SYMBOL_REF)
4665 return SYMBOL_REF_TLS_MODEL (x) != 0;
4668 /* Helper for arm_tls_referenced_p. */
4671 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4673 if (GET_CODE (*x) == SYMBOL_REF)
4674 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4676 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4677 TLS offsets, not real symbol references. */
4678 if (GET_CODE (*x) == UNSPEC
4679 && XINT (*x, 1) == UNSPEC_TLS)
4685 /* Return TRUE if X contains any TLS symbol references. */
4688 arm_tls_referenced_p (rtx x)
4690 if (! TARGET_HAVE_TLS)
4693 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4696 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4699 arm_cannot_force_const_mem (rtx x)
4703 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4705 split_const (x, &base, &offset);
4706 if (GET_CODE (base) == SYMBOL_REF
4707 && !offset_within_block_p (base, INTVAL (offset)))
4710 return arm_tls_referenced_p (x);
4713 #define REG_OR_SUBREG_REG(X) \
4714 (GET_CODE (X) == REG \
4715 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4717 #define REG_OR_SUBREG_RTX(X) \
4718 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4720 #ifndef COSTS_N_INSNS
4721 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4724 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4726 enum machine_mode mode = GET_MODE (x);
4739 return COSTS_N_INSNS (1);
4742 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4745 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4752 return COSTS_N_INSNS (2) + cycles;
4754 return COSTS_N_INSNS (1) + 16;
4757 return (COSTS_N_INSNS (1)
4758 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4759 + GET_CODE (SET_DEST (x)) == MEM));
4764 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4766 if (thumb_shiftable_const (INTVAL (x)))
4767 return COSTS_N_INSNS (2);
4768 return COSTS_N_INSNS (3);
4770 else if ((outer == PLUS || outer == COMPARE)
4771 && INTVAL (x) < 256 && INTVAL (x) > -256)
4773 else if (outer == AND
4774 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4775 return COSTS_N_INSNS (1);
4776 else if (outer == ASHIFT || outer == ASHIFTRT
4777 || outer == LSHIFTRT)
4779 return COSTS_N_INSNS (2);
4785 return COSTS_N_INSNS (3);
4803 /* XXX another guess. */
4804 /* Memory costs quite a lot for the first word, but subsequent words
4805 load at the equivalent of a single insn each. */
4806 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4807 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4812 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4817 /* XXX still guessing. */
4818 switch (GET_MODE (XEXP (x, 0)))
4821 return (1 + (mode == DImode ? 4 : 0)
4822 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4825 return (4 + (mode == DImode ? 4 : 0)
4826 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4829 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4841 /* Worker routine for arm_rtx_costs. */
4842 /* ??? This needs updating for thumb2. */
4844 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4846 enum machine_mode mode = GET_MODE (x);
4847 enum rtx_code subcode;
4853 /* Memory costs quite a lot for the first word, but subsequent words
4854 load at the equivalent of a single insn each. */
4855 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4856 + (GET_CODE (x) == SYMBOL_REF
4857 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4863 return optimize_size ? COSTS_N_INSNS (2) : 100;
4866 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4873 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4875 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4876 + ((GET_CODE (XEXP (x, 0)) == REG
4877 || (GET_CODE (XEXP (x, 0)) == SUBREG
4878 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4880 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4881 || (GET_CODE (XEXP (x, 0)) == SUBREG
4882 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4884 + ((GET_CODE (XEXP (x, 1)) == REG
4885 || (GET_CODE (XEXP (x, 1)) == SUBREG
4886 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4887 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4891 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4893 extra_cost = rtx_cost (XEXP (x, 1), code);
4894 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4895 extra_cost += 4 * ARM_NUM_REGS (mode);
4900 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4901 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4902 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4903 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4906 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4907 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4908 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4909 && arm_const_double_rtx (XEXP (x, 1))))
4911 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4912 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4913 && arm_const_double_rtx (XEXP (x, 0))))
4916 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4917 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4918 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4919 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4920 || subcode == ASHIFTRT || subcode == LSHIFTRT
4921 || subcode == ROTATE || subcode == ROTATERT
4923 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4924 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4925 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4926 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4927 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4928 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4929 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4934 if (GET_CODE (XEXP (x, 0)) == MULT)
4936 extra_cost = rtx_cost (XEXP (x, 0), code);
4937 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4938 extra_cost += 4 * ARM_NUM_REGS (mode);
4942 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4943 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4944 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4945 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4946 && arm_const_double_rtx (XEXP (x, 1))))
4950 case AND: case XOR: case IOR:
4953 /* Normally the frame registers will be spilt into reg+const during
4954 reload, so it is a bad idea to combine them with other instructions,
4955 since then they might not be moved outside of loops. As a compromise
4956 we allow integration with ops that have a constant as their second
4958 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4959 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4960 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4961 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4962 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4966 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4967 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4968 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4969 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4972 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4973 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4974 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4975 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4976 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4979 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4980 return (1 + extra_cost
4981 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4982 || subcode == LSHIFTRT || subcode == ASHIFTRT
4983 || subcode == ROTATE || subcode == ROTATERT
4985 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4986 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4987 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4988 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4989 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4990 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4996 /* This should have been handled by the CPU specific routines. */
5000 if (arm_arch3m && mode == SImode
5001 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5002 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5003 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5004 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5005 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5006 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5011 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5012 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5016 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5018 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5021 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5029 return 4 + (mode == DImode ? 4 : 0);
5032 /* ??? value extensions are cheaper on armv6. */
5033 if (GET_MODE (XEXP (x, 0)) == QImode)
5034 return (4 + (mode == DImode ? 4 : 0)
5035 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5038 switch (GET_MODE (XEXP (x, 0)))
5041 return (1 + (mode == DImode ? 4 : 0)
5042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5045 return (4 + (mode == DImode ? 4 : 0)
5046 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5049 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5064 if (const_ok_for_arm (INTVAL (x)))
5065 return outer == SET ? 2 : -1;
5066 else if (outer == AND
5067 && const_ok_for_arm (~INTVAL (x)))
5069 else if ((outer == COMPARE
5070 || outer == PLUS || outer == MINUS)
5071 && const_ok_for_arm (-INTVAL (x)))
5082 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5083 return outer == SET ? 2 : -1;
5084 else if ((outer == COMPARE || outer == PLUS)
5085 && neg_const_double_rtx_ok_for_fpa (x))
5094 /* RTX costs when optimizing for size. */
5096 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5098 enum machine_mode mode = GET_MODE (x);
5102 /* XXX TBD. For now, use the standard costs. */
5103 *total = thumb1_rtx_costs (x, code, outer_code);
5110 /* A memory access costs 1 insn if the mode is small, or the address is
5111 a single register, otherwise it costs one insn per word. */
5112 if (REG_P (XEXP (x, 0)))
5113 *total = COSTS_N_INSNS (1);
5115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5122 /* Needs a libcall, so it costs about this. */
5123 *total = COSTS_N_INSNS (2);
5127 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5129 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5137 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5139 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5142 else if (mode == SImode)
5144 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5145 /* Slightly disparage register shifts, but not by much. */
5146 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5147 *total += 1 + rtx_cost (XEXP (x, 1), code);
5151 /* Needs a libcall. */
5152 *total = COSTS_N_INSNS (2);
5156 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5158 *total = COSTS_N_INSNS (1);
5164 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5165 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5167 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5168 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5169 || subcode1 == ROTATE || subcode1 == ROTATERT
5170 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5171 || subcode1 == ASHIFTRT)
5173 /* It's just the cost of the two operands. */
5178 *total = COSTS_N_INSNS (1);
5182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5186 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5188 *total = COSTS_N_INSNS (1);
5193 case AND: case XOR: case IOR:
5196 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5198 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5199 || subcode == LSHIFTRT || subcode == ASHIFTRT
5200 || (code == AND && subcode == NOT))
5202 /* It's just the cost of the two operands. */
5208 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5212 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5216 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5217 *total = COSTS_N_INSNS (1);
5220 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5229 if (cc_register (XEXP (x, 0), VOIDmode))
5232 *total = COSTS_N_INSNS (1);
5236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5237 *total = COSTS_N_INSNS (1);
5239 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5244 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5246 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5247 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5250 *total += COSTS_N_INSNS (1);
5255 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5257 switch (GET_MODE (XEXP (x, 0)))
5260 *total += COSTS_N_INSNS (1);
5264 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5270 *total += COSTS_N_INSNS (2);
5275 *total += COSTS_N_INSNS (1);
5280 if (const_ok_for_arm (INTVAL (x)))
5281 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5282 else if (const_ok_for_arm (~INTVAL (x)))
5283 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5284 else if (const_ok_for_arm (-INTVAL (x)))
5286 if (outer_code == COMPARE || outer_code == PLUS
5287 || outer_code == MINUS)
5290 *total = COSTS_N_INSNS (1);
5293 *total = COSTS_N_INSNS (2);
5299 *total = COSTS_N_INSNS (2);
5303 *total = COSTS_N_INSNS (4);
5307 if (mode != VOIDmode)
5308 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5310 *total = COSTS_N_INSNS (4); /* How knows? */
5315 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5316 supported on any "slowmul" cores, so it can be ignored. */
5319 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5321 enum machine_mode mode = GET_MODE (x);
5325 *total = thumb1_rtx_costs (x, code, outer_code);
5332 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5339 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5341 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5342 & (unsigned HOST_WIDE_INT) 0xffffffff);
5343 int cost, const_ok = const_ok_for_arm (i);
5344 int j, booth_unit_size;
5346 /* Tune as appropriate. */
5347 cost = const_ok ? 4 : 8;
5348 booth_unit_size = 2;
5349 for (j = 0; i && j < 32; j += booth_unit_size)
5351 i >>= booth_unit_size;
5359 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5360 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5364 *total = arm_rtx_costs_1 (x, code, outer_code);
5370 /* RTX cost for cores with a fast multiply unit (M variants). */
5373 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5375 enum machine_mode mode = GET_MODE (x);
5379 *total = thumb1_rtx_costs (x, code, outer_code);
5383 /* ??? should thumb2 use different costs? */
5387 /* There is no point basing this on the tuning, since it is always the
5388 fast variant if it exists at all. */
5390 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5391 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5392 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5399 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5406 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5408 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5409 & (unsigned HOST_WIDE_INT) 0xffffffff);
5410 int cost, const_ok = const_ok_for_arm (i);
5411 int j, booth_unit_size;
5413 /* Tune as appropriate. */
5414 cost = const_ok ? 4 : 8;
5415 booth_unit_size = 8;
5416 for (j = 0; i && j < 32; j += booth_unit_size)
5418 i >>= booth_unit_size;
5426 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5427 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5431 *total = arm_rtx_costs_1 (x, code, outer_code);
5437 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5438 so it can be ignored. */
5441 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5443 enum machine_mode mode = GET_MODE (x);
5447 *total = thumb1_rtx_costs (x, code, outer_code);
5454 /* There is no point basing this on the tuning, since it is always the
5455 fast variant if it exists at all. */
5457 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5458 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5459 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5466 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5473 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5475 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5476 & (unsigned HOST_WIDE_INT) 0xffffffff);
5477 int cost, const_ok = const_ok_for_arm (i);
5478 unsigned HOST_WIDE_INT masked_const;
5480 /* The cost will be related to two insns.
5481 First a load of the constant (MOV or LDR), then a multiply. */
5484 cost += 1; /* LDR is probably more expensive because
5485 of longer result latency. */
5486 masked_const = i & 0xffff8000;
5487 if (masked_const != 0 && masked_const != 0xffff8000)
5489 masked_const = i & 0xf8000000;
5490 if (masked_const == 0 || masked_const == 0xf8000000)
5499 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5500 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5504 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5505 will stall until the multiplication is complete. */
5506 if (GET_CODE (XEXP (x, 0)) == MULT)
5507 *total = 4 + rtx_cost (XEXP (x, 0), code);
5509 *total = arm_rtx_costs_1 (x, code, outer_code);
5513 *total = arm_rtx_costs_1 (x, code, outer_code);
5519 /* RTX costs for 9e (and later) cores. */
5522 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5524 enum machine_mode mode = GET_MODE (x);
5533 *total = COSTS_N_INSNS (3);
5537 *total = thumb1_rtx_costs (x, code, outer_code);
5545 /* There is no point basing this on the tuning, since it is always the
5546 fast variant if it exists at all. */
5548 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5549 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5550 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5575 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5579 *total = arm_rtx_costs_1 (x, code, outer_code);
5583 /* All address computations that can be done are free, but rtx cost returns
5584 the same for practically all of them. So we weight the different types
5585 of address here in the order (most pref first):
5586 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5588 arm_arm_address_cost (rtx x)
5590 enum rtx_code c = GET_CODE (x);
5592 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5594 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5597 if (c == PLUS || c == MINUS)
5599 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5602 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5612 arm_thumb_address_cost (rtx x)
5614 enum rtx_code c = GET_CODE (x);
5619 && GET_CODE (XEXP (x, 0)) == REG
5620 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5627 arm_address_cost (rtx x)
5629 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5633 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5637 /* Some true dependencies can have a higher cost depending
5638 on precisely how certain input operands are used. */
5640 && REG_NOTE_KIND (link) == 0
5641 && recog_memoized (insn) >= 0
5642 && recog_memoized (dep) >= 0)
5644 int shift_opnum = get_attr_shift (insn);
5645 enum attr_type attr_type = get_attr_type (dep);
5647 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5648 operand for INSN. If we have a shifted input operand and the
5649 instruction we depend on is another ALU instruction, then we may
5650 have to account for an additional stall. */
5651 if (shift_opnum != 0
5652 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5654 rtx shifted_operand;
5657 /* Get the shifted operand. */
5658 extract_insn (insn);
5659 shifted_operand = recog_data.operand[shift_opnum];
5661 /* Iterate over all the operands in DEP. If we write an operand
5662 that overlaps with SHIFTED_OPERAND, then we have increase the
5663 cost of this dependency. */
5665 preprocess_constraints ();
5666 for (opno = 0; opno < recog_data.n_operands; opno++)
5668 /* We can ignore strict inputs. */
5669 if (recog_data.operand_type[opno] == OP_IN)
5672 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5679 /* XXX This is not strictly true for the FPA. */
5680 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5681 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5684 /* Call insns don't incur a stall, even if they follow a load. */
5685 if (REG_NOTE_KIND (link) == 0
5686 && GET_CODE (insn) == CALL_INSN)
5689 if ((i_pat = single_set (insn)) != NULL
5690 && GET_CODE (SET_SRC (i_pat)) == MEM
5691 && (d_pat = single_set (dep)) != NULL
5692 && GET_CODE (SET_DEST (d_pat)) == MEM)
5694 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5695 /* This is a load after a store, there is no conflict if the load reads
5696 from a cached area. Assume that loads from the stack, and from the
5697 constant pool are cached, and that others will miss. This is a
5700 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5701 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5702 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5703 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5710 static int fp_consts_inited = 0;
5712 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5713 static const char * const strings_fp[8] =
5716 "4", "5", "0.5", "10"
5719 static REAL_VALUE_TYPE values_fp[8];
5722 init_fp_table (void)
5728 fp_consts_inited = 1;
5730 fp_consts_inited = 8;
5732 for (i = 0; i < fp_consts_inited; i++)
5734 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5739 /* Return TRUE if rtx X is a valid immediate FP constant. */
5741 arm_const_double_rtx (rtx x)
5746 if (!fp_consts_inited)
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5750 if (REAL_VALUE_MINUS_ZERO (r))
5753 for (i = 0; i < fp_consts_inited; i++)
5754 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5760 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5762 neg_const_double_rtx_ok_for_fpa (rtx x)
5767 if (!fp_consts_inited)
5770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5771 r = REAL_VALUE_NEGATE (r);
5772 if (REAL_VALUE_MINUS_ZERO (r))
5775 for (i = 0; i < 8; i++)
5776 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5783 /* VFPv3 has a fairly wide range of representable immediates, formed from
5784 "quarter-precision" floating-point values. These can be evaluated using this
5785 formula (with ^ for exponentiation):
5789 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5790 16 <= n <= 31 and 0 <= r <= 7.
5792 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5794 - A (most-significant) is the sign bit.
5795 - BCD are the exponent (encoded as r XOR 3).
5796 - EFGH are the mantissa (encoded as n - 16).
5799 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5800 fconst[sd] instruction, or -1 if X isn't suitable. */
5802 vfp3_const_double_index (rtx x)
5804 REAL_VALUE_TYPE r, m;
5806 unsigned HOST_WIDE_INT mantissa, mant_hi;
5807 unsigned HOST_WIDE_INT mask;
5808 HOST_WIDE_INT m1, m2;
5809 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5811 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5814 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5816 /* We can't represent these things, so detect them first. */
5817 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5820 /* Extract sign, exponent and mantissa. */
5821 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5822 r = REAL_VALUE_ABS (r);
5823 exponent = REAL_EXP (&r);
5824 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5825 highest (sign) bit, with a fixed binary point at bit point_pos.
5826 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5827 bits for the mantissa, this may fail (low bits would be lost). */
5828 real_ldexp (&m, &r, point_pos - exponent);
5829 REAL_VALUE_TO_INT (&m1, &m2, m);
5833 /* If there are bits set in the low part of the mantissa, we can't
5834 represent this value. */
5838 /* Now make it so that mantissa contains the most-significant bits, and move
5839 the point_pos to indicate that the least-significant bits have been
5841 point_pos -= HOST_BITS_PER_WIDE_INT;
5844 /* We can permit four significant bits of mantissa only, plus a high bit
5845 which is always 1. */
5846 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5847 if ((mantissa & mask) != 0)
5850 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5851 mantissa >>= point_pos - 5;
5853 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5854 floating-point immediate zero with Neon using an integer-zero load, but
5855 that case is handled elsewhere.) */
5859 gcc_assert (mantissa >= 16 && mantissa <= 31);
5861 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5862 normalized significands are in the range [1, 2). (Our mantissa is shifted
5863 left 4 places at this point relative to normalized IEEE754 values). GCC
5864 internally uses [0.5, 1) (see real.c), so the exponent returned from
5865 REAL_EXP must be altered. */
5866 exponent = 5 - exponent;
5868 if (exponent < 0 || exponent > 7)
5871 /* Sign, mantissa and exponent are now in the correct form to plug into the
5872 formulae described in the comment above. */
5873 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5876 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5878 vfp3_const_double_rtx (rtx x)
5883 return vfp3_const_double_index (x) != -1;
5886 /* Recognize immediates which can be used in various Neon instructions. Legal
5887 immediates are described by the following table (for VMVN variants, the
5888 bitwise inverse of the constant shown is recognized. In either case, VMOV
5889 is output and the correct instruction to use for a given constant is chosen
5890 by the assembler). The constant shown is replicated across all elements of
5891 the destination vector.
5893 insn elems variant constant (binary)
5894 ---- ----- ------- -----------------
5895 vmov i32 0 00000000 00000000 00000000 abcdefgh
5896 vmov i32 1 00000000 00000000 abcdefgh 00000000
5897 vmov i32 2 00000000 abcdefgh 00000000 00000000
5898 vmov i32 3 abcdefgh 00000000 00000000 00000000
5899 vmov i16 4 00000000 abcdefgh
5900 vmov i16 5 abcdefgh 00000000
5901 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5902 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5903 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5904 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5905 vmvn i16 10 00000000 abcdefgh
5906 vmvn i16 11 abcdefgh 00000000
5907 vmov i32 12 00000000 00000000 abcdefgh 11111111
5908 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5909 vmov i32 14 00000000 abcdefgh 11111111 11111111
5910 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5912 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5913 eeeeeeee ffffffff gggggggg hhhhhhhh
5914 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5916 For case 18, B = !b. Representable values are exactly those accepted by
5917 vfp3_const_double_index, but are output as floating-point numbers rather
5920 Variants 0-5 (inclusive) may also be used as immediates for the second
5921 operand of VORR/VBIC instructions.
5923 The INVERSE argument causes the bitwise inverse of the given operand to be
5924 recognized instead (used for recognizing legal immediates for the VAND/VORN
5925 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5926 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5927 output, rather than the real insns vbic/vorr).
5929 INVERSE makes no difference to the recognition of float vectors.
5931 The return value is the variant of immediate as shown in the above table, or
5932 -1 if the given value doesn't match any of the listed patterns.
5935 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5936 rtx *modconst, int *elementwidth)
5938 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5940 for (i = 0; i < idx; i += (STRIDE)) \
5945 immtype = (CLASS); \
5946 elsize = (ELSIZE); \
5950 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5951 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5952 unsigned char bytes[16];
5953 int immtype = -1, matches;
5954 unsigned int invmask = inverse ? 0xff : 0;
5956 /* Vectors of float constants. */
5957 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5959 rtx el0 = CONST_VECTOR_ELT (op, 0);
5962 if (!vfp3_const_double_rtx (el0))
5965 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5967 for (i = 1; i < n_elts; i++)
5969 rtx elt = CONST_VECTOR_ELT (op, i);
5972 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5974 if (!REAL_VALUES_EQUAL (r0, re))
5979 *modconst = CONST_VECTOR_ELT (op, 0);
5987 /* Splat vector constant out into a byte vector. */
5988 for (i = 0; i < n_elts; i++)
5990 rtx el = CONST_VECTOR_ELT (op, i);
5991 unsigned HOST_WIDE_INT elpart;
5992 unsigned int part, parts;
5994 if (GET_CODE (el) == CONST_INT)
5996 elpart = INTVAL (el);
5999 else if (GET_CODE (el) == CONST_DOUBLE)
6001 elpart = CONST_DOUBLE_LOW (el);
6007 for (part = 0; part < parts; part++)
6010 for (byte = 0; byte < innersize; byte++)
6012 bytes[idx++] = (elpart & 0xff) ^ invmask;
6013 elpart >>= BITS_PER_UNIT;
6015 if (GET_CODE (el) == CONST_DOUBLE)
6016 elpart = CONST_DOUBLE_HIGH (el);
6021 gcc_assert (idx == GET_MODE_SIZE (mode));
6025 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6026 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6028 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6029 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6031 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6034 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6035 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6037 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6039 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6041 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6042 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6044 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6045 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6047 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6048 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6050 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6051 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6053 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6055 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6057 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6058 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6060 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6061 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6063 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6064 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6066 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6069 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6071 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6072 && bytes[i] == bytes[(i + 8) % idx]);
6080 *elementwidth = elsize;
6084 unsigned HOST_WIDE_INT imm = 0;
6086 /* Un-invert bytes of recognized vector, if neccessary. */
6088 for (i = 0; i < idx; i++)
6089 bytes[i] ^= invmask;
6093 /* FIXME: Broken on 32-bit H_W_I hosts. */
6094 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6096 for (i = 0; i < 8; i++)
6097 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6098 << (i * BITS_PER_UNIT);
6100 *modconst = GEN_INT (imm);
6104 unsigned HOST_WIDE_INT imm = 0;
6106 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6107 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6109 *modconst = GEN_INT (imm);
6117 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6118 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6119 float elements), and a modified constant (whatever should be output for a
6120 VMOV) in *MODCONST. */
6123 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6124 rtx *modconst, int *elementwidth)
6128 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6134 *modconst = tmpconst;
6137 *elementwidth = tmpwidth;
6142 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6143 the immediate is valid, write a constant suitable for using as an operand
6144 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6145 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6148 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6149 rtx *modconst, int *elementwidth)
6153 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6155 if (retval < 0 || retval > 5)
6159 *modconst = tmpconst;
6162 *elementwidth = tmpwidth;
6167 /* Return a string suitable for output of Neon immediate logic operation
6171 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6172 int inverse, int quad)
6174 int width, is_valid;
6175 static char templ[40];
6177 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6179 gcc_assert (is_valid != 0);
6182 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6184 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6189 /* Output a sequence of pairwise operations to implement a reduction.
6190 NOTE: We do "too much work" here, because pairwise operations work on two
6191 registers-worth of operands in one go. Unfortunately we can't exploit those
6192 extra calculations to do the full operation in fewer steps, I don't think.
6193 Although all vector elements of the result but the first are ignored, we
6194 actually calculate the same result in each of the elements. An alternative
6195 such as initially loading a vector with zero to use as each of the second
6196 operands would use up an additional register and take an extra instruction,
6197 for no particular gain. */
6200 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6201 rtx (*reduc) (rtx, rtx, rtx))
6203 enum machine_mode inner = GET_MODE_INNER (mode);
6204 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6207 for (i = parts / 2; i >= 1; i /= 2)
6209 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6210 emit_insn (reduc (dest, tmpsum, tmpsum));
6215 /* Initialise a vector with non-constant elements. FIXME: We can do better
6216 than the current implementation (building a vector on the stack and then
6217 loading it) in many cases. See rs6000.c. */
6220 neon_expand_vector_init (rtx target, rtx vals)
6222 enum machine_mode mode = GET_MODE (target);
6223 enum machine_mode inner = GET_MODE_INNER (mode);
6224 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6227 gcc_assert (VECTOR_MODE_P (mode));
6229 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6230 for (i = 0; i < n_elts; i++)
6231 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6232 XVECEXP (vals, 0, i));
6234 emit_move_insn (target, mem);
6237 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6238 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6239 reported source locations are bogus. */
6242 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6247 gcc_assert (GET_CODE (operand) == CONST_INT);
6249 lane = INTVAL (operand);
6251 if (lane < low || lane >= high)
6255 /* Bounds-check lanes. */
6258 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6260 bounds_check (operand, low, high, "lane out of range");
6263 /* Bounds-check constants. */
6266 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6268 bounds_check (operand, low, high, "constant out of range");
6272 neon_element_bits (enum machine_mode mode)
6275 return GET_MODE_BITSIZE (mode);
6277 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6281 /* Predicates for `match_operand' and `match_operator'. */
6283 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6285 cirrus_memory_offset (rtx op)
6287 /* Reject eliminable registers. */
6288 if (! (reload_in_progress || reload_completed)
6289 && ( reg_mentioned_p (frame_pointer_rtx, op)
6290 || reg_mentioned_p (arg_pointer_rtx, op)
6291 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6292 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6293 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6294 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6297 if (GET_CODE (op) == MEM)
6303 /* Match: (mem (reg)). */
6304 if (GET_CODE (ind) == REG)
6310 if (GET_CODE (ind) == PLUS
6311 && GET_CODE (XEXP (ind, 0)) == REG
6312 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6313 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6320 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6321 WB is true if full writeback address modes are allowed and is false
6322 if limited writeback address modes (POST_INC and PRE_DEC) are
6326 arm_coproc_mem_operand (rtx op, bool wb)
6330 /* Reject eliminable registers. */
6331 if (! (reload_in_progress || reload_completed)
6332 && ( reg_mentioned_p (frame_pointer_rtx, op)
6333 || reg_mentioned_p (arg_pointer_rtx, op)
6334 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6335 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6336 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6337 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6340 /* Constants are converted into offsets from labels. */
6341 if (GET_CODE (op) != MEM)
6346 if (reload_completed
6347 && (GET_CODE (ind) == LABEL_REF
6348 || (GET_CODE (ind) == CONST
6349 && GET_CODE (XEXP (ind, 0)) == PLUS
6350 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6351 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6354 /* Match: (mem (reg)). */
6355 if (GET_CODE (ind) == REG)
6356 return arm_address_register_rtx_p (ind, 0);
6358 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
6359 acceptable in any case (subject to verification by
6360 arm_address_register_rtx_p). We need WB to be true to accept
6361 PRE_INC and POST_DEC. */
6362 if (GET_CODE (ind) == POST_INC
6363 || GET_CODE (ind) == PRE_DEC
6365 && (GET_CODE (ind) == PRE_INC
6366 || GET_CODE (ind) == POST_DEC)))
6367 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6370 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6371 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6372 && GET_CODE (XEXP (ind, 1)) == PLUS
6373 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6374 ind = XEXP (ind, 1);
6379 if (GET_CODE (ind) == PLUS
6380 && GET_CODE (XEXP (ind, 0)) == REG
6381 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6382 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6383 && INTVAL (XEXP (ind, 1)) > -1024
6384 && INTVAL (XEXP (ind, 1)) < 1024
6385 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6391 /* Return TRUE if OP is a memory operand which we can load or store a vector
6392 to/from. If CORE is true, we're moving from ARM registers not Neon
6395 neon_vector_mem_operand (rtx op, bool core)
6399 /* Reject eliminable registers. */
6400 if (! (reload_in_progress || reload_completed)
6401 && ( reg_mentioned_p (frame_pointer_rtx, op)
6402 || reg_mentioned_p (arg_pointer_rtx, op)
6403 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6404 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6405 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6406 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6409 /* Constants are converted into offsets from labels. */
6410 if (GET_CODE (op) != MEM)
6415 if (reload_completed
6416 && (GET_CODE (ind) == LABEL_REF
6417 || (GET_CODE (ind) == CONST
6418 && GET_CODE (XEXP (ind, 0)) == PLUS
6419 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6420 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6423 /* Match: (mem (reg)). */
6424 if (GET_CODE (ind) == REG)
6425 return arm_address_register_rtx_p (ind, 0);
6427 /* Allow post-increment with Neon registers. */
6428 if (!core && GET_CODE (ind) == POST_INC)
6429 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6432 /* FIXME: We can support this too if we use VLD1/VST1. */
6434 && GET_CODE (ind) == POST_MODIFY
6435 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6436 && GET_CODE (XEXP (ind, 1)) == PLUS
6437 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6438 ind = XEXP (ind, 1);
6445 && GET_CODE (ind) == PLUS
6446 && GET_CODE (XEXP (ind, 0)) == REG
6447 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6448 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6449 && INTVAL (XEXP (ind, 1)) > -1024
6450 && INTVAL (XEXP (ind, 1)) < 1016
6451 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6457 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6460 neon_struct_mem_operand (rtx op)
6464 /* Reject eliminable registers. */
6465 if (! (reload_in_progress || reload_completed)
6466 && ( reg_mentioned_p (frame_pointer_rtx, op)
6467 || reg_mentioned_p (arg_pointer_rtx, op)
6468 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6469 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6470 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6471 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6474 /* Constants are converted into offsets from labels. */
6475 if (GET_CODE (op) != MEM)
6480 if (reload_completed
6481 && (GET_CODE (ind) == LABEL_REF
6482 || (GET_CODE (ind) == CONST
6483 && GET_CODE (XEXP (ind, 0)) == PLUS
6484 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6485 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6488 /* Match: (mem (reg)). */
6489 if (GET_CODE (ind) == REG)
6490 return arm_address_register_rtx_p (ind, 0);
6495 /* Return true if X is a register that will be eliminated later on. */
6497 arm_eliminable_register (rtx x)
6499 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6500 || REGNO (x) == ARG_POINTER_REGNUM
6501 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6502 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6505 /* Return GENERAL_REGS if a scratch register required to reload x to/from
6506 coprocessor registers. Otherwise return NO_REGS. */
6509 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6512 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6513 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6514 && neon_vector_mem_operand (x, FALSE))
6517 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6520 return GENERAL_REGS;
6523 /* Values which must be returned in the most-significant end of the return
6527 arm_return_in_msb (tree valtype)
6529 return (TARGET_AAPCS_BASED
6531 && (AGGREGATE_TYPE_P (valtype)
6532 || TREE_CODE (valtype) == COMPLEX_TYPE));
6535 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6536 Use by the Cirrus Maverick code which has to workaround
6537 a hardware bug triggered by such instructions. */
6539 arm_memory_load_p (rtx insn)
6541 rtx body, lhs, rhs;;
6543 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6546 body = PATTERN (insn);
6548 if (GET_CODE (body) != SET)
6551 lhs = XEXP (body, 0);
6552 rhs = XEXP (body, 1);
6554 lhs = REG_OR_SUBREG_RTX (lhs);
6556 /* If the destination is not a general purpose
6557 register we do not have to worry. */
6558 if (GET_CODE (lhs) != REG
6559 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6562 /* As well as loads from memory we also have to react
6563 to loads of invalid constants which will be turned
6564 into loads from the minipool. */
6565 return (GET_CODE (rhs) == MEM
6566 || GET_CODE (rhs) == SYMBOL_REF
6567 || note_invalid_constants (insn, -1, false));
6570 /* Return TRUE if INSN is a Cirrus instruction. */
6572 arm_cirrus_insn_p (rtx insn)
6574 enum attr_cirrus attr;
6576 /* get_attr cannot accept USE or CLOBBER. */
6578 || GET_CODE (insn) != INSN
6579 || GET_CODE (PATTERN (insn)) == USE
6580 || GET_CODE (PATTERN (insn)) == CLOBBER)
6583 attr = get_attr_cirrus (insn);
6585 return attr != CIRRUS_NOT;
6588 /* Cirrus reorg for invalid instruction combinations. */
6590 cirrus_reorg (rtx first)
6592 enum attr_cirrus attr;
6593 rtx body = PATTERN (first);
6597 /* Any branch must be followed by 2 non Cirrus instructions. */
6598 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6601 t = next_nonnote_insn (first);
6603 if (arm_cirrus_insn_p (t))
6606 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6610 emit_insn_after (gen_nop (), first);
6615 /* (float (blah)) is in parallel with a clobber. */
6616 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6617 body = XVECEXP (body, 0, 0);
6619 if (GET_CODE (body) == SET)
6621 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6623 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6624 be followed by a non Cirrus insn. */
6625 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6627 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6628 emit_insn_after (gen_nop (), first);
6632 else if (arm_memory_load_p (first))
6634 unsigned int arm_regno;
6636 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6637 ldr/cfmv64hr combination where the Rd field is the same
6638 in both instructions must be split with a non Cirrus
6645 /* Get Arm register number for ldr insn. */
6646 if (GET_CODE (lhs) == REG)
6647 arm_regno = REGNO (lhs);
6650 gcc_assert (GET_CODE (rhs) == REG);
6651 arm_regno = REGNO (rhs);
6655 first = next_nonnote_insn (first);
6657 if (! arm_cirrus_insn_p (first))
6660 body = PATTERN (first);
6662 /* (float (blah)) is in parallel with a clobber. */
6663 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6664 body = XVECEXP (body, 0, 0);
6666 if (GET_CODE (body) == FLOAT)
6667 body = XEXP (body, 0);
6669 if (get_attr_cirrus (first) == CIRRUS_MOVE
6670 && GET_CODE (XEXP (body, 1)) == REG
6671 && arm_regno == REGNO (XEXP (body, 1)))
6672 emit_insn_after (gen_nop (), first);
6678 /* get_attr cannot accept USE or CLOBBER. */
6680 || GET_CODE (first) != INSN
6681 || GET_CODE (PATTERN (first)) == USE
6682 || GET_CODE (PATTERN (first)) == CLOBBER)
6685 attr = get_attr_cirrus (first);
6687 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6688 must be followed by a non-coprocessor instruction. */
6689 if (attr == CIRRUS_COMPARE)
6693 t = next_nonnote_insn (first);
6695 if (arm_cirrus_insn_p (t))
6698 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6702 emit_insn_after (gen_nop (), first);
6708 /* Return TRUE if X references a SYMBOL_REF. */
6710 symbol_mentioned_p (rtx x)
6715 if (GET_CODE (x) == SYMBOL_REF)
6718 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6719 are constant offsets, not symbols. */
6720 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6723 fmt = GET_RTX_FORMAT (GET_CODE (x));
6725 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6731 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6732 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6735 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6742 /* Return TRUE if X references a LABEL_REF. */
6744 label_mentioned_p (rtx x)
6749 if (GET_CODE (x) == LABEL_REF)
6752 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6753 instruction, but they are constant offsets, not symbols. */
6754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6757 fmt = GET_RTX_FORMAT (GET_CODE (x));
6758 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6764 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6765 if (label_mentioned_p (XVECEXP (x, i, j)))
6768 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6776 tls_mentioned_p (rtx x)
6778 switch (GET_CODE (x))
6781 return tls_mentioned_p (XEXP (x, 0));
6784 if (XINT (x, 1) == UNSPEC_TLS)
6792 /* Must not copy a SET whose source operand is PC-relative. */
6795 arm_cannot_copy_insn_p (rtx insn)
6797 rtx pat = PATTERN (insn);
6799 if (GET_CODE (pat) == SET)
6801 rtx rhs = SET_SRC (pat);
6803 if (GET_CODE (rhs) == UNSPEC
6804 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6807 if (GET_CODE (rhs) == MEM
6808 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6809 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6819 enum rtx_code code = GET_CODE (x);
6836 /* Return 1 if memory locations are adjacent. */
6838 adjacent_mem_locations (rtx a, rtx b)
6840 /* We don't guarantee to preserve the order of these memory refs. */
6841 if (volatile_refs_p (a) || volatile_refs_p (b))
6844 if ((GET_CODE (XEXP (a, 0)) == REG
6845 || (GET_CODE (XEXP (a, 0)) == PLUS
6846 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6847 && (GET_CODE (XEXP (b, 0)) == REG
6848 || (GET_CODE (XEXP (b, 0)) == PLUS
6849 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6851 HOST_WIDE_INT val0 = 0, val1 = 0;
6855 if (GET_CODE (XEXP (a, 0)) == PLUS)
6857 reg0 = XEXP (XEXP (a, 0), 0);
6858 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6863 if (GET_CODE (XEXP (b, 0)) == PLUS)
6865 reg1 = XEXP (XEXP (b, 0), 0);
6866 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6871 /* Don't accept any offset that will require multiple
6872 instructions to handle, since this would cause the
6873 arith_adjacentmem pattern to output an overlong sequence. */
6874 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6877 /* Don't allow an eliminable register: register elimination can make
6878 the offset too large. */
6879 if (arm_eliminable_register (reg0))
6882 val_diff = val1 - val0;
6886 /* If the target has load delay slots, then there's no benefit
6887 to using an ldm instruction unless the offset is zero and
6888 we are optimizing for size. */
6889 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6890 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6891 && (val_diff == 4 || val_diff == -4));
6894 return ((REGNO (reg0) == REGNO (reg1))
6895 && (val_diff == 4 || val_diff == -4));
6902 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6903 HOST_WIDE_INT *load_offset)
6905 int unsorted_regs[4];
6906 HOST_WIDE_INT unsorted_offsets[4];
6911 /* Can only handle 2, 3, or 4 insns at present,
6912 though could be easily extended if required. */
6913 gcc_assert (nops >= 2 && nops <= 4);
6915 /* Loop over the operands and check that the memory references are
6916 suitable (i.e. immediate offsets from the same base register). At
6917 the same time, extract the target register, and the memory
6919 for (i = 0; i < nops; i++)
6924 /* Convert a subreg of a mem into the mem itself. */
6925 if (GET_CODE (operands[nops + i]) == SUBREG)
6926 operands[nops + i] = alter_subreg (operands + (nops + i));
6928 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6930 /* Don't reorder volatile memory references; it doesn't seem worth
6931 looking for the case where the order is ok anyway. */
6932 if (MEM_VOLATILE_P (operands[nops + i]))
6935 offset = const0_rtx;
6937 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6938 || (GET_CODE (reg) == SUBREG
6939 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6940 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6941 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6943 || (GET_CODE (reg) == SUBREG
6944 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6945 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6950 base_reg = REGNO (reg);
6951 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6952 ? REGNO (operands[i])
6953 : REGNO (SUBREG_REG (operands[i])));
6958 if (base_reg != (int) REGNO (reg))
6959 /* Not addressed from the same base register. */
6962 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6963 ? REGNO (operands[i])
6964 : REGNO (SUBREG_REG (operands[i])));
6965 if (unsorted_regs[i] < unsorted_regs[order[0]])
6969 /* If it isn't an integer register, or if it overwrites the
6970 base register but isn't the last insn in the list, then
6971 we can't do this. */
6972 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6973 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6976 unsorted_offsets[i] = INTVAL (offset);
6979 /* Not a suitable memory address. */
6983 /* All the useful information has now been extracted from the
6984 operands into unsorted_regs and unsorted_offsets; additionally,
6985 order[0] has been set to the lowest numbered register in the
6986 list. Sort the registers into order, and check that the memory
6987 offsets are ascending and adjacent. */
6989 for (i = 1; i < nops; i++)
6993 order[i] = order[i - 1];
6994 for (j = 0; j < nops; j++)
6995 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6996 && (order[i] == order[i - 1]
6997 || unsorted_regs[j] < unsorted_regs[order[i]]))
7000 /* Have we found a suitable register? if not, one must be used more
7002 if (order[i] == order[i - 1])
7005 /* Is the memory address adjacent and ascending? */
7006 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7014 for (i = 0; i < nops; i++)
7015 regs[i] = unsorted_regs[order[i]];
7017 *load_offset = unsorted_offsets[order[0]];
7020 if (unsorted_offsets[order[0]] == 0)
7021 return 1; /* ldmia */
7023 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7024 return 2; /* ldmib */
7026 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7027 return 3; /* ldmda */
7029 if (unsorted_offsets[order[nops - 1]] == -4)
7030 return 4; /* ldmdb */
7032 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7033 if the offset isn't small enough. The reason 2 ldrs are faster
7034 is because these ARMs are able to do more than one cache access
7035 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7036 whilst the ARM8 has a double bandwidth cache. This means that
7037 these cores can do both an instruction fetch and a data fetch in
7038 a single cycle, so the trick of calculating the address into a
7039 scratch register (one of the result regs) and then doing a load
7040 multiple actually becomes slower (and no smaller in code size).
7041 That is the transformation
7043 ldr rd1, [rbase + offset]
7044 ldr rd2, [rbase + offset + 4]
7048 add rd1, rbase, offset
7049 ldmia rd1, {rd1, rd2}
7051 produces worse code -- '3 cycles + any stalls on rd2' instead of
7052 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7053 access per cycle, the first sequence could never complete in less
7054 than 6 cycles, whereas the ldm sequence would only take 5 and
7055 would make better use of sequential accesses if not hitting the
7058 We cheat here and test 'arm_ld_sched' which we currently know to
7059 only be true for the ARM8, ARM9 and StrongARM. If this ever
7060 changes, then the test below needs to be reworked. */
7061 if (nops == 2 && arm_ld_sched)
7064 /* Can't do it without setting up the offset, only do this if it takes
7065 no more than one insn. */
7066 return (const_ok_for_arm (unsorted_offsets[order[0]])
7067 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7071 emit_ldm_seq (rtx *operands, int nops)
7075 HOST_WIDE_INT offset;
7079 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7082 strcpy (buf, "ldm%(ia%)\t");
7086 strcpy (buf, "ldm%(ib%)\t");
7090 strcpy (buf, "ldm%(da%)\t");
7094 strcpy (buf, "ldm%(db%)\t");
7099 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7100 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7103 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7104 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7106 output_asm_insn (buf, operands);
7108 strcpy (buf, "ldm%(ia%)\t");
7115 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7116 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7118 for (i = 1; i < nops; i++)
7119 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7120 reg_names[regs[i]]);
7122 strcat (buf, "}\t%@ phole ldm");
7124 output_asm_insn (buf, operands);
7129 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7130 HOST_WIDE_INT * load_offset)
7132 int unsorted_regs[4];
7133 HOST_WIDE_INT unsorted_offsets[4];
7138 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7139 extended if required. */
7140 gcc_assert (nops >= 2 && nops <= 4);
7142 /* Loop over the operands and check that the memory references are
7143 suitable (i.e. immediate offsets from the same base register). At
7144 the same time, extract the target register, and the memory
7146 for (i = 0; i < nops; i++)
7151 /* Convert a subreg of a mem into the mem itself. */
7152 if (GET_CODE (operands[nops + i]) == SUBREG)
7153 operands[nops + i] = alter_subreg (operands + (nops + i));
7155 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7157 /* Don't reorder volatile memory references; it doesn't seem worth
7158 looking for the case where the order is ok anyway. */
7159 if (MEM_VOLATILE_P (operands[nops + i]))
7162 offset = const0_rtx;
7164 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7165 || (GET_CODE (reg) == SUBREG
7166 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7167 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7168 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7170 || (GET_CODE (reg) == SUBREG
7171 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7172 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7177 base_reg = REGNO (reg);
7178 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7179 ? REGNO (operands[i])
7180 : REGNO (SUBREG_REG (operands[i])));
7185 if (base_reg != (int) REGNO (reg))
7186 /* Not addressed from the same base register. */
7189 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7190 ? REGNO (operands[i])
7191 : REGNO (SUBREG_REG (operands[i])));
7192 if (unsorted_regs[i] < unsorted_regs[order[0]])
7196 /* If it isn't an integer register, then we can't do this. */
7197 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7200 unsorted_offsets[i] = INTVAL (offset);
7203 /* Not a suitable memory address. */
7207 /* All the useful information has now been extracted from the
7208 operands into unsorted_regs and unsorted_offsets; additionally,
7209 order[0] has been set to the lowest numbered register in the
7210 list. Sort the registers into order, and check that the memory
7211 offsets are ascending and adjacent. */
7213 for (i = 1; i < nops; i++)
7217 order[i] = order[i - 1];
7218 for (j = 0; j < nops; j++)
7219 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7220 && (order[i] == order[i - 1]
7221 || unsorted_regs[j] < unsorted_regs[order[i]]))
7224 /* Have we found a suitable register? if not, one must be used more
7226 if (order[i] == order[i - 1])
7229 /* Is the memory address adjacent and ascending? */
7230 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7238 for (i = 0; i < nops; i++)
7239 regs[i] = unsorted_regs[order[i]];
7241 *load_offset = unsorted_offsets[order[0]];
7244 if (unsorted_offsets[order[0]] == 0)
7245 return 1; /* stmia */
7247 if (unsorted_offsets[order[0]] == 4)
7248 return 2; /* stmib */
7250 if (unsorted_offsets[order[nops - 1]] == 0)
7251 return 3; /* stmda */
7253 if (unsorted_offsets[order[nops - 1]] == -4)
7254 return 4; /* stmdb */
7260 emit_stm_seq (rtx *operands, int nops)
7264 HOST_WIDE_INT offset;
7268 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7271 strcpy (buf, "stm%(ia%)\t");
7275 strcpy (buf, "stm%(ib%)\t");
7279 strcpy (buf, "stm%(da%)\t");
7283 strcpy (buf, "stm%(db%)\t");
7290 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7291 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7293 for (i = 1; i < nops; i++)
7294 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7295 reg_names[regs[i]]);
7297 strcat (buf, "}\t%@ phole stm");
7299 output_asm_insn (buf, operands);
7303 /* Routines for use in generating RTL. */
7306 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7307 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7309 HOST_WIDE_INT offset = *offsetp;
7312 int sign = up ? 1 : -1;
7315 /* XScale has load-store double instructions, but they have stricter
7316 alignment requirements than load-store multiple, so we cannot
7319 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7320 the pipeline until completion.
7328 An ldr instruction takes 1-3 cycles, but does not block the
7337 Best case ldr will always win. However, the more ldr instructions
7338 we issue, the less likely we are to be able to schedule them well.
7339 Using ldr instructions also increases code size.
7341 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7342 for counts of 3 or 4 regs. */
7343 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7349 for (i = 0; i < count; i++)
7351 addr = plus_constant (from, i * 4 * sign);
7352 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7353 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7359 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7369 result = gen_rtx_PARALLEL (VOIDmode,
7370 rtvec_alloc (count + (write_back ? 1 : 0)));
7373 XVECEXP (result, 0, 0)
7374 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7379 for (j = 0; i < count; i++, j++)
7381 addr = plus_constant (from, j * 4 * sign);
7382 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7383 XVECEXP (result, 0, i)
7384 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7395 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7396 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7398 HOST_WIDE_INT offset = *offsetp;
7401 int sign = up ? 1 : -1;
7404 /* See arm_gen_load_multiple for discussion of
7405 the pros/cons of ldm/stm usage for XScale. */
7406 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7412 for (i = 0; i < count; i++)
7414 addr = plus_constant (to, i * 4 * sign);
7415 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7416 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7422 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7432 result = gen_rtx_PARALLEL (VOIDmode,
7433 rtvec_alloc (count + (write_back ? 1 : 0)));
7436 XVECEXP (result, 0, 0)
7437 = gen_rtx_SET (VOIDmode, to,
7438 plus_constant (to, count * 4 * sign));
7443 for (j = 0; i < count; i++, j++)
7445 addr = plus_constant (to, j * 4 * sign);
7446 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7447 XVECEXP (result, 0, i)
7448 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7459 arm_gen_movmemqi (rtx *operands)
7461 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7462 HOST_WIDE_INT srcoffset, dstoffset;
7464 rtx src, dst, srcbase, dstbase;
7465 rtx part_bytes_reg = NULL;
7468 if (GET_CODE (operands[2]) != CONST_INT
7469 || GET_CODE (operands[3]) != CONST_INT
7470 || INTVAL (operands[2]) > 64
7471 || INTVAL (operands[3]) & 3)
7474 dstbase = operands[0];
7475 srcbase = operands[1];
7477 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7478 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7480 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7481 out_words_to_go = INTVAL (operands[2]) / 4;
7482 last_bytes = INTVAL (operands[2]) & 3;
7483 dstoffset = srcoffset = 0;
7485 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7486 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7488 for (i = 0; in_words_to_go >= 2; i+=4)
7490 if (in_words_to_go > 4)
7491 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7492 srcbase, &srcoffset));
7494 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7495 FALSE, srcbase, &srcoffset));
7497 if (out_words_to_go)
7499 if (out_words_to_go > 4)
7500 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7501 dstbase, &dstoffset));
7502 else if (out_words_to_go != 1)
7503 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7507 dstbase, &dstoffset));
7510 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7511 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7512 if (last_bytes != 0)
7514 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7520 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7521 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7524 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7525 if (out_words_to_go)
7529 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7530 sreg = copy_to_reg (mem);
7532 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7533 emit_move_insn (mem, sreg);
7536 gcc_assert (!in_words_to_go); /* Sanity check */
7541 gcc_assert (in_words_to_go > 0);
7543 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7544 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7547 gcc_assert (!last_bytes || part_bytes_reg);
7549 if (BYTES_BIG_ENDIAN && last_bytes)
7551 rtx tmp = gen_reg_rtx (SImode);
7553 /* The bytes we want are in the top end of the word. */
7554 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7555 GEN_INT (8 * (4 - last_bytes))));
7556 part_bytes_reg = tmp;
7560 mem = adjust_automodify_address (dstbase, QImode,
7561 plus_constant (dst, last_bytes - 1),
7562 dstoffset + last_bytes - 1);
7563 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7567 tmp = gen_reg_rtx (SImode);
7568 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7569 part_bytes_reg = tmp;
7578 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7579 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7583 rtx tmp = gen_reg_rtx (SImode);
7584 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7585 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7586 part_bytes_reg = tmp;
7593 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7594 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7601 /* Select a dominance comparison mode if possible for a test of the general
7602 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7603 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7604 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7605 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7606 In all cases OP will be either EQ or NE, but we don't need to know which
7607 here. If we are unable to support a dominance comparison we return
7608 CC mode. This will then fail to match for the RTL expressions that
7609 generate this call. */
7611 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7613 enum rtx_code cond1, cond2;
7616 /* Currently we will probably get the wrong result if the individual
7617 comparisons are not simple. This also ensures that it is safe to
7618 reverse a comparison if necessary. */
7619 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7621 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7625 /* The if_then_else variant of this tests the second condition if the
7626 first passes, but is true if the first fails. Reverse the first
7627 condition to get a true "inclusive-or" expression. */
7628 if (cond_or == DOM_CC_NX_OR_Y)
7629 cond1 = reverse_condition (cond1);
7631 /* If the comparisons are not equal, and one doesn't dominate the other,
7632 then we can't do this. */
7634 && !comparison_dominates_p (cond1, cond2)
7635 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7640 enum rtx_code temp = cond1;
7648 if (cond_or == DOM_CC_X_AND_Y)
7653 case EQ: return CC_DEQmode;
7654 case LE: return CC_DLEmode;
7655 case LEU: return CC_DLEUmode;
7656 case GE: return CC_DGEmode;
7657 case GEU: return CC_DGEUmode;
7658 default: gcc_unreachable ();
7662 if (cond_or == DOM_CC_X_AND_Y)
7678 if (cond_or == DOM_CC_X_AND_Y)
7694 if (cond_or == DOM_CC_X_AND_Y)
7710 if (cond_or == DOM_CC_X_AND_Y)
7725 /* The remaining cases only occur when both comparisons are the
7728 gcc_assert (cond1 == cond2);
7732 gcc_assert (cond1 == cond2);
7736 gcc_assert (cond1 == cond2);
7740 gcc_assert (cond1 == cond2);
7744 gcc_assert (cond1 == cond2);
7753 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7755 /* All floating point compares return CCFP if it is an equality
7756 comparison, and CCFPE otherwise. */
7757 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7777 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7786 /* A compare with a shifted operand. Because of canonicalization, the
7787 comparison will have to be swapped when we emit the assembler. */
7788 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7789 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7790 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7791 || GET_CODE (x) == ROTATERT))
7794 /* This operation is performed swapped, but since we only rely on the Z
7795 flag we don't need an additional mode. */
7796 if (GET_MODE (y) == SImode && REG_P (y)
7797 && GET_CODE (x) == NEG
7798 && (op == EQ || op == NE))
7801 /* This is a special case that is used by combine to allow a
7802 comparison of a shifted byte load to be split into a zero-extend
7803 followed by a comparison of the shifted integer (only valid for
7804 equalities and unsigned inequalities). */
7805 if (GET_MODE (x) == SImode
7806 && GET_CODE (x) == ASHIFT
7807 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7808 && GET_CODE (XEXP (x, 0)) == SUBREG
7809 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7810 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7811 && (op == EQ || op == NE
7812 || op == GEU || op == GTU || op == LTU || op == LEU)
7813 && GET_CODE (y) == CONST_INT)
7816 /* A construct for a conditional compare, if the false arm contains
7817 0, then both conditions must be true, otherwise either condition
7818 must be true. Not all conditions are possible, so CCmode is
7819 returned if it can't be done. */
7820 if (GET_CODE (x) == IF_THEN_ELSE
7821 && (XEXP (x, 2) == const0_rtx
7822 || XEXP (x, 2) == const1_rtx)
7823 && COMPARISON_P (XEXP (x, 0))
7824 && COMPARISON_P (XEXP (x, 1)))
7825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7826 INTVAL (XEXP (x, 2)));
7828 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7829 if (GET_CODE (x) == AND
7830 && COMPARISON_P (XEXP (x, 0))
7831 && COMPARISON_P (XEXP (x, 1)))
7832 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7835 if (GET_CODE (x) == IOR
7836 && COMPARISON_P (XEXP (x, 0))
7837 && COMPARISON_P (XEXP (x, 1)))
7838 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7841 /* An operation (on Thumb) where we want to test for a single bit.
7842 This is done by shifting that bit up into the top bit of a
7843 scratch register; we can then branch on the sign bit. */
7845 && GET_MODE (x) == SImode
7846 && (op == EQ || op == NE)
7847 && GET_CODE (x) == ZERO_EXTRACT
7848 && XEXP (x, 1) == const1_rtx)
7851 /* An operation that sets the condition codes as a side-effect, the
7852 V flag is not set correctly, so we can only use comparisons where
7853 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7855 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7856 if (GET_MODE (x) == SImode
7858 && (op == EQ || op == NE || op == LT || op == GE)
7859 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7860 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7861 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7862 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7863 || GET_CODE (x) == LSHIFTRT
7864 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7865 || GET_CODE (x) == ROTATERT
7866 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7869 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7872 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7873 && GET_CODE (x) == PLUS
7874 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7880 /* X and Y are two things to compare using CODE. Emit the compare insn and
7881 return the rtx for register 0 in the proper mode. FP means this is a
7882 floating point compare: I don't think that it is needed on the arm. */
7884 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7886 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7887 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7889 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7894 /* Generate a sequence of insns that will generate the correct return
7895 address mask depending on the physical architecture that the program
7898 arm_gen_return_addr_mask (void)
7900 rtx reg = gen_reg_rtx (Pmode);
7902 emit_insn (gen_return_addr_mask (reg));
7907 arm_reload_in_hi (rtx *operands)
7909 rtx ref = operands[1];
7911 HOST_WIDE_INT offset = 0;
7913 if (GET_CODE (ref) == SUBREG)
7915 offset = SUBREG_BYTE (ref);
7916 ref = SUBREG_REG (ref);
7919 if (GET_CODE (ref) == REG)
7921 /* We have a pseudo which has been spilt onto the stack; there
7922 are two cases here: the first where there is a simple
7923 stack-slot replacement and a second where the stack-slot is
7924 out of range, or is used as a subreg. */
7925 if (reg_equiv_mem[REGNO (ref)])
7927 ref = reg_equiv_mem[REGNO (ref)];
7928 base = find_replacement (&XEXP (ref, 0));
7931 /* The slot is out of range, or was dressed up in a SUBREG. */
7932 base = reg_equiv_address[REGNO (ref)];
7935 base = find_replacement (&XEXP (ref, 0));
7937 /* Handle the case where the address is too complex to be offset by 1. */
7938 if (GET_CODE (base) == MINUS
7939 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7941 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7943 emit_set_insn (base_plus, base);
7946 else if (GET_CODE (base) == PLUS)
7948 /* The addend must be CONST_INT, or we would have dealt with it above. */
7949 HOST_WIDE_INT hi, lo;
7951 offset += INTVAL (XEXP (base, 1));
7952 base = XEXP (base, 0);
7954 /* Rework the address into a legal sequence of insns. */
7955 /* Valid range for lo is -4095 -> 4095 */
7958 : -((-offset) & 0xfff));
7960 /* Corner case, if lo is the max offset then we would be out of range
7961 once we have added the additional 1 below, so bump the msb into the
7962 pre-loading insn(s). */
7966 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7967 ^ (HOST_WIDE_INT) 0x80000000)
7968 - (HOST_WIDE_INT) 0x80000000);
7970 gcc_assert (hi + lo == offset);
7974 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7976 /* Get the base address; addsi3 knows how to handle constants
7977 that require more than one insn. */
7978 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7984 /* Operands[2] may overlap operands[0] (though it won't overlap
7985 operands[1]), that's why we asked for a DImode reg -- so we can
7986 use the bit that does not overlap. */
7987 if (REGNO (operands[2]) == REGNO (operands[0]))
7988 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7990 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7992 emit_insn (gen_zero_extendqisi2 (scratch,
7993 gen_rtx_MEM (QImode,
7994 plus_constant (base,
7996 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7997 gen_rtx_MEM (QImode,
7998 plus_constant (base,
8000 if (!BYTES_BIG_ENDIAN)
8001 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8002 gen_rtx_IOR (SImode,
8005 gen_rtx_SUBREG (SImode, operands[0], 0),
8009 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8010 gen_rtx_IOR (SImode,
8011 gen_rtx_ASHIFT (SImode, scratch,
8013 gen_rtx_SUBREG (SImode, operands[0], 0)));
8016 /* Handle storing a half-word to memory during reload by synthesizing as two
8017 byte stores. Take care not to clobber the input values until after we
8018 have moved them somewhere safe. This code assumes that if the DImode
8019 scratch in operands[2] overlaps either the input value or output address
8020 in some way, then that value must die in this insn (we absolutely need
8021 two scratch registers for some corner cases). */
8023 arm_reload_out_hi (rtx *operands)
8025 rtx ref = operands[0];
8026 rtx outval = operands[1];
8028 HOST_WIDE_INT offset = 0;
8030 if (GET_CODE (ref) == SUBREG)
8032 offset = SUBREG_BYTE (ref);
8033 ref = SUBREG_REG (ref);
8036 if (GET_CODE (ref) == REG)
8038 /* We have a pseudo which has been spilt onto the stack; there
8039 are two cases here: the first where there is a simple
8040 stack-slot replacement and a second where the stack-slot is
8041 out of range, or is used as a subreg. */
8042 if (reg_equiv_mem[REGNO (ref)])
8044 ref = reg_equiv_mem[REGNO (ref)];
8045 base = find_replacement (&XEXP (ref, 0));
8048 /* The slot is out of range, or was dressed up in a SUBREG. */
8049 base = reg_equiv_address[REGNO (ref)];
8052 base = find_replacement (&XEXP (ref, 0));
8054 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8056 /* Handle the case where the address is too complex to be offset by 1. */
8057 if (GET_CODE (base) == MINUS
8058 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8060 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8062 /* Be careful not to destroy OUTVAL. */
8063 if (reg_overlap_mentioned_p (base_plus, outval))
8065 /* Updating base_plus might destroy outval, see if we can
8066 swap the scratch and base_plus. */
8067 if (!reg_overlap_mentioned_p (scratch, outval))
8070 scratch = base_plus;
8075 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8077 /* Be conservative and copy OUTVAL into the scratch now,
8078 this should only be necessary if outval is a subreg
8079 of something larger than a word. */
8080 /* XXX Might this clobber base? I can't see how it can,
8081 since scratch is known to overlap with OUTVAL, and
8082 must be wider than a word. */
8083 emit_insn (gen_movhi (scratch_hi, outval));
8084 outval = scratch_hi;
8088 emit_set_insn (base_plus, base);
8091 else if (GET_CODE (base) == PLUS)
8093 /* The addend must be CONST_INT, or we would have dealt with it above. */
8094 HOST_WIDE_INT hi, lo;
8096 offset += INTVAL (XEXP (base, 1));
8097 base = XEXP (base, 0);
8099 /* Rework the address into a legal sequence of insns. */
8100 /* Valid range for lo is -4095 -> 4095 */
8103 : -((-offset) & 0xfff));
8105 /* Corner case, if lo is the max offset then we would be out of range
8106 once we have added the additional 1 below, so bump the msb into the
8107 pre-loading insn(s). */
8111 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8112 ^ (HOST_WIDE_INT) 0x80000000)
8113 - (HOST_WIDE_INT) 0x80000000);
8115 gcc_assert (hi + lo == offset);
8119 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8121 /* Be careful not to destroy OUTVAL. */
8122 if (reg_overlap_mentioned_p (base_plus, outval))
8124 /* Updating base_plus might destroy outval, see if we
8125 can swap the scratch and base_plus. */
8126 if (!reg_overlap_mentioned_p (scratch, outval))
8129 scratch = base_plus;
8134 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8136 /* Be conservative and copy outval into scratch now,
8137 this should only be necessary if outval is a
8138 subreg of something larger than a word. */
8139 /* XXX Might this clobber base? I can't see how it
8140 can, since scratch is known to overlap with
8142 emit_insn (gen_movhi (scratch_hi, outval));
8143 outval = scratch_hi;
8147 /* Get the base address; addsi3 knows how to handle constants
8148 that require more than one insn. */
8149 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8155 if (BYTES_BIG_ENDIAN)
8157 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8158 plus_constant (base, offset + 1)),
8159 gen_lowpart (QImode, outval)));
8160 emit_insn (gen_lshrsi3 (scratch,
8161 gen_rtx_SUBREG (SImode, outval, 0),
8163 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8164 gen_lowpart (QImode, scratch)));
8168 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8169 gen_lowpart (QImode, outval)));
8170 emit_insn (gen_lshrsi3 (scratch,
8171 gen_rtx_SUBREG (SImode, outval, 0),
8173 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8174 plus_constant (base, offset + 1)),
8175 gen_lowpart (QImode, scratch)));
8179 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8180 (padded to the size of a word) should be passed in a register. */
8183 arm_must_pass_in_stack (enum machine_mode mode, tree type)
8185 if (TARGET_AAPCS_BASED)
8186 return must_pass_in_stack_var_size (mode, type);
8188 return must_pass_in_stack_var_size_or_pad (mode, type);
8192 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8193 Return true if an argument passed on the stack should be padded upwards,
8194 i.e. if the least-significant byte has useful data.
8195 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8196 aggregate types are placed in the lowest memory address. */
8199 arm_pad_arg_upward (enum machine_mode mode, tree type)
8201 if (!TARGET_AAPCS_BASED)
8202 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8204 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8211 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8212 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8213 byte of the register has useful data, and return the opposite if the
8214 most significant byte does.
8215 For AAPCS, small aggregates and small complex types are always padded
8219 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8220 tree type, int first ATTRIBUTE_UNUSED)
8222 if (TARGET_AAPCS_BASED
8224 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8225 && int_size_in_bytes (type) <= 4)
8228 /* Otherwise, use default padding. */
8229 return !BYTES_BIG_ENDIAN;
8233 /* Print a symbolic form of X to the debug file, F. */
8235 arm_print_value (FILE *f, rtx x)
8237 switch (GET_CODE (x))
8240 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8244 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8252 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8254 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8255 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8263 fprintf (f, "\"%s\"", XSTR (x, 0));
8267 fprintf (f, "`%s'", XSTR (x, 0));
8271 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8275 arm_print_value (f, XEXP (x, 0));
8279 arm_print_value (f, XEXP (x, 0));
8281 arm_print_value (f, XEXP (x, 1));
8289 fprintf (f, "????");
8294 /* Routines for manipulation of the constant pool. */
8296 /* Arm instructions cannot load a large constant directly into a
8297 register; they have to come from a pc relative load. The constant
8298 must therefore be placed in the addressable range of the pc
8299 relative load. Depending on the precise pc relative load
8300 instruction the range is somewhere between 256 bytes and 4k. This
8301 means that we often have to dump a constant inside a function, and
8302 generate code to branch around it.
8304 It is important to minimize this, since the branches will slow
8305 things down and make the code larger.
8307 Normally we can hide the table after an existing unconditional
8308 branch so that there is no interruption of the flow, but in the
8309 worst case the code looks like this:
8327 We fix this by performing a scan after scheduling, which notices
8328 which instructions need to have their operands fetched from the
8329 constant table and builds the table.
8331 The algorithm starts by building a table of all the constants that
8332 need fixing up and all the natural barriers in the function (places
8333 where a constant table can be dropped without breaking the flow).
8334 For each fixup we note how far the pc-relative replacement will be
8335 able to reach and the offset of the instruction into the function.
8337 Having built the table we then group the fixes together to form
8338 tables that are as large as possible (subject to addressing
8339 constraints) and emit each table of constants after the last
8340 barrier that is within range of all the instructions in the group.
8341 If a group does not contain a barrier, then we forcibly create one
8342 by inserting a jump instruction into the flow. Once the table has
8343 been inserted, the insns are then modified to reference the
8344 relevant entry in the pool.
8346 Possible enhancements to the algorithm (not implemented) are:
8348 1) For some processors and object formats, there may be benefit in
8349 aligning the pools to the start of cache lines; this alignment
8350 would need to be taken into account when calculating addressability
8353 /* These typedefs are located at the start of this file, so that
8354 they can be used in the prototypes there. This comment is to
8355 remind readers of that fact so that the following structures
8356 can be understood more easily.
8358 typedef struct minipool_node Mnode;
8359 typedef struct minipool_fixup Mfix; */
8361 struct minipool_node
8363 /* Doubly linked chain of entries. */
8366 /* The maximum offset into the code that this entry can be placed. While
8367 pushing fixes for forward references, all entries are sorted in order
8368 of increasing max_address. */
8369 HOST_WIDE_INT max_address;
8370 /* Similarly for an entry inserted for a backwards ref. */
8371 HOST_WIDE_INT min_address;
8372 /* The number of fixes referencing this entry. This can become zero
8373 if we "unpush" an entry. In this case we ignore the entry when we
8374 come to emit the code. */
8376 /* The offset from the start of the minipool. */
8377 HOST_WIDE_INT offset;
8378 /* The value in table. */
8380 /* The mode of value. */
8381 enum machine_mode mode;
8382 /* The size of the value. With iWMMXt enabled
8383 sizes > 4 also imply an alignment of 8-bytes. */
8387 struct minipool_fixup
8391 HOST_WIDE_INT address;
8393 enum machine_mode mode;
8397 HOST_WIDE_INT forwards;
8398 HOST_WIDE_INT backwards;
8401 /* Fixes less than a word need padding out to a word boundary. */
8402 #define MINIPOOL_FIX_SIZE(mode) \
8403 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8405 static Mnode * minipool_vector_head;
8406 static Mnode * minipool_vector_tail;
8407 static rtx minipool_vector_label;
8408 static int minipool_pad;
8410 /* The linked list of all minipool fixes required for this function. */
8411 Mfix * minipool_fix_head;
8412 Mfix * minipool_fix_tail;
8413 /* The fix entry for the current minipool, once it has been placed. */
8414 Mfix * minipool_barrier;
8416 /* Determines if INSN is the start of a jump table. Returns the end
8417 of the TABLE or NULL_RTX. */
8419 is_jump_table (rtx insn)
8423 if (GET_CODE (insn) == JUMP_INSN
8424 && JUMP_LABEL (insn) != NULL
8425 && ((table = next_real_insn (JUMP_LABEL (insn)))
8426 == next_real_insn (insn))
8428 && GET_CODE (table) == JUMP_INSN
8429 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8430 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8436 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8437 #define JUMP_TABLES_IN_TEXT_SECTION 0
8440 static HOST_WIDE_INT
8441 get_jump_table_size (rtx insn)
8443 /* ADDR_VECs only take room if read-only data does into the text
8445 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8447 rtx body = PATTERN (insn);
8448 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8450 HOST_WIDE_INT modesize;
8452 modesize = GET_MODE_SIZE (GET_MODE (body));
8453 size = modesize * XVECLEN (body, elt);
8457 /* Round up size of TBB table to a halfword boundary. */
8458 size = (size + 1) & ~(HOST_WIDE_INT)1;
8461 /* No padding necessary for TBH. */
8464 /* Add two bytes for alignment on Thumb. */
8477 /* Move a minipool fix MP from its current location to before MAX_MP.
8478 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8479 constraints may need updating. */
8481 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8482 HOST_WIDE_INT max_address)
8484 /* The code below assumes these are different. */
8485 gcc_assert (mp != max_mp);
8489 if (max_address < mp->max_address)
8490 mp->max_address = max_address;
8494 if (max_address > max_mp->max_address - mp->fix_size)
8495 mp->max_address = max_mp->max_address - mp->fix_size;
8497 mp->max_address = max_address;
8499 /* Unlink MP from its current position. Since max_mp is non-null,
8500 mp->prev must be non-null. */
8501 mp->prev->next = mp->next;
8502 if (mp->next != NULL)
8503 mp->next->prev = mp->prev;
8505 minipool_vector_tail = mp->prev;
8507 /* Re-insert it before MAX_MP. */
8509 mp->prev = max_mp->prev;
8512 if (mp->prev != NULL)
8513 mp->prev->next = mp;
8515 minipool_vector_head = mp;
8518 /* Save the new entry. */
8521 /* Scan over the preceding entries and adjust their addresses as
8523 while (mp->prev != NULL
8524 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8526 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8533 /* Add a constant to the minipool for a forward reference. Returns the
8534 node added or NULL if the constant will not fit in this pool. */
8536 add_minipool_forward_ref (Mfix *fix)
8538 /* If set, max_mp is the first pool_entry that has a lower
8539 constraint than the one we are trying to add. */
8540 Mnode * max_mp = NULL;
8541 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8544 /* If the minipool starts before the end of FIX->INSN then this FIX
8545 can not be placed into the current pool. Furthermore, adding the
8546 new constant pool entry may cause the pool to start FIX_SIZE bytes
8548 if (minipool_vector_head &&
8549 (fix->address + get_attr_length (fix->insn)
8550 >= minipool_vector_head->max_address - fix->fix_size))
8553 /* Scan the pool to see if a constant with the same value has
8554 already been added. While we are doing this, also note the
8555 location where we must insert the constant if it doesn't already
8557 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8559 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8560 && fix->mode == mp->mode
8561 && (GET_CODE (fix->value) != CODE_LABEL
8562 || (CODE_LABEL_NUMBER (fix->value)
8563 == CODE_LABEL_NUMBER (mp->value)))
8564 && rtx_equal_p (fix->value, mp->value))
8566 /* More than one fix references this entry. */
8568 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8571 /* Note the insertion point if necessary. */
8573 && mp->max_address > max_address)
8576 /* If we are inserting an 8-bytes aligned quantity and
8577 we have not already found an insertion point, then
8578 make sure that all such 8-byte aligned quantities are
8579 placed at the start of the pool. */
8580 if (ARM_DOUBLEWORD_ALIGN
8582 && fix->fix_size >= 8
8583 && mp->fix_size < 8)
8586 max_address = mp->max_address;
8590 /* The value is not currently in the minipool, so we need to create
8591 a new entry for it. If MAX_MP is NULL, the entry will be put on
8592 the end of the list since the placement is less constrained than
8593 any existing entry. Otherwise, we insert the new fix before
8594 MAX_MP and, if necessary, adjust the constraints on the other
8597 mp->fix_size = fix->fix_size;
8598 mp->mode = fix->mode;
8599 mp->value = fix->value;
8601 /* Not yet required for a backwards ref. */
8602 mp->min_address = -65536;
8606 mp->max_address = max_address;
8608 mp->prev = minipool_vector_tail;
8610 if (mp->prev == NULL)
8612 minipool_vector_head = mp;
8613 minipool_vector_label = gen_label_rtx ();
8616 mp->prev->next = mp;
8618 minipool_vector_tail = mp;
8622 if (max_address > max_mp->max_address - mp->fix_size)
8623 mp->max_address = max_mp->max_address - mp->fix_size;
8625 mp->max_address = max_address;
8628 mp->prev = max_mp->prev;
8630 if (mp->prev != NULL)
8631 mp->prev->next = mp;
8633 minipool_vector_head = mp;
8636 /* Save the new entry. */
8639 /* Scan over the preceding entries and adjust their addresses as
8641 while (mp->prev != NULL
8642 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8644 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8652 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8653 HOST_WIDE_INT min_address)
8655 HOST_WIDE_INT offset;
8657 /* The code below assumes these are different. */
8658 gcc_assert (mp != min_mp);
8662 if (min_address > mp->min_address)
8663 mp->min_address = min_address;
8667 /* We will adjust this below if it is too loose. */
8668 mp->min_address = min_address;
8670 /* Unlink MP from its current position. Since min_mp is non-null,
8671 mp->next must be non-null. */
8672 mp->next->prev = mp->prev;
8673 if (mp->prev != NULL)
8674 mp->prev->next = mp->next;
8676 minipool_vector_head = mp->next;
8678 /* Reinsert it after MIN_MP. */
8680 mp->next = min_mp->next;
8682 if (mp->next != NULL)
8683 mp->next->prev = mp;
8685 minipool_vector_tail = mp;
8691 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8693 mp->offset = offset;
8694 if (mp->refcount > 0)
8695 offset += mp->fix_size;
8697 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8698 mp->next->min_address = mp->min_address + mp->fix_size;
8704 /* Add a constant to the minipool for a backward reference. Returns the
8705 node added or NULL if the constant will not fit in this pool.
8707 Note that the code for insertion for a backwards reference can be
8708 somewhat confusing because the calculated offsets for each fix do
8709 not take into account the size of the pool (which is still under
8712 add_minipool_backward_ref (Mfix *fix)
8714 /* If set, min_mp is the last pool_entry that has a lower constraint
8715 than the one we are trying to add. */
8716 Mnode *min_mp = NULL;
8717 /* This can be negative, since it is only a constraint. */
8718 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8721 /* If we can't reach the current pool from this insn, or if we can't
8722 insert this entry at the end of the pool without pushing other
8723 fixes out of range, then we don't try. This ensures that we
8724 can't fail later on. */
8725 if (min_address >= minipool_barrier->address
8726 || (minipool_vector_tail->min_address + fix->fix_size
8727 >= minipool_barrier->address))
8730 /* Scan the pool to see if a constant with the same value has
8731 already been added. While we are doing this, also note the
8732 location where we must insert the constant if it doesn't already
8734 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8736 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8737 && fix->mode == mp->mode
8738 && (GET_CODE (fix->value) != CODE_LABEL
8739 || (CODE_LABEL_NUMBER (fix->value)
8740 == CODE_LABEL_NUMBER (mp->value)))
8741 && rtx_equal_p (fix->value, mp->value)
8742 /* Check that there is enough slack to move this entry to the
8743 end of the table (this is conservative). */
8745 > (minipool_barrier->address
8746 + minipool_vector_tail->offset
8747 + minipool_vector_tail->fix_size)))
8750 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8754 mp->min_address += fix->fix_size;
8757 /* Note the insertion point if necessary. */
8758 if (mp->min_address < min_address)
8760 /* For now, we do not allow the insertion of 8-byte alignment
8761 requiring nodes anywhere but at the start of the pool. */
8762 if (ARM_DOUBLEWORD_ALIGN
8763 && fix->fix_size >= 8 && mp->fix_size < 8)
8768 else if (mp->max_address
8769 < minipool_barrier->address + mp->offset + fix->fix_size)
8771 /* Inserting before this entry would push the fix beyond
8772 its maximum address (which can happen if we have
8773 re-located a forwards fix); force the new fix to come
8776 min_address = mp->min_address + fix->fix_size;
8778 /* If we are inserting an 8-bytes aligned quantity and
8779 we have not already found an insertion point, then
8780 make sure that all such 8-byte aligned quantities are
8781 placed at the start of the pool. */
8782 else if (ARM_DOUBLEWORD_ALIGN
8784 && fix->fix_size >= 8
8785 && mp->fix_size < 8)
8788 min_address = mp->min_address + fix->fix_size;
8793 /* We need to create a new entry. */
8795 mp->fix_size = fix->fix_size;
8796 mp->mode = fix->mode;
8797 mp->value = fix->value;
8799 mp->max_address = minipool_barrier->address + 65536;
8801 mp->min_address = min_address;
8806 mp->next = minipool_vector_head;
8808 if (mp->next == NULL)
8810 minipool_vector_tail = mp;
8811 minipool_vector_label = gen_label_rtx ();
8814 mp->next->prev = mp;
8816 minipool_vector_head = mp;
8820 mp->next = min_mp->next;
8824 if (mp->next != NULL)
8825 mp->next->prev = mp;
8827 minipool_vector_tail = mp;
8830 /* Save the new entry. */
8838 /* Scan over the following entries and adjust their offsets. */
8839 while (mp->next != NULL)
8841 if (mp->next->min_address < mp->min_address + mp->fix_size)
8842 mp->next->min_address = mp->min_address + mp->fix_size;
8845 mp->next->offset = mp->offset + mp->fix_size;
8847 mp->next->offset = mp->offset;
8856 assign_minipool_offsets (Mfix *barrier)
8858 HOST_WIDE_INT offset = 0;
8861 minipool_barrier = barrier;
8863 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8865 mp->offset = offset;
8867 if (mp->refcount > 0)
8868 offset += mp->fix_size;
8872 /* Output the literal table */
8874 dump_minipool (rtx scan)
8880 if (ARM_DOUBLEWORD_ALIGN)
8881 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8882 if (mp->refcount > 0 && mp->fix_size >= 8)
8890 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8891 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8893 scan = emit_label_after (gen_label_rtx (), scan);
8894 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8895 scan = emit_label_after (minipool_vector_label, scan);
8897 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8899 if (mp->refcount > 0)
8904 ";; Offset %u, min %ld, max %ld ",
8905 (unsigned) mp->offset, (unsigned long) mp->min_address,
8906 (unsigned long) mp->max_address);
8907 arm_print_value (dump_file, mp->value);
8908 fputc ('\n', dump_file);
8911 switch (mp->fix_size)
8913 #ifdef HAVE_consttable_1
8915 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8919 #ifdef HAVE_consttable_2
8921 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8925 #ifdef HAVE_consttable_4
8927 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8931 #ifdef HAVE_consttable_8
8933 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8937 #ifdef HAVE_consttable_16
8939 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8952 minipool_vector_head = minipool_vector_tail = NULL;
8953 scan = emit_insn_after (gen_consttable_end (), scan);
8954 scan = emit_barrier_after (scan);
8957 /* Return the cost of forcibly inserting a barrier after INSN. */
8959 arm_barrier_cost (rtx insn)
8961 /* Basing the location of the pool on the loop depth is preferable,
8962 but at the moment, the basic block information seems to be
8963 corrupt by this stage of the compilation. */
8965 rtx next = next_nonnote_insn (insn);
8967 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8970 switch (GET_CODE (insn))
8973 /* It will always be better to place the table before the label, rather
8982 return base_cost - 10;
8985 return base_cost + 10;
8989 /* Find the best place in the insn stream in the range
8990 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8991 Create the barrier by inserting a jump and add a new fix entry for
8994 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8996 HOST_WIDE_INT count = 0;
8998 rtx from = fix->insn;
8999 /* The instruction after which we will insert the jump. */
9000 rtx selected = NULL;
9002 /* The address at which the jump instruction will be placed. */
9003 HOST_WIDE_INT selected_address;
9005 HOST_WIDE_INT max_count = max_address - fix->address;
9006 rtx label = gen_label_rtx ();
9008 selected_cost = arm_barrier_cost (from);
9009 selected_address = fix->address;
9011 while (from && count < max_count)
9016 /* This code shouldn't have been called if there was a natural barrier
9018 gcc_assert (GET_CODE (from) != BARRIER);
9020 /* Count the length of this insn. */
9021 count += get_attr_length (from);
9023 /* If there is a jump table, add its length. */
9024 tmp = is_jump_table (from);
9027 count += get_jump_table_size (tmp);
9029 /* Jump tables aren't in a basic block, so base the cost on
9030 the dispatch insn. If we select this location, we will
9031 still put the pool after the table. */
9032 new_cost = arm_barrier_cost (from);
9034 if (count < max_count
9035 && (!selected || new_cost <= selected_cost))
9038 selected_cost = new_cost;
9039 selected_address = fix->address + count;
9042 /* Continue after the dispatch table. */
9043 from = NEXT_INSN (tmp);
9047 new_cost = arm_barrier_cost (from);
9049 if (count < max_count
9050 && (!selected || new_cost <= selected_cost))
9053 selected_cost = new_cost;
9054 selected_address = fix->address + count;
9057 from = NEXT_INSN (from);
9060 /* Make sure that we found a place to insert the jump. */
9061 gcc_assert (selected);
9063 /* Create a new JUMP_INSN that branches around a barrier. */
9064 from = emit_jump_insn_after (gen_jump (label), selected);
9065 JUMP_LABEL (from) = label;
9066 barrier = emit_barrier_after (from);
9067 emit_label_after (label, barrier);
9069 /* Create a minipool barrier entry for the new barrier. */
9070 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9071 new_fix->insn = barrier;
9072 new_fix->address = selected_address;
9073 new_fix->next = fix->next;
9074 fix->next = new_fix;
9079 /* Record that there is a natural barrier in the insn stream at
9082 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9084 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9087 fix->address = address;
9090 if (minipool_fix_head != NULL)
9091 minipool_fix_tail->next = fix;
9093 minipool_fix_head = fix;
9095 minipool_fix_tail = fix;
9098 /* Record INSN, which will need fixing up to load a value from the
9099 minipool. ADDRESS is the offset of the insn since the start of the
9100 function; LOC is a pointer to the part of the insn which requires
9101 fixing; VALUE is the constant that must be loaded, which is of type
9104 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9105 enum machine_mode mode, rtx value)
9107 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9109 #ifdef AOF_ASSEMBLER
9110 /* PIC symbol references need to be converted into offsets into the
9112 /* XXX This shouldn't be done here. */
9113 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
9114 value = aof_pic_entry (value);
9115 #endif /* AOF_ASSEMBLER */
9118 fix->address = address;
9121 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9123 fix->forwards = get_attr_pool_range (insn);
9124 fix->backwards = get_attr_neg_pool_range (insn);
9125 fix->minipool = NULL;
9127 /* If an insn doesn't have a range defined for it, then it isn't
9128 expecting to be reworked by this code. Better to stop now than
9129 to generate duff assembly code. */
9130 gcc_assert (fix->forwards || fix->backwards);
9132 /* If an entry requires 8-byte alignment then assume all constant pools
9133 require 4 bytes of padding. Trying to do this later on a per-pool
9134 basis is awkward because existing pool entries have to be modified. */
9135 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9141 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9142 GET_MODE_NAME (mode),
9143 INSN_UID (insn), (unsigned long) address,
9144 -1 * (long)fix->backwards, (long)fix->forwards);
9145 arm_print_value (dump_file, fix->value);
9146 fprintf (dump_file, "\n");
9149 /* Add it to the chain of fixes. */
9152 if (minipool_fix_head != NULL)
9153 minipool_fix_tail->next = fix;
9155 minipool_fix_head = fix;
9157 minipool_fix_tail = fix;
9160 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9161 Returns the number of insns needed, or 99 if we don't know how to
9164 arm_const_double_inline_cost (rtx val)
9166 rtx lowpart, highpart;
9167 enum machine_mode mode;
9169 mode = GET_MODE (val);
9171 if (mode == VOIDmode)
9174 gcc_assert (GET_MODE_SIZE (mode) == 8);
9176 lowpart = gen_lowpart (SImode, val);
9177 highpart = gen_highpart_mode (SImode, mode, val);
9179 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9180 gcc_assert (GET_CODE (highpart) == CONST_INT);
9182 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9183 NULL_RTX, NULL_RTX, 0, 0)
9184 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9185 NULL_RTX, NULL_RTX, 0, 0));
9188 /* Return true if it is worthwhile to split a 64-bit constant into two
9189 32-bit operations. This is the case if optimizing for size, or
9190 if we have load delay slots, or if one 32-bit part can be done with
9191 a single data operation. */
9193 arm_const_double_by_parts (rtx val)
9195 enum machine_mode mode = GET_MODE (val);
9198 if (optimize_size || arm_ld_sched)
9201 if (mode == VOIDmode)
9204 part = gen_highpart_mode (SImode, mode, val);
9206 gcc_assert (GET_CODE (part) == CONST_INT);
9208 if (const_ok_for_arm (INTVAL (part))
9209 || const_ok_for_arm (~INTVAL (part)))
9212 part = gen_lowpart (SImode, val);
9214 gcc_assert (GET_CODE (part) == CONST_INT);
9216 if (const_ok_for_arm (INTVAL (part))
9217 || const_ok_for_arm (~INTVAL (part)))
9223 /* Scan INSN and note any of its operands that need fixing.
9224 If DO_PUSHES is false we do not actually push any of the fixups
9225 needed. The function returns TRUE if any fixups were needed/pushed.
9226 This is used by arm_memory_load_p() which needs to know about loads
9227 of constants that will be converted into minipool loads. */
9229 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9231 bool result = false;
9234 extract_insn (insn);
9236 if (!constrain_operands (1))
9237 fatal_insn_not_found (insn);
9239 if (recog_data.n_alternatives == 0)
9242 /* Fill in recog_op_alt with information about the constraints of
9244 preprocess_constraints ();
9246 for (opno = 0; opno < recog_data.n_operands; opno++)
9248 /* Things we need to fix can only occur in inputs. */
9249 if (recog_data.operand_type[opno] != OP_IN)
9252 /* If this alternative is a memory reference, then any mention
9253 of constants in this alternative is really to fool reload
9254 into allowing us to accept one there. We need to fix them up
9255 now so that we output the right code. */
9256 if (recog_op_alt[opno][which_alternative].memory_ok)
9258 rtx op = recog_data.operand[opno];
9260 if (CONSTANT_P (op))
9263 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9264 recog_data.operand_mode[opno], op);
9267 else if (GET_CODE (op) == MEM
9268 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9269 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9273 rtx cop = avoid_constant_pool_reference (op);
9275 /* Casting the address of something to a mode narrower
9276 than a word can cause avoid_constant_pool_reference()
9277 to return the pool reference itself. That's no good to
9278 us here. Lets just hope that we can use the
9279 constant pool value directly. */
9281 cop = get_pool_constant (XEXP (op, 0));
9283 push_minipool_fix (insn, address,
9284 recog_data.operand_loc[opno],
9285 recog_data.operand_mode[opno], cop);
9296 /* Gcc puts the pool in the wrong place for ARM, since we can only
9297 load addresses a limited distance around the pc. We do some
9298 special munging to move the constant pool values to the correct
9299 point in the code. */
9304 HOST_WIDE_INT address = 0;
9307 minipool_fix_head = minipool_fix_tail = NULL;
9309 /* The first insn must always be a note, or the code below won't
9310 scan it properly. */
9311 insn = get_insns ();
9312 gcc_assert (GET_CODE (insn) == NOTE);
9315 /* Scan all the insns and record the operands that will need fixing. */
9316 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9318 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9319 && (arm_cirrus_insn_p (insn)
9320 || GET_CODE (insn) == JUMP_INSN
9321 || arm_memory_load_p (insn)))
9322 cirrus_reorg (insn);
9324 if (GET_CODE (insn) == BARRIER)
9325 push_minipool_barrier (insn, address);
9326 else if (INSN_P (insn))
9330 note_invalid_constants (insn, address, true);
9331 address += get_attr_length (insn);
9333 /* If the insn is a vector jump, add the size of the table
9334 and skip the table. */
9335 if ((table = is_jump_table (insn)) != NULL)
9337 address += get_jump_table_size (table);
9343 fix = minipool_fix_head;
9345 /* Now scan the fixups and perform the required changes. */
9350 Mfix * last_added_fix;
9351 Mfix * last_barrier = NULL;
9354 /* Skip any further barriers before the next fix. */
9355 while (fix && GET_CODE (fix->insn) == BARRIER)
9358 /* No more fixes. */
9362 last_added_fix = NULL;
9364 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9366 if (GET_CODE (ftmp->insn) == BARRIER)
9368 if (ftmp->address >= minipool_vector_head->max_address)
9371 last_barrier = ftmp;
9373 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9376 last_added_fix = ftmp; /* Keep track of the last fix added. */
9379 /* If we found a barrier, drop back to that; any fixes that we
9380 could have reached but come after the barrier will now go in
9381 the next mini-pool. */
9382 if (last_barrier != NULL)
9384 /* Reduce the refcount for those fixes that won't go into this
9386 for (fdel = last_barrier->next;
9387 fdel && fdel != ftmp;
9390 fdel->minipool->refcount--;
9391 fdel->minipool = NULL;
9394 ftmp = last_barrier;
9398 /* ftmp is first fix that we can't fit into this pool and
9399 there no natural barriers that we could use. Insert a
9400 new barrier in the code somewhere between the previous
9401 fix and this one, and arrange to jump around it. */
9402 HOST_WIDE_INT max_address;
9404 /* The last item on the list of fixes must be a barrier, so
9405 we can never run off the end of the list of fixes without
9406 last_barrier being set. */
9409 max_address = minipool_vector_head->max_address;
9410 /* Check that there isn't another fix that is in range that
9411 we couldn't fit into this pool because the pool was
9412 already too large: we need to put the pool before such an
9413 instruction. The pool itself may come just after the
9414 fix because create_fix_barrier also allows space for a
9415 jump instruction. */
9416 if (ftmp->address < max_address)
9417 max_address = ftmp->address + 1;
9419 last_barrier = create_fix_barrier (last_added_fix, max_address);
9422 assign_minipool_offsets (last_barrier);
9426 if (GET_CODE (ftmp->insn) != BARRIER
9427 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9434 /* Scan over the fixes we have identified for this pool, fixing them
9435 up and adding the constants to the pool itself. */
9436 for (this_fix = fix; this_fix && ftmp != this_fix;
9437 this_fix = this_fix->next)
9438 if (GET_CODE (this_fix->insn) != BARRIER)
9441 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9442 minipool_vector_label),
9443 this_fix->minipool->offset);
9444 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9447 dump_minipool (last_barrier->insn);
9451 /* From now on we must synthesize any constants that we can't handle
9452 directly. This can happen if the RTL gets split during final
9453 instruction generation. */
9454 after_arm_reorg = 1;
9456 /* Free the minipool memory. */
9457 obstack_free (&minipool_obstack, minipool_startobj);
9460 /* Routines to output assembly language. */
9462 /* If the rtx is the correct value then return the string of the number.
9463 In this way we can ensure that valid double constants are generated even
9464 when cross compiling. */
9466 fp_immediate_constant (rtx x)
9471 if (!fp_consts_inited)
9474 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9475 for (i = 0; i < 8; i++)
9476 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9477 return strings_fp[i];
9482 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9484 fp_const_from_val (REAL_VALUE_TYPE *r)
9488 if (!fp_consts_inited)
9491 for (i = 0; i < 8; i++)
9492 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9493 return strings_fp[i];
9498 /* Output the operands of a LDM/STM instruction to STREAM.
9499 MASK is the ARM register set mask of which only bits 0-15 are important.
9500 REG is the base register, either the frame pointer or the stack pointer,
9501 INSTR is the possibly suffixed load or store instruction.
9502 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9505 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9506 unsigned long mask, int rfe)
9509 bool not_first = FALSE;
9511 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9512 fputc ('\t', stream);
9513 asm_fprintf (stream, instr, reg);
9514 fputc ('{', stream);
9516 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9517 if (mask & (1 << i))
9520 fprintf (stream, ", ");
9522 asm_fprintf (stream, "%r", i);
9527 fprintf (stream, "}^\n");
9529 fprintf (stream, "}\n");
9533 /* Output a FLDMD instruction to STREAM.
9534 BASE if the register containing the address.
9535 REG and COUNT specify the register range.
9536 Extra registers may be added to avoid hardware bugs.
9538 We output FLDMD even for ARMv5 VFP implementations. Although
9539 FLDMD is technically not supported until ARMv6, it is believed
9540 that all VFP implementations support its use in this context. */
9543 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9547 /* Workaround ARM10 VFPr1 bug. */
9548 if (count == 2 && !arm_arch6)
9555 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9556 load into multiple parts if we have to handle more than 16 registers. */
9559 vfp_output_fldmd (stream, base, reg, 16);
9560 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9564 fputc ('\t', stream);
9565 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9567 for (i = reg; i < reg + count; i++)
9570 fputs (", ", stream);
9571 asm_fprintf (stream, "d%d", i);
9573 fputs ("}\n", stream);
9578 /* Output the assembly for a store multiple. */
9581 vfp_output_fstmd (rtx * operands)
9588 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9589 p = strlen (pattern);
9591 gcc_assert (GET_CODE (operands[1]) == REG);
9593 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9594 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9596 p += sprintf (&pattern[p], ", d%d", base + i);
9598 strcpy (&pattern[p], "}");
9600 output_asm_insn (pattern, operands);
9605 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9606 number of bytes pushed. */
9609 vfp_emit_fstmd (int base_reg, int count)
9616 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9617 register pairs are stored by a store multiple insn. We avoid this
9618 by pushing an extra pair. */
9619 if (count == 2 && !arm_arch6)
9621 if (base_reg == LAST_VFP_REGNUM - 3)
9626 /* FSTMD may not store more than 16 doubleword registers at once. Split
9627 larger stores into multiple parts (up to a maximum of two, in
9632 /* NOTE: base_reg is an internal register number, so each D register
9634 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9635 saved += vfp_emit_fstmd (base_reg, 16);
9639 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9640 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9642 reg = gen_rtx_REG (DFmode, base_reg);
9646 = gen_rtx_SET (VOIDmode,
9647 gen_frame_mem (BLKmode,
9648 gen_rtx_PRE_DEC (BLKmode,
9649 stack_pointer_rtx)),
9650 gen_rtx_UNSPEC (BLKmode,
9654 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9655 plus_constant (stack_pointer_rtx, -(count * 8)));
9656 RTX_FRAME_RELATED_P (tmp) = 1;
9657 XVECEXP (dwarf, 0, 0) = tmp;
9659 tmp = gen_rtx_SET (VOIDmode,
9660 gen_frame_mem (DFmode, stack_pointer_rtx),
9662 RTX_FRAME_RELATED_P (tmp) = 1;
9663 XVECEXP (dwarf, 0, 1) = tmp;
9665 for (i = 1; i < count; i++)
9667 reg = gen_rtx_REG (DFmode, base_reg);
9669 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9671 tmp = gen_rtx_SET (VOIDmode,
9672 gen_frame_mem (DFmode,
9673 plus_constant (stack_pointer_rtx,
9676 RTX_FRAME_RELATED_P (tmp) = 1;
9677 XVECEXP (dwarf, 0, i + 1) = tmp;
9680 par = emit_insn (par);
9681 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9683 RTX_FRAME_RELATED_P (par) = 1;
9688 /* Emit a call instruction with pattern PAT. ADDR is the address of
9692 arm_emit_call_insn (rtx pat, rtx addr)
9696 insn = emit_call_insn (pat);
9698 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9699 If the call might use such an entry, add a use of the PIC register
9700 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9701 if (TARGET_VXWORKS_RTP
9703 && GET_CODE (addr) == SYMBOL_REF
9704 && (SYMBOL_REF_DECL (addr)
9705 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9706 : !SYMBOL_REF_LOCAL_P (addr)))
9708 require_pic_register ();
9709 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9713 /* Output a 'call' insn. */
9715 output_call (rtx *operands)
9717 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9719 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9720 if (REGNO (operands[0]) == LR_REGNUM)
9722 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9723 output_asm_insn ("mov%?\t%0, %|lr", operands);
9726 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9728 if (TARGET_INTERWORK || arm_arch4t)
9729 output_asm_insn ("bx%?\t%0", operands);
9731 output_asm_insn ("mov%?\t%|pc, %0", operands);
9736 /* Output a 'call' insn that is a reference in memory. */
9738 output_call_mem (rtx *operands)
9740 if (TARGET_INTERWORK && !arm_arch5)
9742 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9743 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9744 output_asm_insn ("bx%?\t%|ip", operands);
9746 else if (regno_use_in (LR_REGNUM, operands[0]))
9748 /* LR is used in the memory address. We load the address in the
9749 first instruction. It's safe to use IP as the target of the
9750 load since the call will kill it anyway. */
9751 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9753 output_asm_insn ("blx%?\t%|ip", operands);
9756 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9758 output_asm_insn ("bx%?\t%|ip", operands);
9760 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9765 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9766 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9773 /* Output a move from arm registers to an fpa registers.
9774 OPERANDS[0] is an fpa register.
9775 OPERANDS[1] is the first registers of an arm register pair. */
9777 output_mov_long_double_fpa_from_arm (rtx *operands)
9779 int arm_reg0 = REGNO (operands[1]);
9782 gcc_assert (arm_reg0 != IP_REGNUM);
9784 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9785 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9786 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9788 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9789 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9794 /* Output a move from an fpa register to arm registers.
9795 OPERANDS[0] is the first registers of an arm register pair.
9796 OPERANDS[1] is an fpa register. */
9798 output_mov_long_double_arm_from_fpa (rtx *operands)
9800 int arm_reg0 = REGNO (operands[0]);
9803 gcc_assert (arm_reg0 != IP_REGNUM);
9805 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9806 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9807 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9809 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9810 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9814 /* Output a move from arm registers to arm registers of a long double
9815 OPERANDS[0] is the destination.
9816 OPERANDS[1] is the source. */
9818 output_mov_long_double_arm_from_arm (rtx *operands)
9820 /* We have to be careful here because the two might overlap. */
9821 int dest_start = REGNO (operands[0]);
9822 int src_start = REGNO (operands[1]);
9826 if (dest_start < src_start)
9828 for (i = 0; i < 3; i++)
9830 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9831 ops[1] = gen_rtx_REG (SImode, src_start + i);
9832 output_asm_insn ("mov%?\t%0, %1", ops);
9837 for (i = 2; i >= 0; i--)
9839 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9840 ops[1] = gen_rtx_REG (SImode, src_start + i);
9841 output_asm_insn ("mov%?\t%0, %1", ops);
9849 /* Output a move from arm registers to an fpa registers.
9850 OPERANDS[0] is an fpa register.
9851 OPERANDS[1] is the first registers of an arm register pair. */
9853 output_mov_double_fpa_from_arm (rtx *operands)
9855 int arm_reg0 = REGNO (operands[1]);
9858 gcc_assert (arm_reg0 != IP_REGNUM);
9860 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9861 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9862 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9863 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9867 /* Output a move from an fpa register to arm registers.
9868 OPERANDS[0] is the first registers of an arm register pair.
9869 OPERANDS[1] is an fpa register. */
9871 output_mov_double_arm_from_fpa (rtx *operands)
9873 int arm_reg0 = REGNO (operands[0]);
9876 gcc_assert (arm_reg0 != IP_REGNUM);
9878 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9879 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9880 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9881 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9885 /* Output a move between double words.
9886 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9887 or MEM<-REG and all MEMs must be offsettable addresses. */
9889 output_move_double (rtx *operands)
9891 enum rtx_code code0 = GET_CODE (operands[0]);
9892 enum rtx_code code1 = GET_CODE (operands[1]);
9897 int reg0 = REGNO (operands[0]);
9899 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9901 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9903 switch (GET_CODE (XEXP (operands[1], 0)))
9906 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9910 gcc_assert (TARGET_LDRD);
9911 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9916 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9918 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9922 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9926 gcc_assert (TARGET_LDRD);
9927 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9932 otherops[0] = operands[0];
9933 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9934 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9936 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9938 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9940 /* Registers overlap so split out the increment. */
9941 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9942 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9946 /* IWMMXT allows offsets larger than ldrd can handle,
9947 fix these up with a pair of ldr. */
9948 if (GET_CODE (otherops[2]) == CONST_INT
9949 && (INTVAL(otherops[2]) <= -256
9950 || INTVAL(otherops[2]) >= 256))
9952 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9953 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9954 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9957 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9962 /* IWMMXT allows offsets larger than ldrd can handle,
9963 fix these up with a pair of ldr. */
9964 if (GET_CODE (otherops[2]) == CONST_INT
9965 && (INTVAL(otherops[2]) <= -256
9966 || INTVAL(otherops[2]) >= 256))
9968 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9969 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9970 otherops[0] = operands[0];
9971 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9974 /* We only allow constant increments, so this is safe. */
9975 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9981 output_asm_insn ("adr%?\t%0, %1", operands);
9982 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9985 /* ??? This needs checking for thumb2. */
9987 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9988 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9990 otherops[0] = operands[0];
9991 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9992 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9994 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9996 if (GET_CODE (otherops[2]) == CONST_INT)
9998 switch ((int) INTVAL (otherops[2]))
10001 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10006 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10011 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10016 && (GET_CODE (otherops[2]) == REG
10017 || (GET_CODE (otherops[2]) == CONST_INT
10018 && INTVAL (otherops[2]) > -256
10019 && INTVAL (otherops[2]) < 256)))
10021 if (reg_overlap_mentioned_p (otherops[0],
10024 /* Swap base and index registers over to
10025 avoid a conflict. */
10026 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
10027 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
10029 /* If both registers conflict, it will usually
10030 have been fixed by a splitter. */
10031 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10033 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10034 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10038 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10042 if (GET_CODE (otherops[2]) == CONST_INT)
10044 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10045 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10047 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10050 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10053 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10055 return "ldm%(ia%)\t%0, %M0";
10059 otherops[1] = adjust_address (operands[1], SImode, 4);
10060 /* Take care of overlapping base/data reg. */
10061 if (reg_mentioned_p (operands[0], operands[1]))
10063 output_asm_insn ("ldr%?\t%0, %1", otherops);
10064 output_asm_insn ("ldr%?\t%0, %1", operands);
10068 output_asm_insn ("ldr%?\t%0, %1", operands);
10069 output_asm_insn ("ldr%?\t%0, %1", otherops);
10076 /* Constraints should ensure this. */
10077 gcc_assert (code0 == MEM && code1 == REG);
10078 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10080 switch (GET_CODE (XEXP (operands[0], 0)))
10083 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10087 gcc_assert (TARGET_LDRD);
10088 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10093 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10095 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10099 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10103 gcc_assert (TARGET_LDRD);
10104 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10109 otherops[0] = operands[1];
10110 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10111 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10113 /* IWMMXT allows offsets larger than ldrd can handle,
10114 fix these up with a pair of ldr. */
10115 if (GET_CODE (otherops[2]) == CONST_INT
10116 && (INTVAL(otherops[2]) <= -256
10117 || INTVAL(otherops[2]) >= 256))
10120 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10121 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10123 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10124 otherops[0] = reg1;
10125 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10129 otherops[0] = reg1;
10130 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10131 otherops[0] = operands[1];
10132 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10135 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10136 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10138 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10142 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10143 if (GET_CODE (otherops[2]) == CONST_INT)
10145 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10148 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10154 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10160 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10165 && (GET_CODE (otherops[2]) == REG
10166 || (GET_CODE (otherops[2]) == CONST_INT
10167 && INTVAL (otherops[2]) > -256
10168 && INTVAL (otherops[2]) < 256)))
10170 otherops[0] = operands[1];
10171 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10172 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10178 otherops[0] = adjust_address (operands[0], SImode, 4);
10179 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10180 output_asm_insn ("str%?\t%1, %0", operands);
10181 output_asm_insn ("str%?\t%1, %0", otherops);
10188 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10189 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10192 output_move_quad (rtx *operands)
10194 if (REG_P (operands[0]))
10196 /* Load, or reg->reg move. */
10198 if (MEM_P (operands[1]))
10200 switch (GET_CODE (XEXP (operands[1], 0)))
10203 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10208 output_asm_insn ("adr%?\t%0, %1", operands);
10209 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10213 gcc_unreachable ();
10221 gcc_assert (REG_P (operands[1]));
10223 dest = REGNO (operands[0]);
10224 src = REGNO (operands[1]);
10226 /* This seems pretty dumb, but hopefully GCC won't try to do it
10229 for (i = 0; i < 4; i++)
10231 ops[0] = gen_rtx_REG (SImode, dest + i);
10232 ops[1] = gen_rtx_REG (SImode, src + i);
10233 output_asm_insn ("mov%?\t%0, %1", ops);
10236 for (i = 3; i >= 0; i--)
10238 ops[0] = gen_rtx_REG (SImode, dest + i);
10239 ops[1] = gen_rtx_REG (SImode, src + i);
10240 output_asm_insn ("mov%?\t%0, %1", ops);
10246 gcc_assert (MEM_P (operands[0]));
10247 gcc_assert (REG_P (operands[1]));
10248 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10250 switch (GET_CODE (XEXP (operands[0], 0)))
10253 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10257 gcc_unreachable ();
10264 /* Output a VFP load or store instruction. */
10267 output_move_vfp (rtx *operands)
10269 rtx reg, mem, addr, ops[2];
10270 int load = REG_P (operands[0]);
10271 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10272 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10273 const char *template;
10275 enum machine_mode mode;
10277 reg = operands[!load];
10278 mem = operands[load];
10280 mode = GET_MODE (reg);
10282 gcc_assert (REG_P (reg));
10283 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10284 gcc_assert (mode == SFmode
10288 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10289 gcc_assert (MEM_P (mem));
10291 addr = XEXP (mem, 0);
10293 switch (GET_CODE (addr))
10296 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10297 ops[0] = XEXP (addr, 0);
10302 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10303 ops[0] = XEXP (addr, 0);
10308 template = "f%s%c%%?\t%%%s0, %%1%s";
10314 sprintf (buff, template,
10315 load ? "ld" : "st",
10318 integer_p ? "\t%@ int" : "");
10319 output_asm_insn (buff, ops);
10324 /* Output a Neon quad-word load or store, or a load or store for
10325 larger structure modes. We could also support post-modify forms using
10326 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10328 WARNING: The ordering of elements in memory is weird in big-endian mode,
10329 because we use VSTM instead of VST1, to make it easy to make vector stores
10330 via ARM registers write values in the same order as stores direct from Neon
10331 registers. For example, the byte ordering of a quadword vector with 16-byte
10332 elements like this:
10334 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10336 will be (with lowest address first, h = most-significant byte,
10337 l = least-significant byte of element):
10339 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10340 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10342 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10345 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10347 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10348 layout will result as if VSTM/VLDM were used. */
10351 output_move_neon (rtx *operands)
10353 rtx reg, mem, addr, ops[2];
10354 int regno, load = REG_P (operands[0]);
10355 const char *template;
10357 enum machine_mode mode;
10359 reg = operands[!load];
10360 mem = operands[load];
10362 mode = GET_MODE (reg);
10364 gcc_assert (REG_P (reg));
10365 regno = REGNO (reg);
10366 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10367 || NEON_REGNO_OK_FOR_QUAD (regno));
10368 gcc_assert (VALID_NEON_DREG_MODE (mode)
10369 || VALID_NEON_QREG_MODE (mode)
10370 || VALID_NEON_STRUCT_MODE (mode));
10371 gcc_assert (MEM_P (mem));
10373 addr = XEXP (mem, 0);
10375 /* Strip off const from addresses like (const (plus (...))). */
10376 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10377 addr = XEXP (addr, 0);
10379 switch (GET_CODE (addr))
10382 template = "v%smia%%?\t%%0!, %%h1";
10383 ops[0] = XEXP (addr, 0);
10388 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10389 gcc_unreachable ();
10394 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10397 for (i = 0; i < nregs; i++)
10399 /* We're only using DImode here because it's a convenient size. */
10400 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10401 ops[1] = adjust_address (mem, SImode, 8 * i);
10402 if (reg_overlap_mentioned_p (ops[0], mem))
10404 gcc_assert (overlap == -1);
10409 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10410 output_asm_insn (buff, ops);
10415 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10416 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10417 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10418 output_asm_insn (buff, ops);
10425 template = "v%smia%%?\t%%m0, %%h1";
10430 sprintf (buff, template, load ? "ld" : "st");
10431 output_asm_insn (buff, ops);
10436 /* Output an ADD r, s, #n where n may be too big for one instruction.
10437 If adding zero to one register, output nothing. */
10439 output_add_immediate (rtx *operands)
10441 HOST_WIDE_INT n = INTVAL (operands[2]);
10443 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10446 output_multi_immediate (operands,
10447 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10450 output_multi_immediate (operands,
10451 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10458 /* Output a multiple immediate operation.
10459 OPERANDS is the vector of operands referred to in the output patterns.
10460 INSTR1 is the output pattern to use for the first constant.
10461 INSTR2 is the output pattern to use for subsequent constants.
10462 IMMED_OP is the index of the constant slot in OPERANDS.
10463 N is the constant value. */
10464 static const char *
10465 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10466 int immed_op, HOST_WIDE_INT n)
10468 #if HOST_BITS_PER_WIDE_INT > 32
10474 /* Quick and easy output. */
10475 operands[immed_op] = const0_rtx;
10476 output_asm_insn (instr1, operands);
10481 const char * instr = instr1;
10483 /* Note that n is never zero here (which would give no output). */
10484 for (i = 0; i < 32; i += 2)
10488 operands[immed_op] = GEN_INT (n & (255 << i));
10489 output_asm_insn (instr, operands);
10499 /* Return the name of a shifter operation. */
10500 static const char *
10501 arm_shift_nmem(enum rtx_code code)
10506 return ARM_LSL_NAME;
10522 /* Return the appropriate ARM instruction for the operation code.
10523 The returned result should not be overwritten. OP is the rtx of the
10524 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10527 arithmetic_instr (rtx op, int shift_first_arg)
10529 switch (GET_CODE (op))
10535 return shift_first_arg ? "rsb" : "sub";
10550 return arm_shift_nmem(GET_CODE(op));
10553 gcc_unreachable ();
10557 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10558 for the operation code. The returned result should not be overwritten.
10559 OP is the rtx code of the shift.
10560 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
10562 static const char *
10563 shift_op (rtx op, HOST_WIDE_INT *amountp)
10566 enum rtx_code code = GET_CODE (op);
10568 switch (GET_CODE (XEXP (op, 1)))
10576 *amountp = INTVAL (XEXP (op, 1));
10580 gcc_unreachable ();
10586 gcc_assert (*amountp != -1);
10587 *amountp = 32 - *amountp;
10590 /* Fall through. */
10596 mnem = arm_shift_nmem(code);
10600 /* We never have to worry about the amount being other than a
10601 power of 2, since this case can never be reloaded from a reg. */
10602 gcc_assert (*amountp != -1);
10603 *amountp = int_log2 (*amountp);
10604 return ARM_LSL_NAME;
10607 gcc_unreachable ();
10610 if (*amountp != -1)
10612 /* This is not 100% correct, but follows from the desire to merge
10613 multiplication by a power of 2 with the recognizer for a
10614 shift. >=32 is not a valid shift for "lsl", so we must try and
10615 output a shift that produces the correct arithmetical result.
10616 Using lsr #32 is identical except for the fact that the carry bit
10617 is not set correctly if we set the flags; but we never use the
10618 carry bit from such an operation, so we can ignore that. */
10619 if (code == ROTATERT)
10620 /* Rotate is just modulo 32. */
10622 else if (*amountp != (*amountp & 31))
10624 if (code == ASHIFT)
10629 /* Shifts of 0 are no-ops. */
10637 /* Obtain the shift from the POWER of two. */
10639 static HOST_WIDE_INT
10640 int_log2 (HOST_WIDE_INT power)
10642 HOST_WIDE_INT shift = 0;
10644 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10646 gcc_assert (shift <= 31);
10653 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10654 because /bin/as is horribly restrictive. The judgement about
10655 whether or not each character is 'printable' (and can be output as
10656 is) or not (and must be printed with an octal escape) must be made
10657 with reference to the *host* character set -- the situation is
10658 similar to that discussed in the comments above pp_c_char in
10659 c-pretty-print.c. */
10661 #define MAX_ASCII_LEN 51
10664 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10667 int len_so_far = 0;
10669 fputs ("\t.ascii\t\"", stream);
10671 for (i = 0; i < len; i++)
10675 if (len_so_far >= MAX_ASCII_LEN)
10677 fputs ("\"\n\t.ascii\t\"", stream);
10683 if (c == '\\' || c == '\"')
10685 putc ('\\', stream);
10693 fprintf (stream, "\\%03o", c);
10698 fputs ("\"\n", stream);
10701 /* Compute the register save mask for registers 0 through 12
10702 inclusive. This code is used by arm_compute_save_reg_mask. */
10704 static unsigned long
10705 arm_compute_save_reg0_reg12_mask (void)
10707 unsigned long func_type = arm_current_func_type ();
10708 unsigned long save_reg_mask = 0;
10711 if (IS_INTERRUPT (func_type))
10713 unsigned int max_reg;
10714 /* Interrupt functions must not corrupt any registers,
10715 even call clobbered ones. If this is a leaf function
10716 we can just examine the registers used by the RTL, but
10717 otherwise we have to assume that whatever function is
10718 called might clobber anything, and so we have to save
10719 all the call-clobbered registers as well. */
10720 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10721 /* FIQ handlers have registers r8 - r12 banked, so
10722 we only need to check r0 - r7, Normal ISRs only
10723 bank r14 and r15, so we must check up to r12.
10724 r13 is the stack pointer which is always preserved,
10725 so we do not need to consider it here. */
10730 for (reg = 0; reg <= max_reg; reg++)
10731 if (df_regs_ever_live_p (reg)
10732 || (! current_function_is_leaf && call_used_regs[reg]))
10733 save_reg_mask |= (1 << reg);
10735 /* Also save the pic base register if necessary. */
10737 && !TARGET_SINGLE_PIC_BASE
10738 && arm_pic_register != INVALID_REGNUM
10739 && current_function_uses_pic_offset_table)
10740 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10744 /* In arm mode we handle r11 (FP) as a special case. */
10745 unsigned last_reg = TARGET_ARM ? 10 : 11;
10747 /* In the normal case we only need to save those registers
10748 which are call saved and which are used by this function. */
10749 for (reg = 0; reg <= last_reg; reg++)
10750 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10751 save_reg_mask |= (1 << reg);
10753 /* Handle the frame pointer as a special case. */
10754 if (! TARGET_APCS_FRAME
10755 && ! frame_pointer_needed
10756 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10757 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10758 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10759 else if (! TARGET_APCS_FRAME
10760 && ! frame_pointer_needed
10761 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10762 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10763 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10765 /* If we aren't loading the PIC register,
10766 don't stack it even though it may be live. */
10768 && !TARGET_SINGLE_PIC_BASE
10769 && arm_pic_register != INVALID_REGNUM
10770 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10771 || current_function_uses_pic_offset_table))
10772 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10774 /* The prologue will copy SP into R0, so save it. */
10775 if (IS_STACKALIGN (func_type))
10776 save_reg_mask |= 1;
10779 /* Save registers so the exception handler can modify them. */
10780 if (current_function_calls_eh_return)
10786 reg = EH_RETURN_DATA_REGNO (i);
10787 if (reg == INVALID_REGNUM)
10789 save_reg_mask |= 1 << reg;
10793 return save_reg_mask;
10797 /* Compute a bit mask of which registers need to be
10798 saved on the stack for the current function. */
10800 static unsigned long
10801 arm_compute_save_reg_mask (void)
10803 unsigned int save_reg_mask = 0;
10804 unsigned long func_type = arm_current_func_type ();
10807 if (IS_NAKED (func_type))
10808 /* This should never really happen. */
10811 /* If we are creating a stack frame, then we must save the frame pointer,
10812 IP (which will hold the old stack pointer), LR and the PC. */
10813 if (frame_pointer_needed && TARGET_ARM)
10815 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10818 | (1 << PC_REGNUM);
10820 /* Volatile functions do not return, so there
10821 is no need to save any other registers. */
10822 if (IS_VOLATILE (func_type))
10823 return save_reg_mask;
10825 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10827 /* Decide if we need to save the link register.
10828 Interrupt routines have their own banked link register,
10829 so they never need to save it.
10830 Otherwise if we do not use the link register we do not need to save
10831 it. If we are pushing other registers onto the stack however, we
10832 can save an instruction in the epilogue by pushing the link register
10833 now and then popping it back into the PC. This incurs extra memory
10834 accesses though, so we only do it when optimizing for size, and only
10835 if we know that we will not need a fancy return sequence. */
10836 if (df_regs_ever_live_p (LR_REGNUM)
10839 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10840 && !current_function_calls_eh_return))
10841 save_reg_mask |= 1 << LR_REGNUM;
10843 if (cfun->machine->lr_save_eliminated)
10844 save_reg_mask &= ~ (1 << LR_REGNUM);
10846 if (TARGET_REALLY_IWMMXT
10847 && ((bit_count (save_reg_mask)
10848 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10850 /* The total number of registers that are going to be pushed
10851 onto the stack is odd. We need to ensure that the stack
10852 is 64-bit aligned before we start to save iWMMXt registers,
10853 and also before we start to create locals. (A local variable
10854 might be a double or long long which we will load/store using
10855 an iWMMXt instruction). Therefore we need to push another
10856 ARM register, so that the stack will be 64-bit aligned. We
10857 try to avoid using the arg registers (r0 -r3) as they might be
10858 used to pass values in a tail call. */
10859 for (reg = 4; reg <= 12; reg++)
10860 if ((save_reg_mask & (1 << reg)) == 0)
10864 save_reg_mask |= (1 << reg);
10867 cfun->machine->sibcall_blocked = 1;
10868 save_reg_mask |= (1 << 3);
10872 /* We may need to push an additional register for use initializing the
10873 PIC base register. */
10874 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10875 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10877 reg = thumb_find_work_register (1 << 4);
10878 if (!call_used_regs[reg])
10879 save_reg_mask |= (1 << reg);
10882 return save_reg_mask;
10886 /* Compute a bit mask of which registers need to be
10887 saved on the stack for the current function. */
10888 static unsigned long
10889 thumb1_compute_save_reg_mask (void)
10891 unsigned long mask;
10895 for (reg = 0; reg < 12; reg ++)
10896 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10900 && !TARGET_SINGLE_PIC_BASE
10901 && arm_pic_register != INVALID_REGNUM
10902 && current_function_uses_pic_offset_table)
10903 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10905 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10906 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10907 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10909 /* LR will also be pushed if any lo regs are pushed. */
10910 if (mask & 0xff || thumb_force_lr_save ())
10911 mask |= (1 << LR_REGNUM);
10913 /* Make sure we have a low work register if we need one.
10914 We will need one if we are going to push a high register,
10915 but we are not currently intending to push a low register. */
10916 if ((mask & 0xff) == 0
10917 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10919 /* Use thumb_find_work_register to choose which register
10920 we will use. If the register is live then we will
10921 have to push it. Use LAST_LO_REGNUM as our fallback
10922 choice for the register to select. */
10923 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10924 /* Make sure the register returned by thumb_find_work_register is
10925 not part of the return value. */
10926 if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
10927 reg = LAST_LO_REGNUM;
10929 if (! call_used_regs[reg])
10937 /* Return the number of bytes required to save VFP registers. */
10939 arm_get_vfp_saved_size (void)
10941 unsigned int regno;
10946 /* Space for saved VFP registers. */
10947 if (TARGET_HARD_FLOAT && TARGET_VFP)
10950 for (regno = FIRST_VFP_REGNUM;
10951 regno < LAST_VFP_REGNUM;
10954 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10955 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10959 /* Workaround ARM10 VFPr1 bug. */
10960 if (count == 2 && !arm_arch6)
10962 saved += count * 8;
10971 if (count == 2 && !arm_arch6)
10973 saved += count * 8;
10980 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10981 everything bar the final return instruction. */
10983 output_return_instruction (rtx operand, int really_return, int reverse)
10985 char conditional[10];
10988 unsigned long live_regs_mask;
10989 unsigned long func_type;
10990 arm_stack_offsets *offsets;
10992 func_type = arm_current_func_type ();
10994 if (IS_NAKED (func_type))
10997 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10999 /* If this function was declared non-returning, and we have
11000 found a tail call, then we have to trust that the called
11001 function won't return. */
11006 /* Otherwise, trap an attempted return by aborting. */
11008 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11010 assemble_external_libcall (ops[1]);
11011 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11017 gcc_assert (!current_function_calls_alloca || really_return);
11019 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11021 return_used_this_function = 1;
11023 live_regs_mask = arm_compute_save_reg_mask ();
11025 if (live_regs_mask)
11027 const char * return_reg;
11029 /* If we do not have any special requirements for function exit
11030 (e.g. interworking) then we can load the return address
11031 directly into the PC. Otherwise we must load it into LR. */
11033 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11034 return_reg = reg_names[PC_REGNUM];
11036 return_reg = reg_names[LR_REGNUM];
11038 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11040 /* There are three possible reasons for the IP register
11041 being saved. 1) a stack frame was created, in which case
11042 IP contains the old stack pointer, or 2) an ISR routine
11043 corrupted it, or 3) it was saved to align the stack on
11044 iWMMXt. In case 1, restore IP into SP, otherwise just
11046 if (frame_pointer_needed)
11048 live_regs_mask &= ~ (1 << IP_REGNUM);
11049 live_regs_mask |= (1 << SP_REGNUM);
11052 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11055 /* On some ARM architectures it is faster to use LDR rather than
11056 LDM to load a single register. On other architectures, the
11057 cost is the same. In 26 bit mode, or for exception handlers,
11058 we have to use LDM to load the PC so that the CPSR is also
11060 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11061 if (live_regs_mask == (1U << reg))
11064 if (reg <= LAST_ARM_REGNUM
11065 && (reg != LR_REGNUM
11067 || ! IS_INTERRUPT (func_type)))
11069 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11070 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11077 /* Generate the load multiple instruction to restore the
11078 registers. Note we can get here, even if
11079 frame_pointer_needed is true, but only if sp already
11080 points to the base of the saved core registers. */
11081 if (live_regs_mask & (1 << SP_REGNUM))
11083 unsigned HOST_WIDE_INT stack_adjust;
11085 offsets = arm_get_frame_offsets ();
11086 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11087 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11089 if (stack_adjust && arm_arch5 && TARGET_ARM)
11090 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11093 /* If we can't use ldmib (SA110 bug),
11094 then try to pop r3 instead. */
11096 live_regs_mask |= 1 << 3;
11097 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11101 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11103 p = instr + strlen (instr);
11105 for (reg = 0; reg <= SP_REGNUM; reg++)
11106 if (live_regs_mask & (1 << reg))
11108 int l = strlen (reg_names[reg]);
11114 memcpy (p, ", ", 2);
11118 memcpy (p, "%|", 2);
11119 memcpy (p + 2, reg_names[reg], l);
11123 if (live_regs_mask & (1 << LR_REGNUM))
11125 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11126 /* If returning from an interrupt, restore the CPSR. */
11127 if (IS_INTERRUPT (func_type))
11134 output_asm_insn (instr, & operand);
11136 /* See if we need to generate an extra instruction to
11137 perform the actual function return. */
11139 && func_type != ARM_FT_INTERWORKED
11140 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11142 /* The return has already been handled
11143 by loading the LR into the PC. */
11150 switch ((int) ARM_FUNC_TYPE (func_type))
11154 /* ??? This is wrong for unified assembly syntax. */
11155 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11158 case ARM_FT_INTERWORKED:
11159 sprintf (instr, "bx%s\t%%|lr", conditional);
11162 case ARM_FT_EXCEPTION:
11163 /* ??? This is wrong for unified assembly syntax. */
11164 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11168 /* Use bx if it's available. */
11169 if (arm_arch5 || arm_arch4t)
11170 sprintf (instr, "bx%s\t%%|lr", conditional);
11172 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11176 output_asm_insn (instr, & operand);
11182 /* Write the function name into the code section, directly preceding
11183 the function prologue.
11185 Code will be output similar to this:
11187 .ascii "arm_poke_function_name", 0
11190 .word 0xff000000 + (t1 - t0)
11191 arm_poke_function_name
11193 stmfd sp!, {fp, ip, lr, pc}
11196 When performing a stack backtrace, code can inspect the value
11197 of 'pc' stored at 'fp' + 0. If the trace function then looks
11198 at location pc - 12 and the top 8 bits are set, then we know
11199 that there is a function name embedded immediately preceding this
11200 location and has length ((pc[-3]) & 0xff000000).
11202 We assume that pc is declared as a pointer to an unsigned long.
11204 It is of no benefit to output the function name if we are assembling
11205 a leaf function. These function types will not contain a stack
11206 backtrace structure, therefore it is not possible to determine the
11209 arm_poke_function_name (FILE *stream, const char *name)
11211 unsigned long alignlength;
11212 unsigned long length;
11215 length = strlen (name) + 1;
11216 alignlength = ROUND_UP_WORD (length);
11218 ASM_OUTPUT_ASCII (stream, name, length);
11219 ASM_OUTPUT_ALIGN (stream, 2);
11220 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11221 assemble_aligned_integer (UNITS_PER_WORD, x);
11224 /* Place some comments into the assembler stream
11225 describing the current function. */
11227 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11229 unsigned long func_type;
11233 thumb1_output_function_prologue (f, frame_size);
11237 /* Sanity check. */
11238 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11240 func_type = arm_current_func_type ();
11242 switch ((int) ARM_FUNC_TYPE (func_type))
11245 case ARM_FT_NORMAL:
11247 case ARM_FT_INTERWORKED:
11248 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11251 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11254 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11256 case ARM_FT_EXCEPTION:
11257 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11261 if (IS_NAKED (func_type))
11262 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11264 if (IS_VOLATILE (func_type))
11265 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11267 if (IS_NESTED (func_type))
11268 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11269 if (IS_STACKALIGN (func_type))
11270 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11272 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11273 current_function_args_size,
11274 current_function_pretend_args_size, frame_size);
11276 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11277 frame_pointer_needed,
11278 cfun->machine->uses_anonymous_args);
11280 if (cfun->machine->lr_save_eliminated)
11281 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11283 if (current_function_calls_eh_return)
11284 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11286 #ifdef AOF_ASSEMBLER
11288 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
11291 return_used_this_function = 0;
11295 arm_output_epilogue (rtx sibling)
11298 unsigned long saved_regs_mask;
11299 unsigned long func_type;
11300 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11301 frame that is $fp + 4 for a non-variadic function. */
11302 int floats_offset = 0;
11304 FILE * f = asm_out_file;
11305 unsigned int lrm_count = 0;
11306 int really_return = (sibling == NULL);
11308 arm_stack_offsets *offsets;
11310 /* If we have already generated the return instruction
11311 then it is futile to generate anything else. */
11312 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11315 func_type = arm_current_func_type ();
11317 if (IS_NAKED (func_type))
11318 /* Naked functions don't have epilogues. */
11321 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11325 /* A volatile function should never return. Call abort. */
11326 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11327 assemble_external_libcall (op);
11328 output_asm_insn ("bl\t%a0", &op);
11333 /* If we are throwing an exception, then we really must be doing a
11334 return, so we can't tail-call. */
11335 gcc_assert (!current_function_calls_eh_return || really_return);
11337 offsets = arm_get_frame_offsets ();
11338 saved_regs_mask = arm_compute_save_reg_mask ();
11341 lrm_count = bit_count (saved_regs_mask);
11343 floats_offset = offsets->saved_args;
11344 /* Compute how far away the floats will be. */
11345 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11346 if (saved_regs_mask & (1 << reg))
11347 floats_offset += 4;
11349 if (frame_pointer_needed && TARGET_ARM)
11351 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11352 int vfp_offset = offsets->frame;
11354 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11356 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11357 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11359 floats_offset += 12;
11360 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11361 reg, FP_REGNUM, floats_offset - vfp_offset);
11366 start_reg = LAST_FPA_REGNUM;
11368 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11370 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11372 floats_offset += 12;
11374 /* We can't unstack more than four registers at once. */
11375 if (start_reg - reg == 3)
11377 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11378 reg, FP_REGNUM, floats_offset - vfp_offset);
11379 start_reg = reg - 1;
11384 if (reg != start_reg)
11385 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11386 reg + 1, start_reg - reg,
11387 FP_REGNUM, floats_offset - vfp_offset);
11388 start_reg = reg - 1;
11392 /* Just in case the last register checked also needs unstacking. */
11393 if (reg != start_reg)
11394 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11395 reg + 1, start_reg - reg,
11396 FP_REGNUM, floats_offset - vfp_offset);
11399 if (TARGET_HARD_FLOAT && TARGET_VFP)
11403 /* The fldmd insns do not have base+offset addressing
11404 modes, so we use IP to hold the address. */
11405 saved_size = arm_get_vfp_saved_size ();
11407 if (saved_size > 0)
11409 floats_offset += saved_size;
11410 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11411 FP_REGNUM, floats_offset - vfp_offset);
11413 start_reg = FIRST_VFP_REGNUM;
11414 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11416 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11417 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11419 if (start_reg != reg)
11420 vfp_output_fldmd (f, IP_REGNUM,
11421 (start_reg - FIRST_VFP_REGNUM) / 2,
11422 (reg - start_reg) / 2);
11423 start_reg = reg + 2;
11426 if (start_reg != reg)
11427 vfp_output_fldmd (f, IP_REGNUM,
11428 (start_reg - FIRST_VFP_REGNUM) / 2,
11429 (reg - start_reg) / 2);
11434 /* The frame pointer is guaranteed to be non-double-word aligned.
11435 This is because it is set to (old_stack_pointer - 4) and the
11436 old_stack_pointer was double word aligned. Thus the offset to
11437 the iWMMXt registers to be loaded must also be non-double-word
11438 sized, so that the resultant address *is* double-word aligned.
11439 We can ignore floats_offset since that was already included in
11440 the live_regs_mask. */
11441 lrm_count += (lrm_count % 2 ? 2 : 1);
11443 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11444 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11446 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11447 reg, FP_REGNUM, lrm_count * 4);
11452 /* saved_regs_mask should contain the IP, which at the time of stack
11453 frame generation actually contains the old stack pointer. So a
11454 quick way to unwind the stack is just pop the IP register directly
11455 into the stack pointer. */
11456 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11457 saved_regs_mask &= ~ (1 << IP_REGNUM);
11458 saved_regs_mask |= (1 << SP_REGNUM);
11460 /* There are two registers left in saved_regs_mask - LR and PC. We
11461 only need to restore the LR register (the return address), but to
11462 save time we can load it directly into the PC, unless we need a
11463 special function exit sequence, or we are not really returning. */
11465 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11466 && !current_function_calls_eh_return)
11467 /* Delete the LR from the register mask, so that the LR on
11468 the stack is loaded into the PC in the register mask. */
11469 saved_regs_mask &= ~ (1 << LR_REGNUM);
11471 saved_regs_mask &= ~ (1 << PC_REGNUM);
11473 /* We must use SP as the base register, because SP is one of the
11474 registers being restored. If an interrupt or page fault
11475 happens in the ldm instruction, the SP might or might not
11476 have been restored. That would be bad, as then SP will no
11477 longer indicate the safe area of stack, and we can get stack
11478 corruption. Using SP as the base register means that it will
11479 be reset correctly to the original value, should an interrupt
11480 occur. If the stack pointer already points at the right
11481 place, then omit the subtraction. */
11482 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11483 || current_function_calls_alloca)
11484 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11485 4 * bit_count (saved_regs_mask));
11486 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11488 if (IS_INTERRUPT (func_type))
11489 /* Interrupt handlers will have pushed the
11490 IP onto the stack, so restore it now. */
11491 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11495 HOST_WIDE_INT amount;
11497 /* Restore stack pointer if necessary. */
11498 if (frame_pointer_needed)
11500 /* For Thumb-2 restore sp from the frame pointer.
11501 Operand restrictions mean we have to increment FP, then copy
11503 amount = offsets->locals_base - offsets->saved_regs;
11504 operands[0] = hard_frame_pointer_rtx;
11508 operands[0] = stack_pointer_rtx;
11509 amount = offsets->outgoing_args - offsets->saved_regs;
11514 operands[1] = operands[0];
11515 operands[2] = GEN_INT (amount);
11516 output_add_immediate (operands);
11518 if (frame_pointer_needed)
11519 asm_fprintf (f, "\tmov\t%r, %r\n",
11520 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11522 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11524 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11525 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11526 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11531 start_reg = FIRST_FPA_REGNUM;
11533 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11535 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11537 if (reg - start_reg == 3)
11539 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11540 start_reg, SP_REGNUM);
11541 start_reg = reg + 1;
11546 if (reg != start_reg)
11547 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11548 start_reg, reg - start_reg,
11551 start_reg = reg + 1;
11555 /* Just in case the last register checked also needs unstacking. */
11556 if (reg != start_reg)
11557 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11558 start_reg, reg - start_reg, SP_REGNUM);
11561 if (TARGET_HARD_FLOAT && TARGET_VFP)
11563 start_reg = FIRST_VFP_REGNUM;
11564 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11566 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11567 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11569 if (start_reg != reg)
11570 vfp_output_fldmd (f, SP_REGNUM,
11571 (start_reg - FIRST_VFP_REGNUM) / 2,
11572 (reg - start_reg) / 2);
11573 start_reg = reg + 2;
11576 if (start_reg != reg)
11577 vfp_output_fldmd (f, SP_REGNUM,
11578 (start_reg - FIRST_VFP_REGNUM) / 2,
11579 (reg - start_reg) / 2);
11582 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11583 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11584 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11586 /* If we can, restore the LR into the PC. */
11587 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11588 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11589 && !IS_STACKALIGN (func_type)
11591 && current_function_pretend_args_size == 0
11592 && saved_regs_mask & (1 << LR_REGNUM)
11593 && !current_function_calls_eh_return)
11595 saved_regs_mask &= ~ (1 << LR_REGNUM);
11596 saved_regs_mask |= (1 << PC_REGNUM);
11597 rfe = IS_INTERRUPT (func_type);
11602 /* Load the registers off the stack. If we only have one register
11603 to load use the LDR instruction - it is faster. For Thumb-2
11604 always use pop and the assembler will pick the best instruction.*/
11605 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11606 && !IS_INTERRUPT(func_type))
11608 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11610 else if (saved_regs_mask)
11612 if (saved_regs_mask & (1 << SP_REGNUM))
11613 /* Note - write back to the stack register is not enabled
11614 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11615 in the list of registers and if we add writeback the
11616 instruction becomes UNPREDICTABLE. */
11617 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11619 else if (TARGET_ARM)
11620 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11623 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11626 if (current_function_pretend_args_size)
11628 /* Unwind the pre-pushed regs. */
11629 operands[0] = operands[1] = stack_pointer_rtx;
11630 operands[2] = GEN_INT (current_function_pretend_args_size);
11631 output_add_immediate (operands);
11635 /* We may have already restored PC directly from the stack. */
11636 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11639 /* Stack adjustment for exception handler. */
11640 if (current_function_calls_eh_return)
11641 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11642 ARM_EH_STACKADJ_REGNUM);
11644 /* Generate the return instruction. */
11645 switch ((int) ARM_FUNC_TYPE (func_type))
11649 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11652 case ARM_FT_EXCEPTION:
11653 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11656 case ARM_FT_INTERWORKED:
11657 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11661 if (IS_STACKALIGN (func_type))
11663 /* See comment in arm_expand_prologue. */
11664 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11666 if (arm_arch5 || arm_arch4t)
11667 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11669 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11677 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11678 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11680 arm_stack_offsets *offsets;
11686 /* Emit any call-via-reg trampolines that are needed for v4t support
11687 of call_reg and call_value_reg type insns. */
11688 for (regno = 0; regno < LR_REGNUM; regno++)
11690 rtx label = cfun->machine->call_via[regno];
11694 switch_to_section (function_section (current_function_decl));
11695 targetm.asm_out.internal_label (asm_out_file, "L",
11696 CODE_LABEL_NUMBER (label));
11697 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11701 /* ??? Probably not safe to set this here, since it assumes that a
11702 function will be emitted as assembly immediately after we generate
11703 RTL for it. This does not happen for inline functions. */
11704 return_used_this_function = 0;
11706 else /* TARGET_32BIT */
11708 /* We need to take into account any stack-frame rounding. */
11709 offsets = arm_get_frame_offsets ();
11711 gcc_assert (!use_return_insn (FALSE, NULL)
11712 || !return_used_this_function
11713 || offsets->saved_regs == offsets->outgoing_args
11714 || frame_pointer_needed);
11716 /* Reset the ARM-specific per-function variables. */
11717 after_arm_reorg = 0;
11721 /* Generate and emit an insn that we will recognize as a push_multi.
11722 Unfortunately, since this insn does not reflect very well the actual
11723 semantics of the operation, we need to annotate the insn for the benefit
11724 of DWARF2 frame unwind information. */
11726 emit_multi_reg_push (unsigned long mask)
11729 int num_dwarf_regs;
11733 int dwarf_par_index;
11736 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11737 if (mask & (1 << i))
11740 gcc_assert (num_regs && num_regs <= 16);
11742 /* We don't record the PC in the dwarf frame information. */
11743 num_dwarf_regs = num_regs;
11744 if (mask & (1 << PC_REGNUM))
11747 /* For the body of the insn we are going to generate an UNSPEC in
11748 parallel with several USEs. This allows the insn to be recognized
11749 by the push_multi pattern in the arm.md file. The insn looks
11750 something like this:
11753 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11754 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11755 (use (reg:SI 11 fp))
11756 (use (reg:SI 12 ip))
11757 (use (reg:SI 14 lr))
11758 (use (reg:SI 15 pc))
11761 For the frame note however, we try to be more explicit and actually
11762 show each register being stored into the stack frame, plus a (single)
11763 decrement of the stack pointer. We do it this way in order to be
11764 friendly to the stack unwinding code, which only wants to see a single
11765 stack decrement per instruction. The RTL we generate for the note looks
11766 something like this:
11769 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11770 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11771 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11772 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11773 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11776 This sequence is used both by the code to support stack unwinding for
11777 exceptions handlers and the code to generate dwarf2 frame debugging. */
11779 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11780 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11781 dwarf_par_index = 1;
11783 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11785 if (mask & (1 << i))
11787 reg = gen_rtx_REG (SImode, i);
11789 XVECEXP (par, 0, 0)
11790 = gen_rtx_SET (VOIDmode,
11791 gen_frame_mem (BLKmode,
11792 gen_rtx_PRE_DEC (BLKmode,
11793 stack_pointer_rtx)),
11794 gen_rtx_UNSPEC (BLKmode,
11795 gen_rtvec (1, reg),
11796 UNSPEC_PUSH_MULT));
11798 if (i != PC_REGNUM)
11800 tmp = gen_rtx_SET (VOIDmode,
11801 gen_frame_mem (SImode, stack_pointer_rtx),
11803 RTX_FRAME_RELATED_P (tmp) = 1;
11804 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11812 for (j = 1, i++; j < num_regs; i++)
11814 if (mask & (1 << i))
11816 reg = gen_rtx_REG (SImode, i);
11818 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11820 if (i != PC_REGNUM)
11823 = gen_rtx_SET (VOIDmode,
11824 gen_frame_mem (SImode,
11825 plus_constant (stack_pointer_rtx,
11828 RTX_FRAME_RELATED_P (tmp) = 1;
11829 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11836 par = emit_insn (par);
11838 tmp = gen_rtx_SET (VOIDmode,
11840 plus_constant (stack_pointer_rtx, -4 * num_regs));
11841 RTX_FRAME_RELATED_P (tmp) = 1;
11842 XVECEXP (dwarf, 0, 0) = tmp;
11844 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11849 /* Calculate the size of the return value that is passed in registers. */
11851 arm_size_return_regs (void)
11853 enum machine_mode mode;
11855 if (current_function_return_rtx != 0)
11856 mode = GET_MODE (current_function_return_rtx);
11858 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11860 return GET_MODE_SIZE (mode);
11864 emit_sfm (int base_reg, int count)
11871 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11872 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11874 reg = gen_rtx_REG (XFmode, base_reg++);
11876 XVECEXP (par, 0, 0)
11877 = gen_rtx_SET (VOIDmode,
11878 gen_frame_mem (BLKmode,
11879 gen_rtx_PRE_DEC (BLKmode,
11880 stack_pointer_rtx)),
11881 gen_rtx_UNSPEC (BLKmode,
11882 gen_rtvec (1, reg),
11883 UNSPEC_PUSH_MULT));
11884 tmp = gen_rtx_SET (VOIDmode,
11885 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11886 RTX_FRAME_RELATED_P (tmp) = 1;
11887 XVECEXP (dwarf, 0, 1) = tmp;
11889 for (i = 1; i < count; i++)
11891 reg = gen_rtx_REG (XFmode, base_reg++);
11892 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11894 tmp = gen_rtx_SET (VOIDmode,
11895 gen_frame_mem (XFmode,
11896 plus_constant (stack_pointer_rtx,
11899 RTX_FRAME_RELATED_P (tmp) = 1;
11900 XVECEXP (dwarf, 0, i + 1) = tmp;
11903 tmp = gen_rtx_SET (VOIDmode,
11905 plus_constant (stack_pointer_rtx, -12 * count));
11907 RTX_FRAME_RELATED_P (tmp) = 1;
11908 XVECEXP (dwarf, 0, 0) = tmp;
11910 par = emit_insn (par);
11911 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11917 /* Return true if the current function needs to save/restore LR. */
11920 thumb_force_lr_save (void)
11922 return !cfun->machine->lr_save_eliminated
11923 && (!leaf_function_p ()
11924 || thumb_far_jump_used_p ()
11925 || df_regs_ever_live_p (LR_REGNUM));
11929 /* Compute the distance from register FROM to register TO.
11930 These can be the arg pointer (26), the soft frame pointer (25),
11931 the stack pointer (13) or the hard frame pointer (11).
11932 In thumb mode r7 is used as the soft frame pointer, if needed.
11933 Typical stack layout looks like this:
11935 old stack pointer -> | |
11938 | | saved arguments for
11939 | | vararg functions
11942 hard FP & arg pointer -> | | \
11950 soft frame pointer -> | | /
11955 locals base pointer -> | | /
11960 current stack pointer -> | | /
11963 For a given function some or all of these stack components
11964 may not be needed, giving rise to the possibility of
11965 eliminating some of the registers.
11967 The values returned by this function must reflect the behavior
11968 of arm_expand_prologue() and arm_compute_save_reg_mask().
11970 The sign of the number returned reflects the direction of stack
11971 growth, so the values are positive for all eliminations except
11972 from the soft frame pointer to the hard frame pointer.
11974 SFP may point just inside the local variables block to ensure correct
11978 /* Calculate stack offsets. These are used to calculate register elimination
11979 offsets and in prologue/epilogue code. */
11981 static arm_stack_offsets *
11982 arm_get_frame_offsets (void)
11984 struct arm_stack_offsets *offsets;
11985 unsigned long func_type;
11988 HOST_WIDE_INT frame_size;
11990 offsets = &cfun->machine->stack_offsets;
11992 /* We need to know if we are a leaf function. Unfortunately, it
11993 is possible to be called after start_sequence has been called,
11994 which causes get_insns to return the insns for the sequence,
11995 not the function, which will cause leaf_function_p to return
11996 the incorrect result.
11998 to know about leaf functions once reload has completed, and the
11999 frame size cannot be changed after that time, so we can safely
12000 use the cached value. */
12002 if (reload_completed)
12005 /* Initially this is the size of the local variables. It will translated
12006 into an offset once we have determined the size of preceding data. */
12007 frame_size = ROUND_UP_WORD (get_frame_size ());
12009 leaf = leaf_function_p ();
12011 /* Space for variadic functions. */
12012 offsets->saved_args = current_function_pretend_args_size;
12014 /* In Thumb mode this is incorrect, but never used. */
12015 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12019 unsigned int regno;
12021 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
12023 /* We know that SP will be doubleword aligned on entry, and we must
12024 preserve that condition at any subroutine call. We also require the
12025 soft frame pointer to be doubleword aligned. */
12027 if (TARGET_REALLY_IWMMXT)
12029 /* Check for the call-saved iWMMXt registers. */
12030 for (regno = FIRST_IWMMXT_REGNUM;
12031 regno <= LAST_IWMMXT_REGNUM;
12033 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12037 func_type = arm_current_func_type ();
12038 if (! IS_VOLATILE (func_type))
12040 /* Space for saved FPA registers. */
12041 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12042 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12045 /* Space for saved VFP registers. */
12046 if (TARGET_HARD_FLOAT && TARGET_VFP)
12047 saved += arm_get_vfp_saved_size ();
12050 else /* TARGET_THUMB1 */
12052 saved = bit_count (thumb1_compute_save_reg_mask ()) * 4;
12053 if (TARGET_BACKTRACE)
12057 /* Saved registers include the stack frame. */
12058 offsets->saved_regs = offsets->saved_args + saved;
12059 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12060 /* A leaf function does not need any stack alignment if it has nothing
12062 if (leaf && frame_size == 0)
12064 offsets->outgoing_args = offsets->soft_frame;
12065 offsets->locals_base = offsets->soft_frame;
12069 /* Ensure SFP has the correct alignment. */
12070 if (ARM_DOUBLEWORD_ALIGN
12071 && (offsets->soft_frame & 7))
12072 offsets->soft_frame += 4;
12074 offsets->locals_base = offsets->soft_frame + frame_size;
12075 offsets->outgoing_args = (offsets->locals_base
12076 + current_function_outgoing_args_size);
12078 if (ARM_DOUBLEWORD_ALIGN)
12080 /* Ensure SP remains doubleword aligned. */
12081 if (offsets->outgoing_args & 7)
12082 offsets->outgoing_args += 4;
12083 gcc_assert (!(offsets->outgoing_args & 7));
12090 /* Calculate the relative offsets for the different stack pointers. Positive
12091 offsets are in the direction of stack growth. */
12094 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12096 arm_stack_offsets *offsets;
12098 offsets = arm_get_frame_offsets ();
12100 /* OK, now we have enough information to compute the distances.
12101 There must be an entry in these switch tables for each pair
12102 of registers in ELIMINABLE_REGS, even if some of the entries
12103 seem to be redundant or useless. */
12106 case ARG_POINTER_REGNUM:
12109 case THUMB_HARD_FRAME_POINTER_REGNUM:
12112 case FRAME_POINTER_REGNUM:
12113 /* This is the reverse of the soft frame pointer
12114 to hard frame pointer elimination below. */
12115 return offsets->soft_frame - offsets->saved_args;
12117 case ARM_HARD_FRAME_POINTER_REGNUM:
12118 /* If there is no stack frame then the hard
12119 frame pointer and the arg pointer coincide. */
12120 if (offsets->frame == offsets->saved_regs)
12122 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12123 return (frame_pointer_needed
12124 && cfun->static_chain_decl != NULL
12125 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12127 case STACK_POINTER_REGNUM:
12128 /* If nothing has been pushed on the stack at all
12129 then this will return -4. This *is* correct! */
12130 return offsets->outgoing_args - (offsets->saved_args + 4);
12133 gcc_unreachable ();
12135 gcc_unreachable ();
12137 case FRAME_POINTER_REGNUM:
12140 case THUMB_HARD_FRAME_POINTER_REGNUM:
12143 case ARM_HARD_FRAME_POINTER_REGNUM:
12144 /* The hard frame pointer points to the top entry in the
12145 stack frame. The soft frame pointer to the bottom entry
12146 in the stack frame. If there is no stack frame at all,
12147 then they are identical. */
12149 return offsets->frame - offsets->soft_frame;
12151 case STACK_POINTER_REGNUM:
12152 return offsets->outgoing_args - offsets->soft_frame;
12155 gcc_unreachable ();
12157 gcc_unreachable ();
12160 /* You cannot eliminate from the stack pointer.
12161 In theory you could eliminate from the hard frame
12162 pointer to the stack pointer, but this will never
12163 happen, since if a stack frame is not needed the
12164 hard frame pointer will never be used. */
12165 gcc_unreachable ();
12170 /* Emit RTL to save coprocessor registers on function entry. Returns the
12171 number of bytes pushed. */
12174 arm_save_coproc_regs(void)
12176 int saved_size = 0;
12178 unsigned start_reg;
12181 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12182 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12184 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12185 insn = gen_rtx_MEM (V2SImode, insn);
12186 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12187 RTX_FRAME_RELATED_P (insn) = 1;
12191 /* Save any floating point call-saved registers used by this
12193 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12195 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12196 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12198 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12199 insn = gen_rtx_MEM (XFmode, insn);
12200 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12201 RTX_FRAME_RELATED_P (insn) = 1;
12207 start_reg = LAST_FPA_REGNUM;
12209 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12211 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12213 if (start_reg - reg == 3)
12215 insn = emit_sfm (reg, 4);
12216 RTX_FRAME_RELATED_P (insn) = 1;
12218 start_reg = reg - 1;
12223 if (start_reg != reg)
12225 insn = emit_sfm (reg + 1, start_reg - reg);
12226 RTX_FRAME_RELATED_P (insn) = 1;
12227 saved_size += (start_reg - reg) * 12;
12229 start_reg = reg - 1;
12233 if (start_reg != reg)
12235 insn = emit_sfm (reg + 1, start_reg - reg);
12236 saved_size += (start_reg - reg) * 12;
12237 RTX_FRAME_RELATED_P (insn) = 1;
12240 if (TARGET_HARD_FLOAT && TARGET_VFP)
12242 start_reg = FIRST_VFP_REGNUM;
12244 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12246 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12247 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12249 if (start_reg != reg)
12250 saved_size += vfp_emit_fstmd (start_reg,
12251 (reg - start_reg) / 2);
12252 start_reg = reg + 2;
12255 if (start_reg != reg)
12256 saved_size += vfp_emit_fstmd (start_reg,
12257 (reg - start_reg) / 2);
12263 /* Set the Thumb frame pointer from the stack pointer. */
12266 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12268 HOST_WIDE_INT amount;
12271 amount = offsets->outgoing_args - offsets->locals_base;
12273 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12274 stack_pointer_rtx, GEN_INT (amount)));
12277 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12278 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12279 hard_frame_pointer_rtx,
12280 stack_pointer_rtx));
12281 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12282 plus_constant (stack_pointer_rtx, amount));
12283 RTX_FRAME_RELATED_P (dwarf) = 1;
12284 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12288 RTX_FRAME_RELATED_P (insn) = 1;
12291 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12294 arm_expand_prologue (void)
12299 unsigned long live_regs_mask;
12300 unsigned long func_type;
12302 int saved_pretend_args = 0;
12303 int saved_regs = 0;
12304 unsigned HOST_WIDE_INT args_to_push;
12305 arm_stack_offsets *offsets;
12307 func_type = arm_current_func_type ();
12309 /* Naked functions don't have prologues. */
12310 if (IS_NAKED (func_type))
12313 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12314 args_to_push = current_function_pretend_args_size;
12316 /* Compute which register we will have to save onto the stack. */
12317 live_regs_mask = arm_compute_save_reg_mask ();
12319 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12321 if (IS_STACKALIGN (func_type))
12326 /* Handle a word-aligned stack pointer. We generate the following:
12331 <save and restore r0 in normal prologue/epilogue>
12335 The unwinder doesn't need to know about the stack realignment.
12336 Just tell it we saved SP in r0. */
12337 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12339 r0 = gen_rtx_REG (SImode, 0);
12340 r1 = gen_rtx_REG (SImode, 1);
12341 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12342 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12343 insn = gen_movsi (r0, stack_pointer_rtx);
12344 RTX_FRAME_RELATED_P (insn) = 1;
12345 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12346 dwarf, REG_NOTES (insn));
12348 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12349 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12352 if (frame_pointer_needed && TARGET_ARM)
12354 if (IS_INTERRUPT (func_type))
12356 /* Interrupt functions must not corrupt any registers.
12357 Creating a frame pointer however, corrupts the IP
12358 register, so we must push it first. */
12359 insn = emit_multi_reg_push (1 << IP_REGNUM);
12361 /* Do not set RTX_FRAME_RELATED_P on this insn.
12362 The dwarf stack unwinding code only wants to see one
12363 stack decrement per function, and this is not it. If
12364 this instruction is labeled as being part of the frame
12365 creation sequence then dwarf2out_frame_debug_expr will
12366 die when it encounters the assignment of IP to FP
12367 later on, since the use of SP here establishes SP as
12368 the CFA register and not IP.
12370 Anyway this instruction is not really part of the stack
12371 frame creation although it is part of the prologue. */
12373 else if (IS_NESTED (func_type))
12375 /* The Static chain register is the same as the IP register
12376 used as a scratch register during stack frame creation.
12377 To get around this need to find somewhere to store IP
12378 whilst the frame is being created. We try the following
12381 1. The last argument register.
12382 2. A slot on the stack above the frame. (This only
12383 works if the function is not a varargs function).
12384 3. Register r3, after pushing the argument registers
12387 Note - we only need to tell the dwarf2 backend about the SP
12388 adjustment in the second variant; the static chain register
12389 doesn't need to be unwound, as it doesn't contain a value
12390 inherited from the caller. */
12392 if (df_regs_ever_live_p (3) == false)
12393 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12394 else if (args_to_push == 0)
12398 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12399 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12402 /* Just tell the dwarf backend that we adjusted SP. */
12403 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12404 plus_constant (stack_pointer_rtx,
12406 RTX_FRAME_RELATED_P (insn) = 1;
12407 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12408 dwarf, REG_NOTES (insn));
12412 /* Store the args on the stack. */
12413 if (cfun->machine->uses_anonymous_args)
12414 insn = emit_multi_reg_push
12415 ((0xf0 >> (args_to_push / 4)) & 0xf);
12418 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12419 GEN_INT (- args_to_push)));
12421 RTX_FRAME_RELATED_P (insn) = 1;
12423 saved_pretend_args = 1;
12424 fp_offset = args_to_push;
12427 /* Now reuse r3 to preserve IP. */
12428 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12432 insn = emit_set_insn (ip_rtx,
12433 plus_constant (stack_pointer_rtx, fp_offset));
12434 RTX_FRAME_RELATED_P (insn) = 1;
12439 /* Push the argument registers, or reserve space for them. */
12440 if (cfun->machine->uses_anonymous_args)
12441 insn = emit_multi_reg_push
12442 ((0xf0 >> (args_to_push / 4)) & 0xf);
12445 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12446 GEN_INT (- args_to_push)));
12447 RTX_FRAME_RELATED_P (insn) = 1;
12450 /* If this is an interrupt service routine, and the link register
12451 is going to be pushed, and we are not creating a stack frame,
12452 (which would involve an extra push of IP and a pop in the epilogue)
12453 subtracting four from LR now will mean that the function return
12454 can be done with a single instruction. */
12455 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12456 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12457 && ! frame_pointer_needed
12460 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12462 emit_set_insn (lr, plus_constant (lr, -4));
12465 if (live_regs_mask)
12467 insn = emit_multi_reg_push (live_regs_mask);
12468 saved_regs += bit_count (live_regs_mask) * 4;
12469 RTX_FRAME_RELATED_P (insn) = 1;
12472 if (! IS_VOLATILE (func_type))
12473 saved_regs += arm_save_coproc_regs ();
12475 if (frame_pointer_needed && TARGET_ARM)
12477 /* Create the new frame pointer. */
12479 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12480 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12481 RTX_FRAME_RELATED_P (insn) = 1;
12483 if (IS_NESTED (func_type))
12485 /* Recover the static chain register. */
12486 if (!df_regs_ever_live_p (3)
12487 || saved_pretend_args)
12488 insn = gen_rtx_REG (SImode, 3);
12489 else /* if (current_function_pretend_args_size == 0) */
12491 insn = plus_constant (hard_frame_pointer_rtx, 4);
12492 insn = gen_frame_mem (SImode, insn);
12494 emit_set_insn (ip_rtx, insn);
12495 /* Add a USE to stop propagate_one_insn() from barfing. */
12496 emit_insn (gen_prologue_use (ip_rtx));
12501 offsets = arm_get_frame_offsets ();
12502 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12504 /* This add can produce multiple insns for a large constant, so we
12505 need to get tricky. */
12506 rtx last = get_last_insn ();
12508 amount = GEN_INT (offsets->saved_args + saved_regs
12509 - offsets->outgoing_args);
12511 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12515 last = last ? NEXT_INSN (last) : get_insns ();
12516 RTX_FRAME_RELATED_P (last) = 1;
12518 while (last != insn);
12520 /* If the frame pointer is needed, emit a special barrier that
12521 will prevent the scheduler from moving stores to the frame
12522 before the stack adjustment. */
12523 if (frame_pointer_needed)
12524 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12525 hard_frame_pointer_rtx));
12529 if (frame_pointer_needed && TARGET_THUMB2)
12530 thumb_set_frame_pointer (offsets);
12532 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12534 unsigned long mask;
12536 mask = live_regs_mask;
12537 mask &= THUMB2_WORK_REGS;
12538 if (!IS_NESTED (func_type))
12539 mask |= (1 << IP_REGNUM);
12540 arm_load_pic_register (mask);
12543 /* If we are profiling, make sure no instructions are scheduled before
12544 the call to mcount. Similarly if the user has requested no
12545 scheduling in the prolog. Similarly if we want non-call exceptions
12546 using the EABI unwinder, to prevent faulting instructions from being
12547 swapped with a stack adjustment. */
12548 if (current_function_profile || !TARGET_SCHED_PROLOG
12549 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12550 emit_insn (gen_blockage ());
12552 /* If the link register is being kept alive, with the return address in it,
12553 then make sure that it does not get reused by the ce2 pass. */
12554 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12555 cfun->machine->lr_save_eliminated = 1;
12558 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12560 arm_print_condition (FILE *stream)
12562 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12564 /* Branch conversion is not implemented for Thumb-2. */
12567 output_operand_lossage ("predicated Thumb instruction");
12570 if (current_insn_predicate != NULL)
12572 output_operand_lossage
12573 ("predicated instruction in conditional sequence");
12577 fputs (arm_condition_codes[arm_current_cc], stream);
12579 else if (current_insn_predicate)
12581 enum arm_cond_code code;
12585 output_operand_lossage ("predicated Thumb instruction");
12589 code = get_arm_condition_code (current_insn_predicate);
12590 fputs (arm_condition_codes[code], stream);
12595 /* If CODE is 'd', then the X is a condition operand and the instruction
12596 should only be executed if the condition is true.
12597 if CODE is 'D', then the X is a condition operand and the instruction
12598 should only be executed if the condition is false: however, if the mode
12599 of the comparison is CCFPEmode, then always execute the instruction -- we
12600 do this because in these circumstances !GE does not necessarily imply LT;
12601 in these cases the instruction pattern will take care to make sure that
12602 an instruction containing %d will follow, thereby undoing the effects of
12603 doing this instruction unconditionally.
12604 If CODE is 'N' then X is a floating point operand that must be negated
12606 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12607 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12609 arm_print_operand (FILE *stream, rtx x, int code)
12614 fputs (ASM_COMMENT_START, stream);
12618 fputs (user_label_prefix, stream);
12622 fputs (REGISTER_PREFIX, stream);
12626 arm_print_condition (stream);
12630 /* Nothing in unified syntax, otherwise the current condition code. */
12631 if (!TARGET_UNIFIED_ASM)
12632 arm_print_condition (stream);
12636 /* The current condition code in unified syntax, otherwise nothing. */
12637 if (TARGET_UNIFIED_ASM)
12638 arm_print_condition (stream);
12642 /* The current condition code for a condition code setting instruction.
12643 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12644 if (TARGET_UNIFIED_ASM)
12646 fputc('s', stream);
12647 arm_print_condition (stream);
12651 arm_print_condition (stream);
12652 fputc('s', stream);
12657 /* If the instruction is conditionally executed then print
12658 the current condition code, otherwise print 's'. */
12659 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12660 if (current_insn_predicate)
12661 arm_print_condition (stream);
12663 fputc('s', stream);
12666 /* %# is a "break" sequence. It doesn't output anything, but is used to
12667 seperate e.g. operand numbers from following text, if that text consists
12668 of further digits which we don't want to be part of the operand
12676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12677 r = REAL_VALUE_NEGATE (r);
12678 fprintf (stream, "%s", fp_const_from_val (&r));
12682 /* An integer without a preceding # sign. */
12684 gcc_assert (GET_CODE (x) == CONST_INT);
12685 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12689 if (GET_CODE (x) == CONST_INT)
12692 val = ARM_SIGN_EXTEND (~INTVAL (x));
12693 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12697 putc ('~', stream);
12698 output_addr_const (stream, x);
12703 /* The low 16 bits of an immediate constant. */
12704 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12708 fprintf (stream, "%s", arithmetic_instr (x, 1));
12711 /* Truncate Cirrus shift counts. */
12713 if (GET_CODE (x) == CONST_INT)
12715 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12718 arm_print_operand (stream, x, 0);
12722 fprintf (stream, "%s", arithmetic_instr (x, 0));
12730 if (!shift_operator (x, SImode))
12732 output_operand_lossage ("invalid shift operand");
12736 shift = shift_op (x, &val);
12740 fprintf (stream, ", %s ", shift);
12742 arm_print_operand (stream, XEXP (x, 1), 0);
12744 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12749 /* An explanation of the 'Q', 'R' and 'H' register operands:
12751 In a pair of registers containing a DI or DF value the 'Q'
12752 operand returns the register number of the register containing
12753 the least significant part of the value. The 'R' operand returns
12754 the register number of the register containing the most
12755 significant part of the value.
12757 The 'H' operand returns the higher of the two register numbers.
12758 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12759 same as the 'Q' operand, since the most significant part of the
12760 value is held in the lower number register. The reverse is true
12761 on systems where WORDS_BIG_ENDIAN is false.
12763 The purpose of these operands is to distinguish between cases
12764 where the endian-ness of the values is important (for example
12765 when they are added together), and cases where the endian-ness
12766 is irrelevant, but the order of register operations is important.
12767 For example when loading a value from memory into a register
12768 pair, the endian-ness does not matter. Provided that the value
12769 from the lower memory address is put into the lower numbered
12770 register, and the value from the higher address is put into the
12771 higher numbered register, the load will work regardless of whether
12772 the value being loaded is big-wordian or little-wordian. The
12773 order of the two register loads can matter however, if the address
12774 of the memory location is actually held in one of the registers
12775 being overwritten by the load. */
12777 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12779 output_operand_lossage ("invalid operand for code '%c'", code);
12783 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12787 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12789 output_operand_lossage ("invalid operand for code '%c'", code);
12793 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12797 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12799 output_operand_lossage ("invalid operand for code '%c'", code);
12803 asm_fprintf (stream, "%r", REGNO (x) + 1);
12807 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12809 output_operand_lossage ("invalid operand for code '%c'", code);
12813 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12817 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12819 output_operand_lossage ("invalid operand for code '%c'", code);
12823 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12827 asm_fprintf (stream, "%r",
12828 GET_CODE (XEXP (x, 0)) == REG
12829 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12833 asm_fprintf (stream, "{%r-%r}",
12835 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12838 /* Like 'M', but writing doubleword vector registers, for use by Neon
12842 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12843 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12845 asm_fprintf (stream, "{d%d}", regno);
12847 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12852 /* CONST_TRUE_RTX means always -- that's the default. */
12853 if (x == const_true_rtx)
12856 if (!COMPARISON_P (x))
12858 output_operand_lossage ("invalid operand for code '%c'", code);
12862 fputs (arm_condition_codes[get_arm_condition_code (x)],
12867 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12868 want to do that. */
12869 if (x == const_true_rtx)
12871 output_operand_lossage ("instruction never executed");
12874 if (!COMPARISON_P (x))
12876 output_operand_lossage ("invalid operand for code '%c'", code);
12880 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12881 (get_arm_condition_code (x))],
12885 /* Cirrus registers can be accessed in a variety of ways:
12886 single floating point (f)
12887 double floating point (d)
12889 64bit integer (dx). */
12890 case 'W': /* Cirrus register in F mode. */
12891 case 'X': /* Cirrus register in D mode. */
12892 case 'Y': /* Cirrus register in FX mode. */
12893 case 'Z': /* Cirrus register in DX mode. */
12894 gcc_assert (GET_CODE (x) == REG
12895 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12897 fprintf (stream, "mv%s%s",
12899 : code == 'X' ? "d"
12900 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12904 /* Print cirrus register in the mode specified by the register's mode. */
12907 int mode = GET_MODE (x);
12909 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12911 output_operand_lossage ("invalid operand for code '%c'", code);
12915 fprintf (stream, "mv%s%s",
12916 mode == DFmode ? "d"
12917 : mode == SImode ? "fx"
12918 : mode == DImode ? "dx"
12919 : "f", reg_names[REGNO (x)] + 2);
12925 if (GET_CODE (x) != REG
12926 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
12927 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
12928 /* Bad value for wCG register number. */
12930 output_operand_lossage ("invalid operand for code '%c'", code);
12935 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
12938 /* Print an iWMMXt control register name. */
12940 if (GET_CODE (x) != CONST_INT
12942 || INTVAL (x) >= 16)
12943 /* Bad value for wC register number. */
12945 output_operand_lossage ("invalid operand for code '%c'", code);
12951 static const char * wc_reg_names [16] =
12953 "wCID", "wCon", "wCSSF", "wCASF",
12954 "wC4", "wC5", "wC6", "wC7",
12955 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
12956 "wC12", "wC13", "wC14", "wC15"
12959 fprintf (stream, wc_reg_names [INTVAL (x)]);
12963 /* Print a VFP/Neon double precision or quad precision register name. */
12967 int mode = GET_MODE (x);
12968 int is_quad = (code == 'q');
12971 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
12973 output_operand_lossage ("invalid operand for code '%c'", code);
12977 if (GET_CODE (x) != REG
12978 || !IS_VFP_REGNUM (REGNO (x)))
12980 output_operand_lossage ("invalid operand for code '%c'", code);
12985 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
12986 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
12988 output_operand_lossage ("invalid operand for code '%c'", code);
12992 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
12993 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
12997 /* These two codes print the low/high doubleword register of a Neon quad
12998 register, respectively. For pair-structure types, can also print
12999 low/high quadword registers. */
13003 int mode = GET_MODE (x);
13006 if ((GET_MODE_SIZE (mode) != 16
13007 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13009 output_operand_lossage ("invalid operand for code '%c'", code);
13014 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13016 output_operand_lossage ("invalid operand for code '%c'", code);
13020 if (GET_MODE_SIZE (mode) == 16)
13021 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13022 + (code == 'f' ? 1 : 0));
13024 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13025 + (code == 'f' ? 1 : 0));
13029 /* Print a VFPv3 floating-point constant, represented as an integer
13033 int index = vfp3_const_double_index (x);
13034 gcc_assert (index != -1);
13035 fprintf (stream, "%d", index);
13039 /* Print bits representing opcode features for Neon.
13041 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13042 and polynomials as unsigned.
13044 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13046 Bit 2 is 1 for rounding functions, 0 otherwise. */
13048 /* Identify the type as 's', 'u', 'p' or 'f'. */
13051 HOST_WIDE_INT bits = INTVAL (x);
13052 fputc ("uspf"[bits & 3], stream);
13056 /* Likewise, but signed and unsigned integers are both 'i'. */
13059 HOST_WIDE_INT bits = INTVAL (x);
13060 fputc ("iipf"[bits & 3], stream);
13064 /* As for 'T', but emit 'u' instead of 'p'. */
13067 HOST_WIDE_INT bits = INTVAL (x);
13068 fputc ("usuf"[bits & 3], stream);
13072 /* Bit 2: rounding (vs none). */
13075 HOST_WIDE_INT bits = INTVAL (x);
13076 fputs ((bits & 4) != 0 ? "r" : "", stream);
13083 output_operand_lossage ("missing operand");
13087 switch (GET_CODE (x))
13090 asm_fprintf (stream, "%r", REGNO (x));
13094 output_memory_reference_mode = GET_MODE (x);
13095 output_address (XEXP (x, 0));
13102 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13103 sizeof (fpstr), 0, 1);
13104 fprintf (stream, "#%s", fpstr);
13107 fprintf (stream, "#%s", fp_immediate_constant (x));
13111 gcc_assert (GET_CODE (x) != NEG);
13112 fputc ('#', stream);
13113 output_addr_const (stream, x);
13119 #ifndef AOF_ASSEMBLER
13120 /* Target hook for assembling integer objects. The ARM version needs to
13121 handle word-sized values specially. */
13123 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13125 enum machine_mode mode;
13127 if (size == UNITS_PER_WORD && aligned_p)
13129 fputs ("\t.word\t", asm_out_file);
13130 output_addr_const (asm_out_file, x);
13132 /* Mark symbols as position independent. We only do this in the
13133 .text segment, not in the .data segment. */
13134 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13135 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13137 /* See legitimize_pic_address for an explanation of the
13138 TARGET_VXWORKS_RTP check. */
13139 if (TARGET_VXWORKS_RTP
13140 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13141 fputs ("(GOT)", asm_out_file);
13143 fputs ("(GOTOFF)", asm_out_file);
13145 fputc ('\n', asm_out_file);
13149 mode = GET_MODE (x);
13151 if (arm_vector_mode_supported_p (mode))
13154 unsigned int invmask = 0, parts_per_word;
13156 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13158 units = CONST_VECTOR_NUNITS (x);
13159 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13161 /* For big-endian Neon vectors, we must permute the vector to the form
13162 which, when loaded by a VLDR or VLDM instruction, will give a vector
13163 with the elements in the right order. */
13164 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13166 parts_per_word = UNITS_PER_WORD / size;
13167 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13168 support those anywhere yet. */
13169 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13172 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13173 for (i = 0; i < units; i++)
13175 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13177 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13180 for (i = 0; i < units; i++)
13182 rtx elt = CONST_VECTOR_ELT (x, i);
13183 REAL_VALUE_TYPE rval;
13185 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13188 (rval, GET_MODE_INNER (mode),
13189 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13195 return default_assemble_integer (x, size, aligned_p);
13199 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13203 if (!TARGET_AAPCS_BASED)
13206 default_named_section_asm_out_constructor
13207 : default_named_section_asm_out_destructor) (symbol, priority);
13211 /* Put these in the .init_array section, using a special relocation. */
13212 if (priority != DEFAULT_INIT_PRIORITY)
13215 sprintf (buf, "%s.%.5u",
13216 is_ctor ? ".init_array" : ".fini_array",
13218 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13225 switch_to_section (s);
13226 assemble_align (POINTER_SIZE);
13227 fputs ("\t.word\t", asm_out_file);
13228 output_addr_const (asm_out_file, symbol);
13229 fputs ("(target1)\n", asm_out_file);
13232 /* Add a function to the list of static constructors. */
13235 arm_elf_asm_constructor (rtx symbol, int priority)
13237 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13240 /* Add a function to the list of static destructors. */
13243 arm_elf_asm_destructor (rtx symbol, int priority)
13245 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13249 /* A finite state machine takes care of noticing whether or not instructions
13250 can be conditionally executed, and thus decrease execution time and code
13251 size by deleting branch instructions. The fsm is controlled by
13252 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13254 /* The state of the fsm controlling condition codes are:
13255 0: normal, do nothing special
13256 1: make ASM_OUTPUT_OPCODE not output this instruction
13257 2: make ASM_OUTPUT_OPCODE not output this instruction
13258 3: make instructions conditional
13259 4: make instructions conditional
13261 State transitions (state->state by whom under condition):
13262 0 -> 1 final_prescan_insn if the `target' is a label
13263 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13264 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13265 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13266 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13267 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13268 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13269 (the target insn is arm_target_insn).
13271 If the jump clobbers the conditions then we use states 2 and 4.
13273 A similar thing can be done with conditional return insns.
13275 XXX In case the `target' is an unconditional branch, this conditionalising
13276 of the instructions always reduces code size, but not always execution
13277 time. But then, I want to reduce the code size to somewhere near what
13278 /bin/cc produces. */
13280 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13281 instructions. When a COND_EXEC instruction is seen the subsequent
13282 instructions are scanned so that multiple conditional instructions can be
13283 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13284 specify the length and true/false mask for the IT block. These will be
13285 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13287 /* Returns the index of the ARM condition code string in
13288 `arm_condition_codes'. COMPARISON should be an rtx like
13289 `(eq (...) (...))'. */
13290 static enum arm_cond_code
13291 get_arm_condition_code (rtx comparison)
13293 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13295 enum rtx_code comp_code = GET_CODE (comparison);
13297 if (GET_MODE_CLASS (mode) != MODE_CC)
13298 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13299 XEXP (comparison, 1));
13303 case CC_DNEmode: code = ARM_NE; goto dominance;
13304 case CC_DEQmode: code = ARM_EQ; goto dominance;
13305 case CC_DGEmode: code = ARM_GE; goto dominance;
13306 case CC_DGTmode: code = ARM_GT; goto dominance;
13307 case CC_DLEmode: code = ARM_LE; goto dominance;
13308 case CC_DLTmode: code = ARM_LT; goto dominance;
13309 case CC_DGEUmode: code = ARM_CS; goto dominance;
13310 case CC_DGTUmode: code = ARM_HI; goto dominance;
13311 case CC_DLEUmode: code = ARM_LS; goto dominance;
13312 case CC_DLTUmode: code = ARM_CC;
13315 gcc_assert (comp_code == EQ || comp_code == NE);
13317 if (comp_code == EQ)
13318 return ARM_INVERSE_CONDITION_CODE (code);
13324 case NE: return ARM_NE;
13325 case EQ: return ARM_EQ;
13326 case GE: return ARM_PL;
13327 case LT: return ARM_MI;
13328 default: gcc_unreachable ();
13334 case NE: return ARM_NE;
13335 case EQ: return ARM_EQ;
13336 default: gcc_unreachable ();
13342 case NE: return ARM_MI;
13343 case EQ: return ARM_PL;
13344 default: gcc_unreachable ();
13349 /* These encodings assume that AC=1 in the FPA system control
13350 byte. This allows us to handle all cases except UNEQ and
13354 case GE: return ARM_GE;
13355 case GT: return ARM_GT;
13356 case LE: return ARM_LS;
13357 case LT: return ARM_MI;
13358 case NE: return ARM_NE;
13359 case EQ: return ARM_EQ;
13360 case ORDERED: return ARM_VC;
13361 case UNORDERED: return ARM_VS;
13362 case UNLT: return ARM_LT;
13363 case UNLE: return ARM_LE;
13364 case UNGT: return ARM_HI;
13365 case UNGE: return ARM_PL;
13366 /* UNEQ and LTGT do not have a representation. */
13367 case UNEQ: /* Fall through. */
13368 case LTGT: /* Fall through. */
13369 default: gcc_unreachable ();
13375 case NE: return ARM_NE;
13376 case EQ: return ARM_EQ;
13377 case GE: return ARM_LE;
13378 case GT: return ARM_LT;
13379 case LE: return ARM_GE;
13380 case LT: return ARM_GT;
13381 case GEU: return ARM_LS;
13382 case GTU: return ARM_CC;
13383 case LEU: return ARM_CS;
13384 case LTU: return ARM_HI;
13385 default: gcc_unreachable ();
13391 case LTU: return ARM_CS;
13392 case GEU: return ARM_CC;
13393 default: gcc_unreachable ();
13399 case NE: return ARM_NE;
13400 case EQ: return ARM_EQ;
13401 case GE: return ARM_GE;
13402 case GT: return ARM_GT;
13403 case LE: return ARM_LE;
13404 case LT: return ARM_LT;
13405 case GEU: return ARM_CS;
13406 case GTU: return ARM_HI;
13407 case LEU: return ARM_LS;
13408 case LTU: return ARM_CC;
13409 default: gcc_unreachable ();
13412 default: gcc_unreachable ();
13416 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13419 thumb2_final_prescan_insn (rtx insn)
13421 rtx first_insn = insn;
13422 rtx body = PATTERN (insn);
13424 enum arm_cond_code code;
13428 /* Remove the previous insn from the count of insns to be output. */
13429 if (arm_condexec_count)
13430 arm_condexec_count--;
13432 /* Nothing to do if we are already inside a conditional block. */
13433 if (arm_condexec_count)
13436 if (GET_CODE (body) != COND_EXEC)
13439 /* Conditional jumps are implemented directly. */
13440 if (GET_CODE (insn) == JUMP_INSN)
13443 predicate = COND_EXEC_TEST (body);
13444 arm_current_cc = get_arm_condition_code (predicate);
13446 n = get_attr_ce_count (insn);
13447 arm_condexec_count = 1;
13448 arm_condexec_mask = (1 << n) - 1;
13449 arm_condexec_masklen = n;
13450 /* See if subsequent instructions can be combined into the same block. */
13453 insn = next_nonnote_insn (insn);
13455 /* Jumping into the middle of an IT block is illegal, so a label or
13456 barrier terminates the block. */
13457 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13460 body = PATTERN (insn);
13461 /* USE and CLOBBER aren't really insns, so just skip them. */
13462 if (GET_CODE (body) == USE
13463 || GET_CODE (body) == CLOBBER)
13466 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13467 if (GET_CODE (body) != COND_EXEC)
13469 /* Allow up to 4 conditionally executed instructions in a block. */
13470 n = get_attr_ce_count (insn);
13471 if (arm_condexec_masklen + n > 4)
13474 predicate = COND_EXEC_TEST (body);
13475 code = get_arm_condition_code (predicate);
13476 mask = (1 << n) - 1;
13477 if (arm_current_cc == code)
13478 arm_condexec_mask |= (mask << arm_condexec_masklen);
13479 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13482 arm_condexec_count++;
13483 arm_condexec_masklen += n;
13485 /* A jump must be the last instruction in a conditional block. */
13486 if (GET_CODE(insn) == JUMP_INSN)
13489 /* Restore recog_data (getting the attributes of other insns can
13490 destroy this array, but final.c assumes that it remains intact
13491 across this call). */
13492 extract_constrain_insn_cached (first_insn);
13496 arm_final_prescan_insn (rtx insn)
13498 /* BODY will hold the body of INSN. */
13499 rtx body = PATTERN (insn);
13501 /* This will be 1 if trying to repeat the trick, and things need to be
13502 reversed if it appears to fail. */
13505 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
13506 taken are clobbered, even if the rtl suggests otherwise. It also
13507 means that we have to grub around within the jump expression to find
13508 out what the conditions are when the jump isn't taken. */
13509 int jump_clobbers = 0;
13511 /* If we start with a return insn, we only succeed if we find another one. */
13512 int seeking_return = 0;
13514 /* START_INSN will hold the insn from where we start looking. This is the
13515 first insn after the following code_label if REVERSE is true. */
13516 rtx start_insn = insn;
13518 /* If in state 4, check if the target branch is reached, in order to
13519 change back to state 0. */
13520 if (arm_ccfsm_state == 4)
13522 if (insn == arm_target_insn)
13524 arm_target_insn = NULL;
13525 arm_ccfsm_state = 0;
13530 /* If in state 3, it is possible to repeat the trick, if this insn is an
13531 unconditional branch to a label, and immediately following this branch
13532 is the previous target label which is only used once, and the label this
13533 branch jumps to is not too far off. */
13534 if (arm_ccfsm_state == 3)
13536 if (simplejump_p (insn))
13538 start_insn = next_nonnote_insn (start_insn);
13539 if (GET_CODE (start_insn) == BARRIER)
13541 /* XXX Isn't this always a barrier? */
13542 start_insn = next_nonnote_insn (start_insn);
13544 if (GET_CODE (start_insn) == CODE_LABEL
13545 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13546 && LABEL_NUSES (start_insn) == 1)
13551 else if (GET_CODE (body) == RETURN)
13553 start_insn = next_nonnote_insn (start_insn);
13554 if (GET_CODE (start_insn) == BARRIER)
13555 start_insn = next_nonnote_insn (start_insn);
13556 if (GET_CODE (start_insn) == CODE_LABEL
13557 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13558 && LABEL_NUSES (start_insn) == 1)
13561 seeking_return = 1;
13570 gcc_assert (!arm_ccfsm_state || reverse);
13571 if (GET_CODE (insn) != JUMP_INSN)
13574 /* This jump might be paralleled with a clobber of the condition codes
13575 the jump should always come first */
13576 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13577 body = XVECEXP (body, 0, 0);
13580 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13581 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13584 int fail = FALSE, succeed = FALSE;
13585 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13586 int then_not_else = TRUE;
13587 rtx this_insn = start_insn, label = 0;
13589 /* If the jump cannot be done with one instruction, we cannot
13590 conditionally execute the instruction in the inverse case. */
13591 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13597 /* Register the insn jumped to. */
13600 if (!seeking_return)
13601 label = XEXP (SET_SRC (body), 0);
13603 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13604 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13605 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13607 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13608 then_not_else = FALSE;
13610 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13611 seeking_return = 1;
13612 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13614 seeking_return = 1;
13615 then_not_else = FALSE;
13618 gcc_unreachable ();
13620 /* See how many insns this branch skips, and what kind of insns. If all
13621 insns are okay, and the label or unconditional branch to the same
13622 label is not too far away, succeed. */
13623 for (insns_skipped = 0;
13624 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13628 this_insn = next_nonnote_insn (this_insn);
13632 switch (GET_CODE (this_insn))
13635 /* Succeed if it is the target label, otherwise fail since
13636 control falls in from somewhere else. */
13637 if (this_insn == label)
13641 arm_ccfsm_state = 2;
13642 this_insn = next_nonnote_insn (this_insn);
13645 arm_ccfsm_state = 1;
13653 /* Succeed if the following insn is the target label.
13655 If return insns are used then the last insn in a function
13656 will be a barrier. */
13657 this_insn = next_nonnote_insn (this_insn);
13658 if (this_insn && this_insn == label)
13662 arm_ccfsm_state = 2;
13663 this_insn = next_nonnote_insn (this_insn);
13666 arm_ccfsm_state = 1;
13674 /* The AAPCS says that conditional calls should not be
13675 used since they make interworking inefficient (the
13676 linker can't transform BL<cond> into BLX). That's
13677 only a problem if the machine has BLX. */
13684 /* Succeed if the following insn is the target label, or
13685 if the following two insns are a barrier and the
13687 this_insn = next_nonnote_insn (this_insn);
13688 if (this_insn && GET_CODE (this_insn) == BARRIER)
13689 this_insn = next_nonnote_insn (this_insn);
13691 if (this_insn && this_insn == label
13692 && insns_skipped < max_insns_skipped)
13696 arm_ccfsm_state = 2;
13697 this_insn = next_nonnote_insn (this_insn);
13700 arm_ccfsm_state = 1;
13708 /* If this is an unconditional branch to the same label, succeed.
13709 If it is to another label, do nothing. If it is conditional,
13711 /* XXX Probably, the tests for SET and the PC are
13714 scanbody = PATTERN (this_insn);
13715 if (GET_CODE (scanbody) == SET
13716 && GET_CODE (SET_DEST (scanbody)) == PC)
13718 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13719 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13721 arm_ccfsm_state = 2;
13724 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13727 /* Fail if a conditional return is undesirable (e.g. on a
13728 StrongARM), but still allow this if optimizing for size. */
13729 else if (GET_CODE (scanbody) == RETURN
13730 && !use_return_insn (TRUE, NULL)
13733 else if (GET_CODE (scanbody) == RETURN
13736 arm_ccfsm_state = 2;
13739 else if (GET_CODE (scanbody) == PARALLEL)
13741 switch (get_attr_conds (this_insn))
13751 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13756 /* Instructions using or affecting the condition codes make it
13758 scanbody = PATTERN (this_insn);
13759 if (!(GET_CODE (scanbody) == SET
13760 || GET_CODE (scanbody) == PARALLEL)
13761 || get_attr_conds (this_insn) != CONDS_NOCOND)
13764 /* A conditional cirrus instruction must be followed by
13765 a non Cirrus instruction. However, since we
13766 conditionalize instructions in this function and by
13767 the time we get here we can't add instructions
13768 (nops), because shorten_branches() has already been
13769 called, we will disable conditionalizing Cirrus
13770 instructions to be safe. */
13771 if (GET_CODE (scanbody) != USE
13772 && GET_CODE (scanbody) != CLOBBER
13773 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13783 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13784 arm_target_label = CODE_LABEL_NUMBER (label);
13787 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13789 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13791 this_insn = next_nonnote_insn (this_insn);
13792 gcc_assert (!this_insn
13793 || (GET_CODE (this_insn) != BARRIER
13794 && GET_CODE (this_insn) != CODE_LABEL));
13798 /* Oh, dear! we ran off the end.. give up. */
13799 extract_constrain_insn_cached (insn);
13800 arm_ccfsm_state = 0;
13801 arm_target_insn = NULL;
13804 arm_target_insn = this_insn;
13808 gcc_assert (!reverse);
13810 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13812 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13813 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13814 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13815 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13819 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13822 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13826 if (reverse || then_not_else)
13827 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13830 /* Restore recog_data (getting the attributes of other insns can
13831 destroy this array, but final.c assumes that it remains intact
13832 across this call. */
13833 extract_constrain_insn_cached (insn);
13837 /* Output IT instructions. */
13839 thumb2_asm_output_opcode (FILE * stream)
13844 if (arm_condexec_mask)
13846 for (n = 0; n < arm_condexec_masklen; n++)
13847 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13849 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13850 arm_condition_codes[arm_current_cc]);
13851 arm_condexec_mask = 0;
13855 /* Returns true if REGNO is a valid register
13856 for holding a quantity of type MODE. */
13858 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13860 if (GET_MODE_CLASS (mode) == MODE_CC)
13861 return (regno == CC_REGNUM
13862 || (TARGET_HARD_FLOAT && TARGET_VFP
13863 && regno == VFPCC_REGNUM));
13866 /* For the Thumb we only allow values bigger than SImode in
13867 registers 0 - 6, so that there is always a second low
13868 register available to hold the upper part of the value.
13869 We probably we ought to ensure that the register is the
13870 start of an even numbered register pair. */
13871 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13873 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13874 && IS_CIRRUS_REGNUM (regno))
13875 /* We have outlawed SI values in Cirrus registers because they
13876 reside in the lower 32 bits, but SF values reside in the
13877 upper 32 bits. This causes gcc all sorts of grief. We can't
13878 even split the registers into pairs because Cirrus SI values
13879 get sign extended to 64bits-- aldyh. */
13880 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13882 if (TARGET_HARD_FLOAT && TARGET_VFP
13883 && IS_VFP_REGNUM (regno))
13885 if (mode == SFmode || mode == SImode)
13886 return VFP_REGNO_OK_FOR_SINGLE (regno);
13888 if (mode == DFmode)
13889 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13892 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13893 || (VALID_NEON_QREG_MODE (mode)
13894 && NEON_REGNO_OK_FOR_QUAD (regno))
13895 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13896 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13897 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13898 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13899 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13904 if (TARGET_REALLY_IWMMXT)
13906 if (IS_IWMMXT_GR_REGNUM (regno))
13907 return mode == SImode;
13909 if (IS_IWMMXT_REGNUM (regno))
13910 return VALID_IWMMXT_REG_MODE (mode);
13913 /* We allow any value to be stored in the general registers.
13914 Restrict doubleword quantities to even register pairs so that we can
13915 use ldrd. Do not allow Neon structure opaque modes in general registers;
13916 they would use too many. */
13917 if (regno <= LAST_ARM_REGNUM)
13918 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
13919 && !VALID_NEON_STRUCT_MODE (mode);
13921 if (regno == FRAME_POINTER_REGNUM
13922 || regno == ARG_POINTER_REGNUM)
13923 /* We only allow integers in the fake hard registers. */
13924 return GET_MODE_CLASS (mode) == MODE_INT;
13926 /* The only registers left are the FPA registers
13927 which we only allow to hold FP values. */
13928 return (TARGET_HARD_FLOAT && TARGET_FPA
13929 && GET_MODE_CLASS (mode) == MODE_FLOAT
13930 && regno >= FIRST_FPA_REGNUM
13931 && regno <= LAST_FPA_REGNUM);
13934 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
13935 not used in arm mode. */
13937 arm_regno_class (int regno)
13941 if (regno == STACK_POINTER_REGNUM)
13943 if (regno == CC_REGNUM)
13950 if (TARGET_THUMB2 && regno < 8)
13953 if ( regno <= LAST_ARM_REGNUM
13954 || regno == FRAME_POINTER_REGNUM
13955 || regno == ARG_POINTER_REGNUM)
13956 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
13958 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
13959 return TARGET_THUMB2 ? CC_REG : NO_REGS;
13961 if (IS_CIRRUS_REGNUM (regno))
13962 return CIRRUS_REGS;
13964 if (IS_VFP_REGNUM (regno))
13966 if (regno <= D7_VFP_REGNUM)
13967 return VFP_D0_D7_REGS;
13968 else if (regno <= LAST_LO_VFP_REGNUM)
13969 return VFP_LO_REGS;
13971 return VFP_HI_REGS;
13974 if (IS_IWMMXT_REGNUM (regno))
13975 return IWMMXT_REGS;
13977 if (IS_IWMMXT_GR_REGNUM (regno))
13978 return IWMMXT_GR_REGS;
13983 /* Handle a special case when computing the offset
13984 of an argument from the frame pointer. */
13986 arm_debugger_arg_offset (int value, rtx addr)
13990 /* We are only interested if dbxout_parms() failed to compute the offset. */
13994 /* We can only cope with the case where the address is held in a register. */
13995 if (GET_CODE (addr) != REG)
13998 /* If we are using the frame pointer to point at the argument, then
13999 an offset of 0 is correct. */
14000 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14003 /* If we are using the stack pointer to point at the
14004 argument, then an offset of 0 is correct. */
14005 /* ??? Check this is consistent with thumb2 frame layout. */
14006 if ((TARGET_THUMB || !frame_pointer_needed)
14007 && REGNO (addr) == SP_REGNUM)
14010 /* Oh dear. The argument is pointed to by a register rather
14011 than being held in a register, or being stored at a known
14012 offset from the frame pointer. Since GDB only understands
14013 those two kinds of argument we must translate the address
14014 held in the register into an offset from the frame pointer.
14015 We do this by searching through the insns for the function
14016 looking to see where this register gets its value. If the
14017 register is initialized from the frame pointer plus an offset
14018 then we are in luck and we can continue, otherwise we give up.
14020 This code is exercised by producing debugging information
14021 for a function with arguments like this:
14023 double func (double a, double b, int c, double d) {return d;}
14025 Without this code the stab for parameter 'd' will be set to
14026 an offset of 0 from the frame pointer, rather than 8. */
14028 /* The if() statement says:
14030 If the insn is a normal instruction
14031 and if the insn is setting the value in a register
14032 and if the register being set is the register holding the address of the argument
14033 and if the address is computing by an addition
14034 that involves adding to a register
14035 which is the frame pointer
14040 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14042 if ( GET_CODE (insn) == INSN
14043 && GET_CODE (PATTERN (insn)) == SET
14044 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14045 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14046 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14047 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14048 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14051 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14060 warning (0, "unable to compute real location of stacked parameter");
14061 value = 8; /* XXX magic hack */
14067 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14070 if ((MASK) & insn_flags) \
14071 add_builtin_function ((NAME), (TYPE), (CODE), \
14072 BUILT_IN_MD, NULL, NULL_TREE); \
14076 struct builtin_description
14078 const unsigned int mask;
14079 const enum insn_code icode;
14080 const char * const name;
14081 const enum arm_builtins code;
14082 const enum rtx_code comparison;
14083 const unsigned int flag;
14086 static const struct builtin_description bdesc_2arg[] =
14088 #define IWMMXT_BUILTIN(code, string, builtin) \
14089 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14090 ARM_BUILTIN_##builtin, 0, 0 },
14092 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14093 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14094 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14095 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14096 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14097 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14098 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14099 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14100 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14101 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14102 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14103 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14104 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14105 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14106 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14107 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14108 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14109 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14110 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14111 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14112 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14113 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14114 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14115 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14116 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14117 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14118 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14119 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14120 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14121 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14122 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14123 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14124 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14125 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14126 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14127 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14128 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14129 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14130 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14131 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14132 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14133 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14134 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14135 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14136 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14137 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14138 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14139 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14140 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14141 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14142 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14143 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14144 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14145 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14146 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14147 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14148 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14149 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14151 #define IWMMXT_BUILTIN2(code, builtin) \
14152 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14154 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14155 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14156 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14157 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14158 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14159 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14160 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14161 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14162 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14163 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14164 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14165 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14166 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14167 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14168 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14169 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14170 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14171 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14172 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14173 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14174 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14175 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14176 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14177 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14178 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14179 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14180 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14181 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14182 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14183 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14184 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14185 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14188 static const struct builtin_description bdesc_1arg[] =
14190 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14191 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14192 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14193 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14194 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14195 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14196 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14197 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14198 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14199 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14200 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14201 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14202 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14203 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14204 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14205 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14206 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14207 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14210 /* Set up all the iWMMXt builtins. This is
14211 not called if TARGET_IWMMXT is zero. */
14214 arm_init_iwmmxt_builtins (void)
14216 const struct builtin_description * d;
14218 tree endlink = void_list_node;
14220 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14221 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14222 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14225 = build_function_type (integer_type_node,
14226 tree_cons (NULL_TREE, integer_type_node, endlink));
14227 tree v8qi_ftype_v8qi_v8qi_int
14228 = build_function_type (V8QI_type_node,
14229 tree_cons (NULL_TREE, V8QI_type_node,
14230 tree_cons (NULL_TREE, V8QI_type_node,
14231 tree_cons (NULL_TREE,
14234 tree v4hi_ftype_v4hi_int
14235 = build_function_type (V4HI_type_node,
14236 tree_cons (NULL_TREE, V4HI_type_node,
14237 tree_cons (NULL_TREE, integer_type_node,
14239 tree v2si_ftype_v2si_int
14240 = build_function_type (V2SI_type_node,
14241 tree_cons (NULL_TREE, V2SI_type_node,
14242 tree_cons (NULL_TREE, integer_type_node,
14244 tree v2si_ftype_di_di
14245 = build_function_type (V2SI_type_node,
14246 tree_cons (NULL_TREE, long_long_integer_type_node,
14247 tree_cons (NULL_TREE, long_long_integer_type_node,
14249 tree di_ftype_di_int
14250 = build_function_type (long_long_integer_type_node,
14251 tree_cons (NULL_TREE, long_long_integer_type_node,
14252 tree_cons (NULL_TREE, integer_type_node,
14254 tree di_ftype_di_int_int
14255 = build_function_type (long_long_integer_type_node,
14256 tree_cons (NULL_TREE, long_long_integer_type_node,
14257 tree_cons (NULL_TREE, integer_type_node,
14258 tree_cons (NULL_TREE,
14261 tree int_ftype_v8qi
14262 = build_function_type (integer_type_node,
14263 tree_cons (NULL_TREE, V8QI_type_node,
14265 tree int_ftype_v4hi
14266 = build_function_type (integer_type_node,
14267 tree_cons (NULL_TREE, V4HI_type_node,
14269 tree int_ftype_v2si
14270 = build_function_type (integer_type_node,
14271 tree_cons (NULL_TREE, V2SI_type_node,
14273 tree int_ftype_v8qi_int
14274 = build_function_type (integer_type_node,
14275 tree_cons (NULL_TREE, V8QI_type_node,
14276 tree_cons (NULL_TREE, integer_type_node,
14278 tree int_ftype_v4hi_int
14279 = build_function_type (integer_type_node,
14280 tree_cons (NULL_TREE, V4HI_type_node,
14281 tree_cons (NULL_TREE, integer_type_node,
14283 tree int_ftype_v2si_int
14284 = build_function_type (integer_type_node,
14285 tree_cons (NULL_TREE, V2SI_type_node,
14286 tree_cons (NULL_TREE, integer_type_node,
14288 tree v8qi_ftype_v8qi_int_int
14289 = build_function_type (V8QI_type_node,
14290 tree_cons (NULL_TREE, V8QI_type_node,
14291 tree_cons (NULL_TREE, integer_type_node,
14292 tree_cons (NULL_TREE,
14295 tree v4hi_ftype_v4hi_int_int
14296 = build_function_type (V4HI_type_node,
14297 tree_cons (NULL_TREE, V4HI_type_node,
14298 tree_cons (NULL_TREE, integer_type_node,
14299 tree_cons (NULL_TREE,
14302 tree v2si_ftype_v2si_int_int
14303 = build_function_type (V2SI_type_node,
14304 tree_cons (NULL_TREE, V2SI_type_node,
14305 tree_cons (NULL_TREE, integer_type_node,
14306 tree_cons (NULL_TREE,
14309 /* Miscellaneous. */
14310 tree v8qi_ftype_v4hi_v4hi
14311 = build_function_type (V8QI_type_node,
14312 tree_cons (NULL_TREE, V4HI_type_node,
14313 tree_cons (NULL_TREE, V4HI_type_node,
14315 tree v4hi_ftype_v2si_v2si
14316 = build_function_type (V4HI_type_node,
14317 tree_cons (NULL_TREE, V2SI_type_node,
14318 tree_cons (NULL_TREE, V2SI_type_node,
14320 tree v2si_ftype_v4hi_v4hi
14321 = build_function_type (V2SI_type_node,
14322 tree_cons (NULL_TREE, V4HI_type_node,
14323 tree_cons (NULL_TREE, V4HI_type_node,
14325 tree v2si_ftype_v8qi_v8qi
14326 = build_function_type (V2SI_type_node,
14327 tree_cons (NULL_TREE, V8QI_type_node,
14328 tree_cons (NULL_TREE, V8QI_type_node,
14330 tree v4hi_ftype_v4hi_di
14331 = build_function_type (V4HI_type_node,
14332 tree_cons (NULL_TREE, V4HI_type_node,
14333 tree_cons (NULL_TREE,
14334 long_long_integer_type_node,
14336 tree v2si_ftype_v2si_di
14337 = build_function_type (V2SI_type_node,
14338 tree_cons (NULL_TREE, V2SI_type_node,
14339 tree_cons (NULL_TREE,
14340 long_long_integer_type_node,
14342 tree void_ftype_int_int
14343 = build_function_type (void_type_node,
14344 tree_cons (NULL_TREE, integer_type_node,
14345 tree_cons (NULL_TREE, integer_type_node,
14348 = build_function_type (long_long_unsigned_type_node, endlink);
14350 = build_function_type (long_long_integer_type_node,
14351 tree_cons (NULL_TREE, V8QI_type_node,
14354 = build_function_type (long_long_integer_type_node,
14355 tree_cons (NULL_TREE, V4HI_type_node,
14358 = build_function_type (long_long_integer_type_node,
14359 tree_cons (NULL_TREE, V2SI_type_node,
14361 tree v2si_ftype_v4hi
14362 = build_function_type (V2SI_type_node,
14363 tree_cons (NULL_TREE, V4HI_type_node,
14365 tree v4hi_ftype_v8qi
14366 = build_function_type (V4HI_type_node,
14367 tree_cons (NULL_TREE, V8QI_type_node,
14370 tree di_ftype_di_v4hi_v4hi
14371 = build_function_type (long_long_unsigned_type_node,
14372 tree_cons (NULL_TREE,
14373 long_long_unsigned_type_node,
14374 tree_cons (NULL_TREE, V4HI_type_node,
14375 tree_cons (NULL_TREE,
14379 tree di_ftype_v4hi_v4hi
14380 = build_function_type (long_long_unsigned_type_node,
14381 tree_cons (NULL_TREE, V4HI_type_node,
14382 tree_cons (NULL_TREE, V4HI_type_node,
14385 /* Normal vector binops. */
14386 tree v8qi_ftype_v8qi_v8qi
14387 = build_function_type (V8QI_type_node,
14388 tree_cons (NULL_TREE, V8QI_type_node,
14389 tree_cons (NULL_TREE, V8QI_type_node,
14391 tree v4hi_ftype_v4hi_v4hi
14392 = build_function_type (V4HI_type_node,
14393 tree_cons (NULL_TREE, V4HI_type_node,
14394 tree_cons (NULL_TREE, V4HI_type_node,
14396 tree v2si_ftype_v2si_v2si
14397 = build_function_type (V2SI_type_node,
14398 tree_cons (NULL_TREE, V2SI_type_node,
14399 tree_cons (NULL_TREE, V2SI_type_node,
14401 tree di_ftype_di_di
14402 = build_function_type (long_long_unsigned_type_node,
14403 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14404 tree_cons (NULL_TREE,
14405 long_long_unsigned_type_node,
14408 /* Add all builtins that are more or less simple operations on two
14410 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14412 /* Use one of the operands; the target can have a different mode for
14413 mask-generating compares. */
14414 enum machine_mode mode;
14420 mode = insn_data[d->icode].operand[1].mode;
14425 type = v8qi_ftype_v8qi_v8qi;
14428 type = v4hi_ftype_v4hi_v4hi;
14431 type = v2si_ftype_v2si_v2si;
14434 type = di_ftype_di_di;
14438 gcc_unreachable ();
14441 def_mbuiltin (d->mask, d->name, type, d->code);
14444 /* Add the remaining MMX insns with somewhat more complicated types. */
14445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14491 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14492 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14494 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14495 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14496 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14498 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14499 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14500 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14502 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14503 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14504 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14507 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14510 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14511 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14512 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14515 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14516 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14520 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14522 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14523 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14524 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14525 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14529 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14532 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14537 arm_init_tls_builtins (void)
14540 tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
14541 tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);
14543 ftype = build_function_type (ptr_type_node, void_list_node);
14544 add_builtin_function ("__builtin_thread_pointer", ftype,
14545 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14546 NULL, const_nothrow);
14563 } neon_builtin_type_bits;
14565 #define v8qi_UP T_V8QI
14566 #define v4hi_UP T_V4HI
14567 #define v2si_UP T_V2SI
14568 #define v2sf_UP T_V2SF
14570 #define v16qi_UP T_V16QI
14571 #define v8hi_UP T_V8HI
14572 #define v4si_UP T_V4SI
14573 #define v4sf_UP T_V4SF
14574 #define v2di_UP T_V2DI
14579 #define UP(X) X##_UP
14614 NEON_LOADSTRUCTLANE,
14616 NEON_STORESTRUCTLANE,
14625 const neon_itype itype;
14626 const neon_builtin_type_bits bits;
14627 const enum insn_code codes[T_MAX];
14628 const unsigned int num_vars;
14629 unsigned int base_fcode;
14630 } neon_builtin_datum;
14632 #define CF(N,X) CODE_FOR_neon_##N##X
14634 #define VAR1(T, N, A) \
14635 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14636 #define VAR2(T, N, A, B) \
14637 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14638 #define VAR3(T, N, A, B, C) \
14639 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14640 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14641 #define VAR4(T, N, A, B, C, D) \
14642 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14643 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14644 #define VAR5(T, N, A, B, C, D, E) \
14645 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14646 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14647 #define VAR6(T, N, A, B, C, D, E, F) \
14648 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14649 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14650 #define VAR7(T, N, A, B, C, D, E, F, G) \
14651 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14652 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14654 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14655 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14657 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14658 CF (N, G), CF (N, H) }, 8, 0
14659 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14660 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14661 | UP (H) | UP (I), \
14662 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14663 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14664 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14665 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14666 | UP (H) | UP (I) | UP (J), \
14667 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14668 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
14670 /* The mode entries in the following table correspond to the "key" type of the
14671 instruction variant, i.e. equivalent to that which would be specified after
14672 the assembler mnemonic, which usually refers to the last vector operand.
14673 (Signed/unsigned/polynomial types are not differentiated between though, and
14674 are all mapped onto the same mode for a given element size.) The modes
14675 listed per instruction should be the same as those defined for that
14676 instruction's pattern in neon.md.
14677 WARNING: Variants should be listed in the same increasing order as
14678 neon_builtin_type_bits. */
14680 static neon_builtin_datum neon_builtin_data[] =
14682 { VAR10 (BINOP, vadd,
14683 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14684 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14685 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14686 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14687 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14688 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14689 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14690 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14691 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14692 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14693 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14694 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14695 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14696 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14697 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14698 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14699 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14700 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14701 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14702 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14703 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14704 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14705 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14706 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14707 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14708 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14709 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14710 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14711 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14712 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14713 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14714 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14715 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14716 { VAR10 (BINOP, vsub,
14717 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14718 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14719 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14720 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14721 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14722 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14723 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14724 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14725 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14726 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14727 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14728 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14729 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14730 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14731 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14732 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14733 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14734 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14735 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14736 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14737 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14738 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14739 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14740 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14741 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14742 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14743 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14744 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14745 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14746 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14747 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14748 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14749 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14750 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14751 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14752 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14753 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14754 /* FIXME: vget_lane supports more variants than this! */
14755 { VAR10 (GETLANE, vget_lane,
14756 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14757 { VAR10 (SETLANE, vset_lane,
14758 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14759 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14760 { VAR10 (DUP, vdup_n,
14761 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14762 { VAR10 (DUPLANE, vdup_lane,
14763 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14764 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14765 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14766 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14767 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14768 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14769 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14770 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14771 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14772 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14773 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14774 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14775 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14776 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14777 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14778 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14779 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14780 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14781 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14782 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14783 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14784 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14785 { VAR10 (BINOP, vext,
14786 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14787 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14788 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14789 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14790 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14791 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14792 { VAR10 (SELECT, vbsl,
14793 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14794 { VAR1 (VTBL, vtbl1, v8qi) },
14795 { VAR1 (VTBL, vtbl2, v8qi) },
14796 { VAR1 (VTBL, vtbl3, v8qi) },
14797 { VAR1 (VTBL, vtbl4, v8qi) },
14798 { VAR1 (VTBX, vtbx1, v8qi) },
14799 { VAR1 (VTBX, vtbx2, v8qi) },
14800 { VAR1 (VTBX, vtbx3, v8qi) },
14801 { VAR1 (VTBX, vtbx4, v8qi) },
14802 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14803 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14804 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14805 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14806 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14807 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14808 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14809 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14810 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14811 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14812 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14813 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14814 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14815 { VAR10 (LOAD1, vld1,
14816 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14817 { VAR10 (LOAD1LANE, vld1_lane,
14818 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14819 { VAR10 (LOAD1, vld1_dup,
14820 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14821 { VAR10 (STORE1, vst1,
14822 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14823 { VAR10 (STORE1LANE, vst1_lane,
14824 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14825 { VAR9 (LOADSTRUCT,
14826 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14827 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14828 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14829 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14830 { VAR9 (STORESTRUCT, vst2,
14831 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14832 { VAR7 (STORESTRUCTLANE, vst2_lane,
14833 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14834 { VAR9 (LOADSTRUCT,
14835 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14836 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14837 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14838 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14839 { VAR9 (STORESTRUCT, vst3,
14840 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14841 { VAR7 (STORESTRUCTLANE, vst3_lane,
14842 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14843 { VAR9 (LOADSTRUCT, vld4,
14844 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14845 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14846 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14847 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14848 { VAR9 (STORESTRUCT, vst4,
14849 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14850 { VAR7 (STORESTRUCTLANE, vst4_lane,
14851 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14852 { VAR10 (LOGICBINOP, vand,
14853 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14854 { VAR10 (LOGICBINOP, vorr,
14855 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14856 { VAR10 (BINOP, veor,
14857 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14858 { VAR10 (LOGICBINOP, vbic,
14859 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14860 { VAR10 (LOGICBINOP, vorn,
14861 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14877 arm_init_neon_builtins (void)
14879 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14881 /* Create distinguished type nodes for NEON vector element types,
14882 and pointers to values of such types, so we can detect them later. */
14883 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14884 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14885 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14886 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14887 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14888 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14889 tree neon_float_type_node = make_node (REAL_TYPE);
14890 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
14891 layout_type (neon_float_type_node);
14893 /* Define typedefs which exactly correspond to the modes we are basing vector
14894 types on. If you change these names you'll need to change
14895 the table used by arm_mangle_type too. */
14896 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
14897 "__builtin_neon_qi");
14898 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
14899 "__builtin_neon_hi");
14900 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
14901 "__builtin_neon_si");
14902 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
14903 "__builtin_neon_sf");
14904 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
14905 "__builtin_neon_di");
14907 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
14908 "__builtin_neon_poly8");
14909 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
14910 "__builtin_neon_poly16");
14912 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14913 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14914 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14915 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14916 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14918 /* Next create constant-qualified versions of the above types. */
14919 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14921 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14923 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14925 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14927 tree const_float_node = build_qualified_type (neon_float_type_node,
14930 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14931 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14932 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14933 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14934 tree const_float_pointer_node = build_pointer_type (const_float_node);
14936 /* Now create vector types based on our NEON element types. */
14937 /* 64-bit vectors. */
14938 tree V8QI_type_node =
14939 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
14940 tree V4HI_type_node =
14941 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
14942 tree V2SI_type_node =
14943 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
14944 tree V2SF_type_node =
14945 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
14946 /* 128-bit vectors. */
14947 tree V16QI_type_node =
14948 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
14949 tree V8HI_type_node =
14950 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
14951 tree V4SI_type_node =
14952 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
14953 tree V4SF_type_node =
14954 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
14955 tree V2DI_type_node =
14956 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
14958 /* Unsigned integer types for various mode sizes. */
14959 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
14960 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
14961 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
14962 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
14964 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
14965 "__builtin_neon_uqi");
14966 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
14967 "__builtin_neon_uhi");
14968 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
14969 "__builtin_neon_usi");
14970 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
14971 "__builtin_neon_udi");
14973 /* Opaque integer types for structures of vectors. */
14974 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
14975 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
14976 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
14977 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
14979 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
14980 "__builtin_neon_ti");
14981 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
14982 "__builtin_neon_ei");
14983 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
14984 "__builtin_neon_oi");
14985 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
14986 "__builtin_neon_ci");
14987 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
14988 "__builtin_neon_xi");
14990 /* Pointers to vector types. */
14991 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
14992 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
14993 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
14994 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
14995 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
14996 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
14997 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
14998 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
14999 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15001 /* Operations which return results as pairs. */
15002 tree void_ftype_pv8qi_v8qi_v8qi =
15003 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15004 V8QI_type_node, NULL);
15005 tree void_ftype_pv4hi_v4hi_v4hi =
15006 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15007 V4HI_type_node, NULL);
15008 tree void_ftype_pv2si_v2si_v2si =
15009 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15010 V2SI_type_node, NULL);
15011 tree void_ftype_pv2sf_v2sf_v2sf =
15012 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15013 V2SF_type_node, NULL);
15014 tree void_ftype_pdi_di_di =
15015 build_function_type_list (void_type_node, intDI_pointer_node,
15016 neon_intDI_type_node, neon_intDI_type_node, NULL);
15017 tree void_ftype_pv16qi_v16qi_v16qi =
15018 build_function_type_list (void_type_node, V16QI_pointer_node,
15019 V16QI_type_node, V16QI_type_node, NULL);
15020 tree void_ftype_pv8hi_v8hi_v8hi =
15021 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15022 V8HI_type_node, NULL);
15023 tree void_ftype_pv4si_v4si_v4si =
15024 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15025 V4SI_type_node, NULL);
15026 tree void_ftype_pv4sf_v4sf_v4sf =
15027 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15028 V4SF_type_node, NULL);
15029 tree void_ftype_pv2di_v2di_v2di =
15030 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15031 V2DI_type_node, NULL);
15033 tree reinterp_ftype_dreg[5][5];
15034 tree reinterp_ftype_qreg[5][5];
15035 tree dreg_types[5], qreg_types[5];
15037 dreg_types[0] = V8QI_type_node;
15038 dreg_types[1] = V4HI_type_node;
15039 dreg_types[2] = V2SI_type_node;
15040 dreg_types[3] = V2SF_type_node;
15041 dreg_types[4] = neon_intDI_type_node;
15043 qreg_types[0] = V16QI_type_node;
15044 qreg_types[1] = V8HI_type_node;
15045 qreg_types[2] = V4SI_type_node;
15046 qreg_types[3] = V4SF_type_node;
15047 qreg_types[4] = V2DI_type_node;
15049 for (i = 0; i < 5; i++)
15052 for (j = 0; j < 5; j++)
15054 reinterp_ftype_dreg[i][j]
15055 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15056 reinterp_ftype_qreg[i][j]
15057 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15061 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15063 neon_builtin_datum *d = &neon_builtin_data[i];
15064 unsigned int j, codeidx = 0;
15066 d->base_fcode = fcode;
15068 for (j = 0; j < T_MAX; j++)
15070 const char* const modenames[] = {
15071 "v8qi", "v4hi", "v2si", "v2sf", "di",
15072 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15076 enum insn_code icode;
15077 int is_load = 0, is_store = 0;
15079 if ((d->bits & (1 << j)) == 0)
15082 icode = d->codes[codeidx++];
15087 case NEON_LOAD1LANE:
15088 case NEON_LOADSTRUCT:
15089 case NEON_LOADSTRUCTLANE:
15091 /* Fall through. */
15093 case NEON_STORE1LANE:
15094 case NEON_STORESTRUCT:
15095 case NEON_STORESTRUCTLANE:
15098 /* Fall through. */
15101 case NEON_LOGICBINOP:
15102 case NEON_SHIFTINSERT:
15109 case NEON_SHIFTIMM:
15110 case NEON_SHIFTACC:
15116 case NEON_LANEMULL:
15117 case NEON_LANEMULH:
15119 case NEON_SCALARMUL:
15120 case NEON_SCALARMULL:
15121 case NEON_SCALARMULH:
15122 case NEON_SCALARMAC:
15128 tree return_type = void_type_node, args = void_list_node;
15130 /* Build a function type directly from the insn_data for this
15131 builtin. The build_function_type() function takes care of
15132 removing duplicates for us. */
15133 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15137 if (is_load && k == 1)
15139 /* Neon load patterns always have the memory operand
15140 (a SImode pointer) in the operand 1 position. We
15141 want a const pointer to the element type in that
15143 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15149 eltype = const_intQI_pointer_node;
15154 eltype = const_intHI_pointer_node;
15159 eltype = const_intSI_pointer_node;
15164 eltype = const_float_pointer_node;
15169 eltype = const_intDI_pointer_node;
15172 default: gcc_unreachable ();
15175 else if (is_store && k == 0)
15177 /* Similarly, Neon store patterns use operand 0 as
15178 the memory location to store to (a SImode pointer).
15179 Use a pointer to the element type of the store in
15181 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15187 eltype = intQI_pointer_node;
15192 eltype = intHI_pointer_node;
15197 eltype = intSI_pointer_node;
15202 eltype = float_pointer_node;
15207 eltype = intDI_pointer_node;
15210 default: gcc_unreachable ();
15215 switch (insn_data[icode].operand[k].mode)
15217 case VOIDmode: eltype = void_type_node; break;
15219 case QImode: eltype = neon_intQI_type_node; break;
15220 case HImode: eltype = neon_intHI_type_node; break;
15221 case SImode: eltype = neon_intSI_type_node; break;
15222 case SFmode: eltype = neon_float_type_node; break;
15223 case DImode: eltype = neon_intDI_type_node; break;
15224 case TImode: eltype = intTI_type_node; break;
15225 case EImode: eltype = intEI_type_node; break;
15226 case OImode: eltype = intOI_type_node; break;
15227 case CImode: eltype = intCI_type_node; break;
15228 case XImode: eltype = intXI_type_node; break;
15229 /* 64-bit vectors. */
15230 case V8QImode: eltype = V8QI_type_node; break;
15231 case V4HImode: eltype = V4HI_type_node; break;
15232 case V2SImode: eltype = V2SI_type_node; break;
15233 case V2SFmode: eltype = V2SF_type_node; break;
15234 /* 128-bit vectors. */
15235 case V16QImode: eltype = V16QI_type_node; break;
15236 case V8HImode: eltype = V8HI_type_node; break;
15237 case V4SImode: eltype = V4SI_type_node; break;
15238 case V4SFmode: eltype = V4SF_type_node; break;
15239 case V2DImode: eltype = V2DI_type_node; break;
15240 default: gcc_unreachable ();
15244 if (k == 0 && !is_store)
15245 return_type = eltype;
15247 args = tree_cons (NULL_TREE, eltype, args);
15250 ftype = build_function_type (return_type, args);
15254 case NEON_RESULTPAIR:
15256 switch (insn_data[icode].operand[1].mode)
15258 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15259 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15260 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15261 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15262 case DImode: ftype = void_ftype_pdi_di_di; break;
15263 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15264 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15265 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15266 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15267 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15268 default: gcc_unreachable ();
15273 case NEON_REINTERP:
15275 /* We iterate over 5 doubleword types, then 5 quadword
15278 switch (insn_data[icode].operand[0].mode)
15280 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15281 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15282 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15283 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15284 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15285 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15286 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15287 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15288 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15289 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15290 default: gcc_unreachable ();
15296 gcc_unreachable ();
15299 gcc_assert (ftype != NULL);
15301 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15303 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15310 arm_init_builtins (void)
15312 arm_init_tls_builtins ();
15314 if (TARGET_REALLY_IWMMXT)
15315 arm_init_iwmmxt_builtins ();
15318 arm_init_neon_builtins ();
15321 /* Errors in the source file can cause expand_expr to return const0_rtx
15322 where we expect a vector. To avoid crashing, use one of the vector
15323 clear instructions. */
15326 safe_vector_operand (rtx x, enum machine_mode mode)
15328 if (x != const0_rtx)
15330 x = gen_reg_rtx (mode);
15332 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15333 : gen_rtx_SUBREG (DImode, x, 0)));
15337 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15340 arm_expand_binop_builtin (enum insn_code icode,
15341 tree exp, rtx target)
15344 tree arg0 = CALL_EXPR_ARG (exp, 0);
15345 tree arg1 = CALL_EXPR_ARG (exp, 1);
15346 rtx op0 = expand_normal (arg0);
15347 rtx op1 = expand_normal (arg1);
15348 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15349 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15350 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15352 if (VECTOR_MODE_P (mode0))
15353 op0 = safe_vector_operand (op0, mode0);
15354 if (VECTOR_MODE_P (mode1))
15355 op1 = safe_vector_operand (op1, mode1);
15358 || GET_MODE (target) != tmode
15359 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15360 target = gen_reg_rtx (tmode);
15362 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15364 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15365 op0 = copy_to_mode_reg (mode0, op0);
15366 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15367 op1 = copy_to_mode_reg (mode1, op1);
15369 pat = GEN_FCN (icode) (target, op0, op1);
15376 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15379 arm_expand_unop_builtin (enum insn_code icode,
15380 tree exp, rtx target, int do_load)
15383 tree arg0 = CALL_EXPR_ARG (exp, 0);
15384 rtx op0 = expand_normal (arg0);
15385 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15386 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15389 || GET_MODE (target) != tmode
15390 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15391 target = gen_reg_rtx (tmode);
15393 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15396 if (VECTOR_MODE_P (mode0))
15397 op0 = safe_vector_operand (op0, mode0);
15399 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15400 op0 = copy_to_mode_reg (mode0, op0);
15403 pat = GEN_FCN (icode) (target, op0);
15411 neon_builtin_compare (const void *a, const void *b)
15413 const neon_builtin_datum *key = a;
15414 const neon_builtin_datum *memb = b;
15415 unsigned int soughtcode = key->base_fcode;
15417 if (soughtcode >= memb->base_fcode
15418 && soughtcode < memb->base_fcode + memb->num_vars)
15420 else if (soughtcode < memb->base_fcode)
15426 static enum insn_code
15427 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15429 neon_builtin_datum key, *found;
15432 key.base_fcode = fcode;
15433 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15434 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15435 gcc_assert (found);
15436 idx = fcode - (int) found->base_fcode;
15437 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15440 *itype = found->itype;
15442 return found->codes[idx];
15446 NEON_ARG_COPY_TO_REG,
15451 #define NEON_MAX_BUILTIN_ARGS 5
15453 /* Expand a Neon builtin. */
15455 arm_expand_neon_args (rtx target, int icode, int have_retval,
15460 tree arg[NEON_MAX_BUILTIN_ARGS];
15461 rtx op[NEON_MAX_BUILTIN_ARGS];
15462 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15463 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15468 || GET_MODE (target) != tmode
15469 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15470 target = gen_reg_rtx (tmode);
15472 va_start (ap, exp);
15476 builtin_arg thisarg = va_arg (ap, int);
15478 if (thisarg == NEON_ARG_STOP)
15482 arg[argc] = CALL_EXPR_ARG (exp, argc);
15483 op[argc] = expand_normal (arg[argc]);
15484 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15488 case NEON_ARG_COPY_TO_REG:
15489 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15490 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15491 (op[argc], mode[argc]))
15492 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15495 case NEON_ARG_CONSTANT:
15496 /* FIXME: This error message is somewhat unhelpful. */
15497 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15498 (op[argc], mode[argc]))
15499 error ("argument must be a constant");
15502 case NEON_ARG_STOP:
15503 gcc_unreachable ();
15516 pat = GEN_FCN (icode) (target, op[0]);
15520 pat = GEN_FCN (icode) (target, op[0], op[1]);
15524 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15528 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15532 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15536 gcc_unreachable ();
15542 pat = GEN_FCN (icode) (op[0]);
15546 pat = GEN_FCN (icode) (op[0], op[1]);
15550 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15554 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15558 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15562 gcc_unreachable ();
15573 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15574 constants defined per-instruction or per instruction-variant. Instead, the
15575 required info is looked up in the table neon_builtin_data. */
15577 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15580 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15587 return arm_expand_neon_args (target, icode, 1, exp,
15588 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15592 case NEON_SCALARMUL:
15593 case NEON_SCALARMULL:
15594 case NEON_SCALARMULH:
15595 case NEON_SHIFTINSERT:
15596 case NEON_LOGICBINOP:
15597 return arm_expand_neon_args (target, icode, 1, exp,
15598 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15602 return arm_expand_neon_args (target, icode, 1, exp,
15603 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15604 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15608 case NEON_SHIFTIMM:
15609 return arm_expand_neon_args (target, icode, 1, exp,
15610 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15614 return arm_expand_neon_args (target, icode, 1, exp,
15615 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15619 case NEON_REINTERP:
15620 return arm_expand_neon_args (target, icode, 1, exp,
15621 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15625 return arm_expand_neon_args (target, icode, 1, exp,
15626 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15628 case NEON_RESULTPAIR:
15629 return arm_expand_neon_args (target, icode, 0, exp,
15630 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15634 case NEON_LANEMULL:
15635 case NEON_LANEMULH:
15636 return arm_expand_neon_args (target, icode, 1, exp,
15637 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15638 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15641 return arm_expand_neon_args (target, icode, 1, exp,
15642 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15643 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15645 case NEON_SHIFTACC:
15646 return arm_expand_neon_args (target, icode, 1, exp,
15647 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15648 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15650 case NEON_SCALARMAC:
15651 return arm_expand_neon_args (target, icode, 1, exp,
15652 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15653 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15657 return arm_expand_neon_args (target, icode, 1, exp,
15658 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15662 case NEON_LOADSTRUCT:
15663 return arm_expand_neon_args (target, icode, 1, exp,
15664 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15666 case NEON_LOAD1LANE:
15667 case NEON_LOADSTRUCTLANE:
15668 return arm_expand_neon_args (target, icode, 1, exp,
15669 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15673 case NEON_STORESTRUCT:
15674 return arm_expand_neon_args (target, icode, 0, exp,
15675 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15677 case NEON_STORE1LANE:
15678 case NEON_STORESTRUCTLANE:
15679 return arm_expand_neon_args (target, icode, 0, exp,
15680 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15684 gcc_unreachable ();
15687 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15689 neon_reinterpret (rtx dest, rtx src)
15691 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15694 /* Emit code to place a Neon pair result in memory locations (with equal
15697 neon_emit_pair_result_insn (enum machine_mode mode,
15698 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15701 rtx mem = gen_rtx_MEM (mode, destaddr);
15702 rtx tmp1 = gen_reg_rtx (mode);
15703 rtx tmp2 = gen_reg_rtx (mode);
15705 emit_insn (intfn (tmp1, op1, tmp2, op2));
15707 emit_move_insn (mem, tmp1);
15708 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15709 emit_move_insn (mem, tmp2);
15712 /* Set up operands for a register copy from src to dest, taking care not to
15713 clobber registers in the process.
15714 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15715 be called with a large N, so that should be OK. */
15718 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15720 unsigned int copied = 0, opctr = 0;
15721 unsigned int done = (1 << count) - 1;
15724 while (copied != done)
15726 for (i = 0; i < count; i++)
15730 for (j = 0; good && j < count; j++)
15731 if (i != j && (copied & (1 << j)) == 0
15732 && reg_overlap_mentioned_p (src[j], dest[i]))
15737 operands[opctr++] = dest[i];
15738 operands[opctr++] = src[i];
15744 gcc_assert (opctr == count * 2);
15747 /* Expand an expression EXP that calls a built-in function,
15748 with result going to TARGET if that's convenient
15749 (and in mode MODE if that's convenient).
15750 SUBTARGET may be used as the target for computing one of EXP's operands.
15751 IGNORE is nonzero if the value is to be ignored. */
15754 arm_expand_builtin (tree exp,
15756 rtx subtarget ATTRIBUTE_UNUSED,
15757 enum machine_mode mode ATTRIBUTE_UNUSED,
15758 int ignore ATTRIBUTE_UNUSED)
15760 const struct builtin_description * d;
15761 enum insn_code icode;
15762 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15770 int fcode = DECL_FUNCTION_CODE (fndecl);
15772 enum machine_mode tmode;
15773 enum machine_mode mode0;
15774 enum machine_mode mode1;
15775 enum machine_mode mode2;
15777 if (fcode >= ARM_BUILTIN_NEON_BASE)
15778 return arm_expand_neon_builtin (fcode, exp, target);
15782 case ARM_BUILTIN_TEXTRMSB:
15783 case ARM_BUILTIN_TEXTRMUB:
15784 case ARM_BUILTIN_TEXTRMSH:
15785 case ARM_BUILTIN_TEXTRMUH:
15786 case ARM_BUILTIN_TEXTRMSW:
15787 case ARM_BUILTIN_TEXTRMUW:
15788 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15789 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15790 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15791 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15792 : CODE_FOR_iwmmxt_textrmw);
15794 arg0 = CALL_EXPR_ARG (exp, 0);
15795 arg1 = CALL_EXPR_ARG (exp, 1);
15796 op0 = expand_normal (arg0);
15797 op1 = expand_normal (arg1);
15798 tmode = insn_data[icode].operand[0].mode;
15799 mode0 = insn_data[icode].operand[1].mode;
15800 mode1 = insn_data[icode].operand[2].mode;
15802 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15803 op0 = copy_to_mode_reg (mode0, op0);
15804 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15806 /* @@@ better error message */
15807 error ("selector must be an immediate");
15808 return gen_reg_rtx (tmode);
15811 || GET_MODE (target) != tmode
15812 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15813 target = gen_reg_rtx (tmode);
15814 pat = GEN_FCN (icode) (target, op0, op1);
15820 case ARM_BUILTIN_TINSRB:
15821 case ARM_BUILTIN_TINSRH:
15822 case ARM_BUILTIN_TINSRW:
15823 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15824 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15825 : CODE_FOR_iwmmxt_tinsrw);
15826 arg0 = CALL_EXPR_ARG (exp, 0);
15827 arg1 = CALL_EXPR_ARG (exp, 1);
15828 arg2 = CALL_EXPR_ARG (exp, 2);
15829 op0 = expand_normal (arg0);
15830 op1 = expand_normal (arg1);
15831 op2 = expand_normal (arg2);
15832 tmode = insn_data[icode].operand[0].mode;
15833 mode0 = insn_data[icode].operand[1].mode;
15834 mode1 = insn_data[icode].operand[2].mode;
15835 mode2 = insn_data[icode].operand[3].mode;
15837 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15838 op0 = copy_to_mode_reg (mode0, op0);
15839 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15840 op1 = copy_to_mode_reg (mode1, op1);
15841 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15843 /* @@@ better error message */
15844 error ("selector must be an immediate");
15848 || GET_MODE (target) != tmode
15849 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15850 target = gen_reg_rtx (tmode);
15851 pat = GEN_FCN (icode) (target, op0, op1, op2);
15857 case ARM_BUILTIN_SETWCX:
15858 arg0 = CALL_EXPR_ARG (exp, 0);
15859 arg1 = CALL_EXPR_ARG (exp, 1);
15860 op0 = force_reg (SImode, expand_normal (arg0));
15861 op1 = expand_normal (arg1);
15862 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15865 case ARM_BUILTIN_GETWCX:
15866 arg0 = CALL_EXPR_ARG (exp, 0);
15867 op0 = expand_normal (arg0);
15868 target = gen_reg_rtx (SImode);
15869 emit_insn (gen_iwmmxt_tmrc (target, op0));
15872 case ARM_BUILTIN_WSHUFH:
15873 icode = CODE_FOR_iwmmxt_wshufh;
15874 arg0 = CALL_EXPR_ARG (exp, 0);
15875 arg1 = CALL_EXPR_ARG (exp, 1);
15876 op0 = expand_normal (arg0);
15877 op1 = expand_normal (arg1);
15878 tmode = insn_data[icode].operand[0].mode;
15879 mode1 = insn_data[icode].operand[1].mode;
15880 mode2 = insn_data[icode].operand[2].mode;
15882 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15883 op0 = copy_to_mode_reg (mode1, op0);
15884 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15886 /* @@@ better error message */
15887 error ("mask must be an immediate");
15891 || GET_MODE (target) != tmode
15892 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15893 target = gen_reg_rtx (tmode);
15894 pat = GEN_FCN (icode) (target, op0, op1);
15900 case ARM_BUILTIN_WSADB:
15901 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15902 case ARM_BUILTIN_WSADH:
15903 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15904 case ARM_BUILTIN_WSADBZ:
15905 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15906 case ARM_BUILTIN_WSADHZ:
15907 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15909 /* Several three-argument builtins. */
15910 case ARM_BUILTIN_WMACS:
15911 case ARM_BUILTIN_WMACU:
15912 case ARM_BUILTIN_WALIGN:
15913 case ARM_BUILTIN_TMIA:
15914 case ARM_BUILTIN_TMIAPH:
15915 case ARM_BUILTIN_TMIATT:
15916 case ARM_BUILTIN_TMIATB:
15917 case ARM_BUILTIN_TMIABT:
15918 case ARM_BUILTIN_TMIABB:
15919 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
15920 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
15921 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
15922 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
15923 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
15924 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
15925 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
15926 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
15927 : CODE_FOR_iwmmxt_walign);
15928 arg0 = CALL_EXPR_ARG (exp, 0);
15929 arg1 = CALL_EXPR_ARG (exp, 1);
15930 arg2 = CALL_EXPR_ARG (exp, 2);
15931 op0 = expand_normal (arg0);
15932 op1 = expand_normal (arg1);
15933 op2 = expand_normal (arg2);
15934 tmode = insn_data[icode].operand[0].mode;
15935 mode0 = insn_data[icode].operand[1].mode;
15936 mode1 = insn_data[icode].operand[2].mode;
15937 mode2 = insn_data[icode].operand[3].mode;
15939 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15940 op0 = copy_to_mode_reg (mode0, op0);
15941 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15942 op1 = copy_to_mode_reg (mode1, op1);
15943 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15944 op2 = copy_to_mode_reg (mode2, op2);
15946 || GET_MODE (target) != tmode
15947 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15948 target = gen_reg_rtx (tmode);
15949 pat = GEN_FCN (icode) (target, op0, op1, op2);
15955 case ARM_BUILTIN_WZERO:
15956 target = gen_reg_rtx (DImode);
15957 emit_insn (gen_iwmmxt_clrdi (target));
15960 case ARM_BUILTIN_THREAD_POINTER:
15961 return arm_load_tp (target);
15967 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15968 if (d->code == (const enum arm_builtins) fcode)
15969 return arm_expand_binop_builtin (d->icode, exp, target);
15971 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15972 if (d->code == (const enum arm_builtins) fcode)
15973 return arm_expand_unop_builtin (d->icode, exp, target, 0);
15975 /* @@@ Should really do something sensible here. */
15979 /* Return the number (counting from 0) of
15980 the least significant set bit in MASK. */
15983 number_of_first_bit_set (unsigned mask)
15988 (mask & (1 << bit)) == 0;
15995 /* Emit code to push or pop registers to or from the stack. F is the
15996 assembly file. MASK is the registers to push or pop. PUSH is
15997 nonzero if we should push, and zero if we should pop. For debugging
15998 output, if pushing, adjust CFA_OFFSET by the amount of space added
15999 to the stack. REAL_REGS should have the same number of bits set as
16000 MASK, and will be used instead (in the same order) to describe which
16001 registers were saved - this is used to mark the save slots when we
16002 push high registers after moving them to low registers. */
16004 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16005 unsigned long real_regs)
16008 int lo_mask = mask & 0xFF;
16009 int pushed_words = 0;
16013 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16015 /* Special case. Do not generate a POP PC statement here, do it in
16017 thumb_exit (f, -1);
16021 if (ARM_EABI_UNWIND_TABLES && push)
16023 fprintf (f, "\t.save\t{");
16024 for (regno = 0; regno < 15; regno++)
16026 if (real_regs & (1 << regno))
16028 if (real_regs & ((1 << regno) -1))
16030 asm_fprintf (f, "%r", regno);
16033 fprintf (f, "}\n");
16036 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16038 /* Look at the low registers first. */
16039 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16043 asm_fprintf (f, "%r", regno);
16045 if ((lo_mask & ~1) != 0)
16052 if (push && (mask & (1 << LR_REGNUM)))
16054 /* Catch pushing the LR. */
16058 asm_fprintf (f, "%r", LR_REGNUM);
16062 else if (!push && (mask & (1 << PC_REGNUM)))
16064 /* Catch popping the PC. */
16065 if (TARGET_INTERWORK || TARGET_BACKTRACE
16066 || current_function_calls_eh_return)
16068 /* The PC is never poped directly, instead
16069 it is popped into r3 and then BX is used. */
16070 fprintf (f, "}\n");
16072 thumb_exit (f, -1);
16081 asm_fprintf (f, "%r", PC_REGNUM);
16085 fprintf (f, "}\n");
16087 if (push && pushed_words && dwarf2out_do_frame ())
16089 char *l = dwarf2out_cfi_label ();
16090 int pushed_mask = real_regs;
16092 *cfa_offset += pushed_words * 4;
16093 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16096 pushed_mask = real_regs;
16097 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16099 if (pushed_mask & 1)
16100 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16105 /* Generate code to return from a thumb function.
16106 If 'reg_containing_return_addr' is -1, then the return address is
16107 actually on the stack, at the stack pointer. */
16109 thumb_exit (FILE *f, int reg_containing_return_addr)
16111 unsigned regs_available_for_popping;
16112 unsigned regs_to_pop;
16114 unsigned available;
16118 int restore_a4 = FALSE;
16120 /* Compute the registers we need to pop. */
16124 if (reg_containing_return_addr == -1)
16126 regs_to_pop |= 1 << LR_REGNUM;
16130 if (TARGET_BACKTRACE)
16132 /* Restore the (ARM) frame pointer and stack pointer. */
16133 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16137 /* If there is nothing to pop then just emit the BX instruction and
16139 if (pops_needed == 0)
16141 if (current_function_calls_eh_return)
16142 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16144 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16147 /* Otherwise if we are not supporting interworking and we have not created
16148 a backtrace structure and the function was not entered in ARM mode then
16149 just pop the return address straight into the PC. */
16150 else if (!TARGET_INTERWORK
16151 && !TARGET_BACKTRACE
16152 && !is_called_in_ARM_mode (current_function_decl)
16153 && !current_function_calls_eh_return)
16155 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16159 /* Find out how many of the (return) argument registers we can corrupt. */
16160 regs_available_for_popping = 0;
16162 /* If returning via __builtin_eh_return, the bottom three registers
16163 all contain information needed for the return. */
16164 if (current_function_calls_eh_return)
16168 /* If we can deduce the registers used from the function's
16169 return value. This is more reliable that examining
16170 df_regs_ever_live_p () because that will be set if the register is
16171 ever used in the function, not just if the register is used
16172 to hold a return value. */
16174 if (current_function_return_rtx != 0)
16175 mode = GET_MODE (current_function_return_rtx);
16177 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16179 size = GET_MODE_SIZE (mode);
16183 /* In a void function we can use any argument register.
16184 In a function that returns a structure on the stack
16185 we can use the second and third argument registers. */
16186 if (mode == VOIDmode)
16187 regs_available_for_popping =
16188 (1 << ARG_REGISTER (1))
16189 | (1 << ARG_REGISTER (2))
16190 | (1 << ARG_REGISTER (3));
16192 regs_available_for_popping =
16193 (1 << ARG_REGISTER (2))
16194 | (1 << ARG_REGISTER (3));
16196 else if (size <= 4)
16197 regs_available_for_popping =
16198 (1 << ARG_REGISTER (2))
16199 | (1 << ARG_REGISTER (3));
16200 else if (size <= 8)
16201 regs_available_for_popping =
16202 (1 << ARG_REGISTER (3));
16205 /* Match registers to be popped with registers into which we pop them. */
16206 for (available = regs_available_for_popping,
16207 required = regs_to_pop;
16208 required != 0 && available != 0;
16209 available &= ~(available & - available),
16210 required &= ~(required & - required))
16213 /* If we have any popping registers left over, remove them. */
16215 regs_available_for_popping &= ~available;
16217 /* Otherwise if we need another popping register we can use
16218 the fourth argument register. */
16219 else if (pops_needed)
16221 /* If we have not found any free argument registers and
16222 reg a4 contains the return address, we must move it. */
16223 if (regs_available_for_popping == 0
16224 && reg_containing_return_addr == LAST_ARG_REGNUM)
16226 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16227 reg_containing_return_addr = LR_REGNUM;
16229 else if (size > 12)
16231 /* Register a4 is being used to hold part of the return value,
16232 but we have dire need of a free, low register. */
16235 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16238 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16240 /* The fourth argument register is available. */
16241 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16247 /* Pop as many registers as we can. */
16248 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16249 regs_available_for_popping);
16251 /* Process the registers we popped. */
16252 if (reg_containing_return_addr == -1)
16254 /* The return address was popped into the lowest numbered register. */
16255 regs_to_pop &= ~(1 << LR_REGNUM);
16257 reg_containing_return_addr =
16258 number_of_first_bit_set (regs_available_for_popping);
16260 /* Remove this register for the mask of available registers, so that
16261 the return address will not be corrupted by further pops. */
16262 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16265 /* If we popped other registers then handle them here. */
16266 if (regs_available_for_popping)
16270 /* Work out which register currently contains the frame pointer. */
16271 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16273 /* Move it into the correct place. */
16274 asm_fprintf (f, "\tmov\t%r, %r\n",
16275 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16277 /* (Temporarily) remove it from the mask of popped registers. */
16278 regs_available_for_popping &= ~(1 << frame_pointer);
16279 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16281 if (regs_available_for_popping)
16285 /* We popped the stack pointer as well,
16286 find the register that contains it. */
16287 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16289 /* Move it into the stack register. */
16290 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16292 /* At this point we have popped all necessary registers, so
16293 do not worry about restoring regs_available_for_popping
16294 to its correct value:
16296 assert (pops_needed == 0)
16297 assert (regs_available_for_popping == (1 << frame_pointer))
16298 assert (regs_to_pop == (1 << STACK_POINTER)) */
16302 /* Since we have just move the popped value into the frame
16303 pointer, the popping register is available for reuse, and
16304 we know that we still have the stack pointer left to pop. */
16305 regs_available_for_popping |= (1 << frame_pointer);
16309 /* If we still have registers left on the stack, but we no longer have
16310 any registers into which we can pop them, then we must move the return
16311 address into the link register and make available the register that
16313 if (regs_available_for_popping == 0 && pops_needed > 0)
16315 regs_available_for_popping |= 1 << reg_containing_return_addr;
16317 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16318 reg_containing_return_addr);
16320 reg_containing_return_addr = LR_REGNUM;
16323 /* If we have registers left on the stack then pop some more.
16324 We know that at most we will want to pop FP and SP. */
16325 if (pops_needed > 0)
16330 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16331 regs_available_for_popping);
16333 /* We have popped either FP or SP.
16334 Move whichever one it is into the correct register. */
16335 popped_into = number_of_first_bit_set (regs_available_for_popping);
16336 move_to = number_of_first_bit_set (regs_to_pop);
16338 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16340 regs_to_pop &= ~(1 << move_to);
16345 /* If we still have not popped everything then we must have only
16346 had one register available to us and we are now popping the SP. */
16347 if (pops_needed > 0)
16351 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16352 regs_available_for_popping);
16354 popped_into = number_of_first_bit_set (regs_available_for_popping);
16356 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16358 assert (regs_to_pop == (1 << STACK_POINTER))
16359 assert (pops_needed == 1)
16363 /* If necessary restore the a4 register. */
16366 if (reg_containing_return_addr != LR_REGNUM)
16368 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16369 reg_containing_return_addr = LR_REGNUM;
16372 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16375 if (current_function_calls_eh_return)
16376 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16378 /* Return to caller. */
16379 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16384 thumb1_final_prescan_insn (rtx insn)
16386 if (flag_print_asm_name)
16387 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16388 INSN_ADDRESSES (INSN_UID (insn)));
16392 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16394 unsigned HOST_WIDE_INT mask = 0xff;
16397 if (val == 0) /* XXX */
16400 for (i = 0; i < 25; i++)
16401 if ((val & (mask << i)) == val)
16407 /* Returns nonzero if the current function contains,
16408 or might contain a far jump. */
16410 thumb_far_jump_used_p (void)
16414 /* This test is only important for leaf functions. */
16415 /* assert (!leaf_function_p ()); */
16417 /* If we have already decided that far jumps may be used,
16418 do not bother checking again, and always return true even if
16419 it turns out that they are not being used. Once we have made
16420 the decision that far jumps are present (and that hence the link
16421 register will be pushed onto the stack) we cannot go back on it. */
16422 if (cfun->machine->far_jump_used)
16425 /* If this function is not being called from the prologue/epilogue
16426 generation code then it must be being called from the
16427 INITIAL_ELIMINATION_OFFSET macro. */
16428 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16430 /* In this case we know that we are being asked about the elimination
16431 of the arg pointer register. If that register is not being used,
16432 then there are no arguments on the stack, and we do not have to
16433 worry that a far jump might force the prologue to push the link
16434 register, changing the stack offsets. In this case we can just
16435 return false, since the presence of far jumps in the function will
16436 not affect stack offsets.
16438 If the arg pointer is live (or if it was live, but has now been
16439 eliminated and so set to dead) then we do have to test to see if
16440 the function might contain a far jump. This test can lead to some
16441 false negatives, since before reload is completed, then length of
16442 branch instructions is not known, so gcc defaults to returning their
16443 longest length, which in turn sets the far jump attribute to true.
16445 A false negative will not result in bad code being generated, but it
16446 will result in a needless push and pop of the link register. We
16447 hope that this does not occur too often.
16449 If we need doubleword stack alignment this could affect the other
16450 elimination offsets so we can't risk getting it wrong. */
16451 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16452 cfun->machine->arg_pointer_live = 1;
16453 else if (!cfun->machine->arg_pointer_live)
16457 /* Check to see if the function contains a branch
16458 insn with the far jump attribute set. */
16459 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16461 if (GET_CODE (insn) == JUMP_INSN
16462 /* Ignore tablejump patterns. */
16463 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16464 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16465 && get_attr_far_jump (insn) == FAR_JUMP_YES
16468 /* Record the fact that we have decided that
16469 the function does use far jumps. */
16470 cfun->machine->far_jump_used = 1;
16478 /* Return nonzero if FUNC must be entered in ARM mode. */
16480 is_called_in_ARM_mode (tree func)
16482 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16484 /* Ignore the problem about functions whose address is taken. */
16485 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16489 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16495 /* The bits which aren't usefully expanded as rtl. */
16497 thumb_unexpanded_epilogue (void)
16500 unsigned long live_regs_mask = 0;
16501 int high_regs_pushed = 0;
16502 int had_to_push_lr;
16505 if (return_used_this_function)
16508 if (IS_NAKED (arm_current_func_type ()))
16511 live_regs_mask = thumb1_compute_save_reg_mask ();
16512 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16514 /* If we can deduce the registers used from the function's return value.
16515 This is more reliable that examining df_regs_ever_live_p () because that
16516 will be set if the register is ever used in the function, not just if
16517 the register is used to hold a return value. */
16518 size = arm_size_return_regs ();
16520 /* The prolog may have pushed some high registers to use as
16521 work registers. e.g. the testsuite file:
16522 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16523 compiles to produce:
16524 push {r4, r5, r6, r7, lr}
16528 as part of the prolog. We have to undo that pushing here. */
16530 if (high_regs_pushed)
16532 unsigned long mask = live_regs_mask & 0xff;
16535 /* The available low registers depend on the size of the value we are
16543 /* Oh dear! We have no low registers into which we can pop
16546 ("no low registers available for popping high registers");
16548 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16549 if (live_regs_mask & (1 << next_hi_reg))
16552 while (high_regs_pushed)
16554 /* Find lo register(s) into which the high register(s) can
16556 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16558 if (mask & (1 << regno))
16559 high_regs_pushed--;
16560 if (high_regs_pushed == 0)
16564 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16566 /* Pop the values into the low register(s). */
16567 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16569 /* Move the value(s) into the high registers. */
16570 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16572 if (mask & (1 << regno))
16574 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16577 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16578 if (live_regs_mask & (1 << next_hi_reg))
16583 live_regs_mask &= ~0x0f00;
16586 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16587 live_regs_mask &= 0xff;
16589 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16591 /* Pop the return address into the PC. */
16592 if (had_to_push_lr)
16593 live_regs_mask |= 1 << PC_REGNUM;
16595 /* Either no argument registers were pushed or a backtrace
16596 structure was created which includes an adjusted stack
16597 pointer, so just pop everything. */
16598 if (live_regs_mask)
16599 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16602 /* We have either just popped the return address into the
16603 PC or it is was kept in LR for the entire function. */
16604 if (!had_to_push_lr)
16605 thumb_exit (asm_out_file, LR_REGNUM);
16609 /* Pop everything but the return address. */
16610 if (live_regs_mask)
16611 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16614 if (had_to_push_lr)
16618 /* We have no free low regs, so save one. */
16619 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16623 /* Get the return address into a temporary register. */
16624 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16625 1 << LAST_ARG_REGNUM);
16629 /* Move the return address to lr. */
16630 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16632 /* Restore the low register. */
16633 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16638 regno = LAST_ARG_REGNUM;
16643 /* Remove the argument registers that were pushed onto the stack. */
16644 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16645 SP_REGNUM, SP_REGNUM,
16646 current_function_pretend_args_size);
16648 thumb_exit (asm_out_file, regno);
16654 /* Functions to save and restore machine-specific function data. */
16655 static struct machine_function *
16656 arm_init_machine_status (void)
16658 struct machine_function *machine;
16659 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16661 #if ARM_FT_UNKNOWN != 0
16662 machine->func_type = ARM_FT_UNKNOWN;
16667 /* Return an RTX indicating where the return address to the
16668 calling function can be found. */
16670 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16675 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16678 /* Do anything needed before RTL is emitted for each function. */
16680 arm_init_expanders (void)
16682 /* Arrange to initialize and mark the machine per-function status. */
16683 init_machine_status = arm_init_machine_status;
16685 /* This is to stop the combine pass optimizing away the alignment
16686 adjustment of va_arg. */
16687 /* ??? It is claimed that this should not be necessary. */
16689 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16693 /* Like arm_compute_initial_elimination offset. Simpler because there
16694 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16695 to point at the base of the local variables after static stack
16696 space for a function has been allocated. */
16699 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16701 arm_stack_offsets *offsets;
16703 offsets = arm_get_frame_offsets ();
16707 case ARG_POINTER_REGNUM:
16710 case STACK_POINTER_REGNUM:
16711 return offsets->outgoing_args - offsets->saved_args;
16713 case FRAME_POINTER_REGNUM:
16714 return offsets->soft_frame - offsets->saved_args;
16716 case ARM_HARD_FRAME_POINTER_REGNUM:
16717 return offsets->saved_regs - offsets->saved_args;
16719 case THUMB_HARD_FRAME_POINTER_REGNUM:
16720 return offsets->locals_base - offsets->saved_args;
16723 gcc_unreachable ();
16727 case FRAME_POINTER_REGNUM:
16730 case STACK_POINTER_REGNUM:
16731 return offsets->outgoing_args - offsets->soft_frame;
16733 case ARM_HARD_FRAME_POINTER_REGNUM:
16734 return offsets->saved_regs - offsets->soft_frame;
16736 case THUMB_HARD_FRAME_POINTER_REGNUM:
16737 return offsets->locals_base - offsets->soft_frame;
16740 gcc_unreachable ();
16745 gcc_unreachable ();
16749 /* Generate the rest of a function's prologue. */
16751 thumb1_expand_prologue (void)
16755 HOST_WIDE_INT amount;
16756 arm_stack_offsets *offsets;
16757 unsigned long func_type;
16759 unsigned long live_regs_mask;
16761 func_type = arm_current_func_type ();
16763 /* Naked functions don't have prologues. */
16764 if (IS_NAKED (func_type))
16767 if (IS_INTERRUPT (func_type))
16769 error ("interrupt Service Routines cannot be coded in Thumb mode");
16773 live_regs_mask = thumb1_compute_save_reg_mask ();
16774 /* Load the pic register before setting the frame pointer,
16775 so we can use r7 as a temporary work register. */
16776 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16777 arm_load_pic_register (live_regs_mask);
16779 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16780 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16781 stack_pointer_rtx);
16783 offsets = arm_get_frame_offsets ();
16784 amount = offsets->outgoing_args - offsets->saved_regs;
16789 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16790 GEN_INT (- amount)));
16791 RTX_FRAME_RELATED_P (insn) = 1;
16797 /* The stack decrement is too big for an immediate value in a single
16798 insn. In theory we could issue multiple subtracts, but after
16799 three of them it becomes more space efficient to place the full
16800 value in the constant pool and load into a register. (Also the
16801 ARM debugger really likes to see only one stack decrement per
16802 function). So instead we look for a scratch register into which
16803 we can load the decrement, and then we subtract this from the
16804 stack pointer. Unfortunately on the thumb the only available
16805 scratch registers are the argument registers, and we cannot use
16806 these as they may hold arguments to the function. Instead we
16807 attempt to locate a call preserved register which is used by this
16808 function. If we can find one, then we know that it will have
16809 been pushed at the start of the prologue and so we can corrupt
16811 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16812 if (live_regs_mask & (1 << regno)
16813 && !(frame_pointer_needed
16814 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16817 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16819 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16821 /* Choose an arbitrary, non-argument low register. */
16822 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16824 /* Save it by copying it into a high, scratch register. */
16825 emit_insn (gen_movsi (spare, reg));
16826 /* Add a USE to stop propagate_one_insn() from barfing. */
16827 emit_insn (gen_prologue_use (spare));
16829 /* Decrement the stack. */
16830 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16831 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16832 stack_pointer_rtx, reg));
16833 RTX_FRAME_RELATED_P (insn) = 1;
16834 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16835 plus_constant (stack_pointer_rtx,
16837 RTX_FRAME_RELATED_P (dwarf) = 1;
16839 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16842 /* Restore the low register's original value. */
16843 emit_insn (gen_movsi (reg, spare));
16845 /* Emit a USE of the restored scratch register, so that flow
16846 analysis will not consider the restore redundant. The
16847 register won't be used again in this function and isn't
16848 restored by the epilogue. */
16849 emit_insn (gen_prologue_use (reg));
16853 reg = gen_rtx_REG (SImode, regno);
16855 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16857 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16858 stack_pointer_rtx, reg));
16859 RTX_FRAME_RELATED_P (insn) = 1;
16860 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16861 plus_constant (stack_pointer_rtx,
16863 RTX_FRAME_RELATED_P (dwarf) = 1;
16865 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16871 if (frame_pointer_needed)
16872 thumb_set_frame_pointer (offsets);
16874 /* If we are profiling, make sure no instructions are scheduled before
16875 the call to mcount. Similarly if the user has requested no
16876 scheduling in the prolog. Similarly if we want non-call exceptions
16877 using the EABI unwinder, to prevent faulting instructions from being
16878 swapped with a stack adjustment. */
16879 if (current_function_profile || !TARGET_SCHED_PROLOG
16880 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16881 emit_insn (gen_blockage ());
16883 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16884 if (live_regs_mask & 0xff)
16885 cfun->machine->lr_save_eliminated = 0;
16890 thumb1_expand_epilogue (void)
16892 HOST_WIDE_INT amount;
16893 arm_stack_offsets *offsets;
16896 /* Naked functions don't have prologues. */
16897 if (IS_NAKED (arm_current_func_type ()))
16900 offsets = arm_get_frame_offsets ();
16901 amount = offsets->outgoing_args - offsets->saved_regs;
16903 if (frame_pointer_needed)
16905 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16906 amount = offsets->locals_base - offsets->saved_regs;
16909 gcc_assert (amount >= 0);
16913 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16914 GEN_INT (amount)));
16917 /* r3 is always free in the epilogue. */
16918 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
16920 emit_insn (gen_movsi (reg, GEN_INT (amount)));
16921 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
16925 /* Emit a USE (stack_pointer_rtx), so that
16926 the stack adjustment will not be deleted. */
16927 emit_insn (gen_prologue_use (stack_pointer_rtx));
16929 if (current_function_profile || !TARGET_SCHED_PROLOG)
16930 emit_insn (gen_blockage ());
16932 /* Emit a clobber for each insn that will be restored in the epilogue,
16933 so that flow2 will get register lifetimes correct. */
16934 for (regno = 0; regno < 13; regno++)
16935 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
16936 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
16938 if (! df_regs_ever_live_p (LR_REGNUM))
16939 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
16943 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
16945 unsigned long live_regs_mask = 0;
16946 unsigned long l_mask;
16947 unsigned high_regs_pushed = 0;
16948 int cfa_offset = 0;
16951 if (IS_NAKED (arm_current_func_type ()))
16954 if (is_called_in_ARM_mode (current_function_decl))
16958 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
16959 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
16961 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
16963 /* Generate code sequence to switch us into Thumb mode. */
16964 /* The .code 32 directive has already been emitted by
16965 ASM_DECLARE_FUNCTION_NAME. */
16966 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
16967 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
16969 /* Generate a label, so that the debugger will notice the
16970 change in instruction sets. This label is also used by
16971 the assembler to bypass the ARM code when this function
16972 is called from a Thumb encoded function elsewhere in the
16973 same file. Hence the definition of STUB_NAME here must
16974 agree with the definition in gas/config/tc-arm.c. */
16976 #define STUB_NAME ".real_start_of"
16978 fprintf (f, "\t.code\t16\n");
16980 if (arm_dllexport_name_p (name))
16981 name = arm_strip_name_encoding (name);
16983 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
16984 fprintf (f, "\t.thumb_func\n");
16985 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
16988 if (current_function_pretend_args_size)
16990 /* Output unwind directive for the stack adjustment. */
16991 if (ARM_EABI_UNWIND_TABLES)
16992 fprintf (f, "\t.pad #%d\n",
16993 current_function_pretend_args_size);
16995 if (cfun->machine->uses_anonymous_args)
16999 fprintf (f, "\tpush\t{");
17001 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
17003 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17004 regno <= LAST_ARG_REGNUM;
17006 asm_fprintf (f, "%r%s", regno,
17007 regno == LAST_ARG_REGNUM ? "" : ", ");
17009 fprintf (f, "}\n");
17012 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17013 SP_REGNUM, SP_REGNUM,
17014 current_function_pretend_args_size);
17016 /* We don't need to record the stores for unwinding (would it
17017 help the debugger any if we did?), but record the change in
17018 the stack pointer. */
17019 if (dwarf2out_do_frame ())
17021 char *l = dwarf2out_cfi_label ();
17023 cfa_offset = cfa_offset + current_function_pretend_args_size;
17024 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17028 /* Get the registers we are going to push. */
17029 live_regs_mask = thumb1_compute_save_reg_mask ();
17030 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17031 l_mask = live_regs_mask & 0x40ff;
17032 /* Then count how many other high registers will need to be pushed. */
17033 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17035 if (TARGET_BACKTRACE)
17038 unsigned work_register;
17040 /* We have been asked to create a stack backtrace structure.
17041 The code looks like this:
17045 0 sub SP, #16 Reserve space for 4 registers.
17046 2 push {R7} Push low registers.
17047 4 add R7, SP, #20 Get the stack pointer before the push.
17048 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17049 8 mov R7, PC Get hold of the start of this code plus 12.
17050 10 str R7, [SP, #16] Store it.
17051 12 mov R7, FP Get hold of the current frame pointer.
17052 14 str R7, [SP, #4] Store it.
17053 16 mov R7, LR Get hold of the current return address.
17054 18 str R7, [SP, #12] Store it.
17055 20 add R7, SP, #16 Point at the start of the backtrace structure.
17056 22 mov FP, R7 Put this value into the frame pointer. */
17058 work_register = thumb_find_work_register (live_regs_mask);
17060 if (ARM_EABI_UNWIND_TABLES)
17061 asm_fprintf (f, "\t.pad #16\n");
17064 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17065 SP_REGNUM, SP_REGNUM);
17067 if (dwarf2out_do_frame ())
17069 char *l = dwarf2out_cfi_label ();
17071 cfa_offset = cfa_offset + 16;
17072 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17077 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17078 offset = bit_count (l_mask) * UNITS_PER_WORD;
17083 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17084 offset + 16 + current_function_pretend_args_size);
17086 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17089 /* Make sure that the instruction fetching the PC is in the right place
17090 to calculate "start of backtrace creation code + 12". */
17093 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17094 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17096 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17097 ARM_HARD_FRAME_POINTER_REGNUM);
17098 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17103 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17104 ARM_HARD_FRAME_POINTER_REGNUM);
17105 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17107 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17108 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17112 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17113 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17115 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17117 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17118 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17120 /* Optimization: If we are not pushing any low registers but we are going
17121 to push some high registers then delay our first push. This will just
17122 be a push of LR and we can combine it with the push of the first high
17124 else if ((l_mask & 0xff) != 0
17125 || (high_regs_pushed == 0 && l_mask))
17126 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17128 if (high_regs_pushed)
17130 unsigned pushable_regs;
17131 unsigned next_hi_reg;
17133 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17134 if (live_regs_mask & (1 << next_hi_reg))
17137 pushable_regs = l_mask & 0xff;
17139 if (pushable_regs == 0)
17140 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17142 while (high_regs_pushed > 0)
17144 unsigned long real_regs_mask = 0;
17146 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17148 if (pushable_regs & (1 << regno))
17150 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17152 high_regs_pushed --;
17153 real_regs_mask |= (1 << next_hi_reg);
17155 if (high_regs_pushed)
17157 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17159 if (live_regs_mask & (1 << next_hi_reg))
17164 pushable_regs &= ~((1 << regno) - 1);
17170 /* If we had to find a work register and we have not yet
17171 saved the LR then add it to the list of regs to push. */
17172 if (l_mask == (1 << LR_REGNUM))
17174 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17176 real_regs_mask | (1 << LR_REGNUM));
17180 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17185 /* Handle the case of a double word load into a low register from
17186 a computed memory address. The computed address may involve a
17187 register which is overwritten by the load. */
17189 thumb_load_double_from_address (rtx *operands)
17197 gcc_assert (GET_CODE (operands[0]) == REG);
17198 gcc_assert (GET_CODE (operands[1]) == MEM);
17200 /* Get the memory address. */
17201 addr = XEXP (operands[1], 0);
17203 /* Work out how the memory address is computed. */
17204 switch (GET_CODE (addr))
17207 operands[2] = adjust_address (operands[1], SImode, 4);
17209 if (REGNO (operands[0]) == REGNO (addr))
17211 output_asm_insn ("ldr\t%H0, %2", operands);
17212 output_asm_insn ("ldr\t%0, %1", operands);
17216 output_asm_insn ("ldr\t%0, %1", operands);
17217 output_asm_insn ("ldr\t%H0, %2", operands);
17222 /* Compute <address> + 4 for the high order load. */
17223 operands[2] = adjust_address (operands[1], SImode, 4);
17225 output_asm_insn ("ldr\t%0, %1", operands);
17226 output_asm_insn ("ldr\t%H0, %2", operands);
17230 arg1 = XEXP (addr, 0);
17231 arg2 = XEXP (addr, 1);
17233 if (CONSTANT_P (arg1))
17234 base = arg2, offset = arg1;
17236 base = arg1, offset = arg2;
17238 gcc_assert (GET_CODE (base) == REG);
17240 /* Catch the case of <address> = <reg> + <reg> */
17241 if (GET_CODE (offset) == REG)
17243 int reg_offset = REGNO (offset);
17244 int reg_base = REGNO (base);
17245 int reg_dest = REGNO (operands[0]);
17247 /* Add the base and offset registers together into the
17248 higher destination register. */
17249 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17250 reg_dest + 1, reg_base, reg_offset);
17252 /* Load the lower destination register from the address in
17253 the higher destination register. */
17254 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17255 reg_dest, reg_dest + 1);
17257 /* Load the higher destination register from its own address
17259 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17260 reg_dest + 1, reg_dest + 1);
17264 /* Compute <address> + 4 for the high order load. */
17265 operands[2] = adjust_address (operands[1], SImode, 4);
17267 /* If the computed address is held in the low order register
17268 then load the high order register first, otherwise always
17269 load the low order register first. */
17270 if (REGNO (operands[0]) == REGNO (base))
17272 output_asm_insn ("ldr\t%H0, %2", operands);
17273 output_asm_insn ("ldr\t%0, %1", operands);
17277 output_asm_insn ("ldr\t%0, %1", operands);
17278 output_asm_insn ("ldr\t%H0, %2", operands);
17284 /* With no registers to worry about we can just load the value
17286 operands[2] = adjust_address (operands[1], SImode, 4);
17288 output_asm_insn ("ldr\t%H0, %2", operands);
17289 output_asm_insn ("ldr\t%0, %1", operands);
17293 gcc_unreachable ();
17300 thumb_output_move_mem_multiple (int n, rtx *operands)
17307 if (REGNO (operands[4]) > REGNO (operands[5]))
17310 operands[4] = operands[5];
17313 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17314 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17318 if (REGNO (operands[4]) > REGNO (operands[5]))
17321 operands[4] = operands[5];
17324 if (REGNO (operands[5]) > REGNO (operands[6]))
17327 operands[5] = operands[6];
17330 if (REGNO (operands[4]) > REGNO (operands[5]))
17333 operands[4] = operands[5];
17337 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17338 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17342 gcc_unreachable ();
17348 /* Output a call-via instruction for thumb state. */
17350 thumb_call_via_reg (rtx reg)
17352 int regno = REGNO (reg);
17355 gcc_assert (regno < LR_REGNUM);
17357 /* If we are in the normal text section we can use a single instance
17358 per compilation unit. If we are doing function sections, then we need
17359 an entry per section, since we can't rely on reachability. */
17360 if (in_section == text_section)
17362 thumb_call_reg_needed = 1;
17364 if (thumb_call_via_label[regno] == NULL)
17365 thumb_call_via_label[regno] = gen_label_rtx ();
17366 labelp = thumb_call_via_label + regno;
17370 if (cfun->machine->call_via[regno] == NULL)
17371 cfun->machine->call_via[regno] = gen_label_rtx ();
17372 labelp = cfun->machine->call_via + regno;
17375 output_asm_insn ("bl\t%a0", labelp);
17379 /* Routines for generating rtl. */
17381 thumb_expand_movmemqi (rtx *operands)
17383 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17384 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17385 HOST_WIDE_INT len = INTVAL (operands[2]);
17386 HOST_WIDE_INT offset = 0;
17390 emit_insn (gen_movmem12b (out, in, out, in));
17396 emit_insn (gen_movmem8b (out, in, out, in));
17402 rtx reg = gen_reg_rtx (SImode);
17403 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17404 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17411 rtx reg = gen_reg_rtx (HImode);
17412 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17413 plus_constant (in, offset))));
17414 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17422 rtx reg = gen_reg_rtx (QImode);
17423 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17424 plus_constant (in, offset))));
17425 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17431 thumb_reload_out_hi (rtx *operands)
17433 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17436 /* Handle reading a half-word from memory during reload. */
17438 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17440 gcc_unreachable ();
17443 /* Return the length of a function name prefix
17444 that starts with the character 'c'. */
17446 arm_get_strip_length (int c)
17450 ARM_NAME_ENCODING_LENGTHS
17455 /* Return a pointer to a function's name with any
17456 and all prefix encodings stripped from it. */
17458 arm_strip_name_encoding (const char *name)
17462 while ((skip = arm_get_strip_length (* name)))
17468 /* If there is a '*' anywhere in the name's prefix, then
17469 emit the stripped name verbatim, otherwise prepend an
17470 underscore if leading underscores are being used. */
17472 arm_asm_output_labelref (FILE *stream, const char *name)
17477 while ((skip = arm_get_strip_length (* name)))
17479 verbatim |= (*name == '*');
17484 fputs (name, stream);
17486 asm_fprintf (stream, "%U%s", name);
17490 arm_file_start (void)
17494 if (TARGET_UNIFIED_ASM)
17495 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17499 const char *fpu_name;
17500 if (arm_select[0].string)
17501 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17502 else if (arm_select[1].string)
17503 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17505 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17506 all_cores[arm_default_cpu].name);
17508 if (TARGET_SOFT_FLOAT)
17511 fpu_name = "softvfp";
17513 fpu_name = "softfpa";
17517 int set_float_abi_attributes = 0;
17518 switch (arm_fpu_arch)
17523 case FPUTYPE_FPA_EMU2:
17526 case FPUTYPE_FPA_EMU3:
17529 case FPUTYPE_MAVERICK:
17530 fpu_name = "maverick";
17534 set_float_abi_attributes = 1;
17538 set_float_abi_attributes = 1;
17542 set_float_abi_attributes = 1;
17547 if (set_float_abi_attributes)
17549 if (TARGET_HARD_FLOAT)
17550 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17551 if (TARGET_HARD_FLOAT_ABI)
17552 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17555 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17557 /* Some of these attributes only apply when the corresponding features
17558 are used. However we don't have any easy way of figuring this out.
17559 Conservatively record the setting that would have been used. */
17561 /* Tag_ABI_PCS_wchar_t. */
17562 asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
17563 (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
17565 /* Tag_ABI_FP_rounding. */
17566 if (flag_rounding_math)
17567 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17568 if (!flag_unsafe_math_optimizations)
17570 /* Tag_ABI_FP_denomal. */
17571 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17572 /* Tag_ABI_FP_exceptions. */
17573 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17575 /* Tag_ABI_FP_user_exceptions. */
17576 if (flag_signaling_nans)
17577 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17578 /* Tag_ABI_FP_number_model. */
17579 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17580 flag_finite_math_only ? 1 : 3);
17582 /* Tag_ABI_align8_needed. */
17583 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17584 /* Tag_ABI_align8_preserved. */
17585 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17586 /* Tag_ABI_enum_size. */
17587 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17588 flag_short_enums ? 1 : 2);
17590 /* Tag_ABI_optimization_goals. */
17593 else if (optimize >= 2)
17599 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17601 default_file_start();
17605 arm_file_end (void)
17609 if (NEED_INDICATE_EXEC_STACK)
17610 /* Add .note.GNU-stack. */
17611 file_end_indicate_exec_stack ();
17613 if (! thumb_call_reg_needed)
17616 switch_to_section (text_section);
17617 asm_fprintf (asm_out_file, "\t.code 16\n");
17618 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17620 for (regno = 0; regno < LR_REGNUM; regno++)
17622 rtx label = thumb_call_via_label[regno];
17626 targetm.asm_out.internal_label (asm_out_file, "L",
17627 CODE_LABEL_NUMBER (label));
17628 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17635 #ifdef AOF_ASSEMBLER
17636 /* Special functions only needed when producing AOF syntax assembler. */
17640 struct pic_chain * next;
17641 const char * symname;
17644 static struct pic_chain * aof_pic_chain = NULL;
17647 aof_pic_entry (rtx x)
17649 struct pic_chain ** chainp;
17652 if (aof_pic_label == NULL_RTX)
17654 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
17657 for (offset = 0, chainp = &aof_pic_chain; *chainp;
17658 offset += 4, chainp = &(*chainp)->next)
17659 if ((*chainp)->symname == XSTR (x, 0))
17660 return plus_constant (aof_pic_label, offset);
17662 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
17663 (*chainp)->next = NULL;
17664 (*chainp)->symname = XSTR (x, 0);
17665 return plus_constant (aof_pic_label, offset);
17669 aof_dump_pic_table (FILE *f)
17671 struct pic_chain * chain;
17673 if (aof_pic_chain == NULL)
17676 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
17677 PIC_OFFSET_TABLE_REGNUM,
17678 PIC_OFFSET_TABLE_REGNUM);
17679 fputs ("|x$adcons|\n", f);
17681 for (chain = aof_pic_chain; chain; chain = chain->next)
17683 fputs ("\tDCD\t", f);
17684 assemble_name (f, chain->symname);
17689 int arm_text_section_count = 1;
17691 /* A get_unnamed_section callback for switching to the text section. */
17694 aof_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17696 fprintf (asm_out_file, "\tAREA |C$$code%d|, CODE, READONLY",
17697 arm_text_section_count++);
17699 fprintf (asm_out_file, ", PIC, REENTRANT");
17700 fprintf (asm_out_file, "\n");
17703 static int arm_data_section_count = 1;
17705 /* A get_unnamed_section callback for switching to the data section. */
17708 aof_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17710 fprintf (asm_out_file, "\tAREA |C$$data%d|, DATA\n",
17711 arm_data_section_count++);
17714 /* Implement TARGET_ASM_INIT_SECTIONS.
17716 AOF Assembler syntax is a nightmare when it comes to areas, since once
17717 we change from one area to another, we can't go back again. Instead,
17718 we must create a new area with the same attributes and add the new output
17719 to that. Unfortunately, there is nothing we can do here to guarantee that
17720 two areas with the same attributes will be linked adjacently in the
17721 resulting executable, so we have to be careful not to do pc-relative
17722 addressing across such boundaries. */
17725 aof_asm_init_sections (void)
17727 text_section = get_unnamed_section (SECTION_CODE,
17728 aof_output_text_section_asm_op, NULL);
17729 data_section = get_unnamed_section (SECTION_WRITE,
17730 aof_output_data_section_asm_op, NULL);
17731 readonly_data_section = text_section;
17735 zero_init_section (void)
17737 static int zero_init_count = 1;
17739 fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n", zero_init_count++);
17743 /* The AOF assembler is religiously strict about declarations of
17744 imported and exported symbols, so that it is impossible to declare
17745 a function as imported near the beginning of the file, and then to
17746 export it later on. It is, however, possible to delay the decision
17747 until all the functions in the file have been compiled. To get
17748 around this, we maintain a list of the imports and exports, and
17749 delete from it any that are subsequently defined. At the end of
17750 compilation we spit the remainder of the list out before the END
17755 struct import * next;
17759 static struct import * imports_list = NULL;
17762 aof_add_import (const char *name)
17764 struct import * new;
17766 for (new = imports_list; new; new = new->next)
17767 if (new->name == name)
17770 new = (struct import *) xmalloc (sizeof (struct import));
17771 new->next = imports_list;
17772 imports_list = new;
17777 aof_delete_import (const char *name)
17779 struct import ** old;
17781 for (old = &imports_list; *old; old = & (*old)->next)
17783 if ((*old)->name == name)
17785 *old = (*old)->next;
17791 int arm_main_function = 0;
17794 aof_dump_imports (FILE *f)
17796 /* The AOF assembler needs this to cause the startup code to be extracted
17797 from the library. Brining in __main causes the whole thing to work
17799 if (arm_main_function)
17801 switch_to_section (text_section);
17802 fputs ("\tIMPORT __main\n", f);
17803 fputs ("\tDCD __main\n", f);
17806 /* Now dump the remaining imports. */
17807 while (imports_list)
17809 fprintf (f, "\tIMPORT\t");
17810 assemble_name (f, imports_list->name);
17812 imports_list = imports_list->next;
17817 aof_globalize_label (FILE *stream, const char *name)
17819 default_globalize_label (stream, name);
17820 if (! strcmp (name, "main"))
17821 arm_main_function = 1;
17825 aof_file_start (void)
17827 fputs ("__r0\tRN\t0\n", asm_out_file);
17828 fputs ("__a1\tRN\t0\n", asm_out_file);
17829 fputs ("__a2\tRN\t1\n", asm_out_file);
17830 fputs ("__a3\tRN\t2\n", asm_out_file);
17831 fputs ("__a4\tRN\t3\n", asm_out_file);
17832 fputs ("__v1\tRN\t4\n", asm_out_file);
17833 fputs ("__v2\tRN\t5\n", asm_out_file);
17834 fputs ("__v3\tRN\t6\n", asm_out_file);
17835 fputs ("__v4\tRN\t7\n", asm_out_file);
17836 fputs ("__v5\tRN\t8\n", asm_out_file);
17837 fputs ("__v6\tRN\t9\n", asm_out_file);
17838 fputs ("__sl\tRN\t10\n", asm_out_file);
17839 fputs ("__fp\tRN\t11\n", asm_out_file);
17840 fputs ("__ip\tRN\t12\n", asm_out_file);
17841 fputs ("__sp\tRN\t13\n", asm_out_file);
17842 fputs ("__lr\tRN\t14\n", asm_out_file);
17843 fputs ("__pc\tRN\t15\n", asm_out_file);
17844 fputs ("__f0\tFN\t0\n", asm_out_file);
17845 fputs ("__f1\tFN\t1\n", asm_out_file);
17846 fputs ("__f2\tFN\t2\n", asm_out_file);
17847 fputs ("__f3\tFN\t3\n", asm_out_file);
17848 fputs ("__f4\tFN\t4\n", asm_out_file);
17849 fputs ("__f5\tFN\t5\n", asm_out_file);
17850 fputs ("__f6\tFN\t6\n", asm_out_file);
17851 fputs ("__f7\tFN\t7\n", asm_out_file);
17852 switch_to_section (text_section);
17856 aof_file_end (void)
17859 aof_dump_pic_table (asm_out_file);
17861 aof_dump_imports (asm_out_file);
17862 fputs ("\tEND\n", asm_out_file);
17864 #endif /* AOF_ASSEMBLER */
17867 /* Symbols in the text segment can be accessed without indirecting via the
17868 constant pool; it may take an extra binary operation, but this is still
17869 faster than indirecting via memory. Don't do this when not optimizing,
17870 since we won't be calculating al of the offsets necessary to do this
17874 arm_encode_section_info (tree decl, rtx rtl, int first)
17876 /* This doesn't work with AOF syntax, since the string table may be in
17877 a different AREA. */
17878 #ifndef AOF_ASSEMBLER
17879 if (optimize > 0 && TREE_CONSTANT (decl))
17880 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17883 default_encode_section_info (decl, rtl, first);
17885 #endif /* !ARM_PE */
17888 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17890 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17891 && !strcmp (prefix, "L"))
17893 arm_ccfsm_state = 0;
17894 arm_target_insn = NULL;
17896 default_internal_label (stream, prefix, labelno);
17899 /* Output code to add DELTA to the first argument, and then jump
17900 to FUNCTION. Used for C++ multiple inheritance. */
17902 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17903 HOST_WIDE_INT delta,
17904 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17907 static int thunk_label = 0;
17910 int mi_delta = delta;
17911 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17913 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17916 mi_delta = - mi_delta;
17917 /* When generating 16-bit thumb code, thunks are entered in arm mode. */
17920 int labelno = thunk_label++;
17921 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17922 fputs ("\tldr\tr12, ", file);
17923 assemble_name (file, label);
17924 fputc ('\n', file);
17927 /* If we are generating PIC, the ldr instruction below loads
17928 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17929 the address of the add + 8, so we have:
17931 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17934 Note that we have "+ 1" because some versions of GNU ld
17935 don't set the low bit of the result for R_ARM_REL32
17936 relocations against thumb function symbols. */
17937 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17938 assemble_name (file, labelpc);
17939 fputs (":\n", file);
17940 fputs ("\tadd\tr12, pc, r12\n", file);
17943 /* TODO: Use movw/movt for large constants when available. */
17944 while (mi_delta != 0)
17946 if ((mi_delta & (3 << shift)) == 0)
17950 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17951 mi_op, this_regno, this_regno,
17952 mi_delta & (0xff << shift));
17953 mi_delta &= ~(0xff << shift);
17959 fprintf (file, "\tbx\tr12\n");
17960 ASM_OUTPUT_ALIGN (file, 2);
17961 assemble_name (file, label);
17962 fputs (":\n", file);
17965 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17966 rtx tem = XEXP (DECL_RTL (function), 0);
17967 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17968 tem = gen_rtx_MINUS (GET_MODE (tem),
17970 gen_rtx_SYMBOL_REF (Pmode,
17971 ggc_strdup (labelpc)));
17972 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17975 /* Output ".word .LTHUNKn". */
17976 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17980 fputs ("\tb\t", file);
17981 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17982 if (NEED_PLT_RELOC)
17983 fputs ("(PLT)", file);
17984 fputc ('\n', file);
17989 arm_emit_vector_const (FILE *file, rtx x)
17992 const char * pattern;
17994 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17996 switch (GET_MODE (x))
17998 case V2SImode: pattern = "%08x"; break;
17999 case V4HImode: pattern = "%04x"; break;
18000 case V8QImode: pattern = "%02x"; break;
18001 default: gcc_unreachable ();
18004 fprintf (file, "0x");
18005 for (i = CONST_VECTOR_NUNITS (x); i--;)
18009 element = CONST_VECTOR_ELT (x, i);
18010 fprintf (file, pattern, INTVAL (element));
18017 arm_output_load_gr (rtx *operands)
18024 if (GET_CODE (operands [1]) != MEM
18025 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18026 || GET_CODE (reg = XEXP (sum, 0)) != REG
18027 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18028 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18029 return "wldrw%?\t%0, %1";
18031 /* Fix up an out-of-range load of a GR register. */
18032 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18033 wcgr = operands[0];
18035 output_asm_insn ("ldr%?\t%0, %1", operands);
18037 operands[0] = wcgr;
18039 output_asm_insn ("tmcr%?\t%0, %1", operands);
18040 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18045 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18047 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18048 named arg and all anonymous args onto the stack.
18049 XXX I know the prologue shouldn't be pushing registers, but it is faster
18053 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18054 enum machine_mode mode ATTRIBUTE_UNUSED,
18055 tree type ATTRIBUTE_UNUSED,
18057 int second_time ATTRIBUTE_UNUSED)
18059 cfun->machine->uses_anonymous_args = 1;
18060 if (cum->nregs < NUM_ARG_REGS)
18061 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
18064 /* Return nonzero if the CONSUMER instruction (a store) does not need
18065 PRODUCER's value to calculate the address. */
18068 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18070 rtx value = PATTERN (producer);
18071 rtx addr = PATTERN (consumer);
18073 if (GET_CODE (value) == COND_EXEC)
18074 value = COND_EXEC_CODE (value);
18075 if (GET_CODE (value) == PARALLEL)
18076 value = XVECEXP (value, 0, 0);
18077 value = XEXP (value, 0);
18078 if (GET_CODE (addr) == COND_EXEC)
18079 addr = COND_EXEC_CODE (addr);
18080 if (GET_CODE (addr) == PARALLEL)
18081 addr = XVECEXP (addr, 0, 0);
18082 addr = XEXP (addr, 0);
18084 return !reg_overlap_mentioned_p (value, addr);
18087 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18088 have an early register shift value or amount dependency on the
18089 result of PRODUCER. */
18092 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18094 rtx value = PATTERN (producer);
18095 rtx op = PATTERN (consumer);
18098 if (GET_CODE (value) == COND_EXEC)
18099 value = COND_EXEC_CODE (value);
18100 if (GET_CODE (value) == PARALLEL)
18101 value = XVECEXP (value, 0, 0);
18102 value = XEXP (value, 0);
18103 if (GET_CODE (op) == COND_EXEC)
18104 op = COND_EXEC_CODE (op);
18105 if (GET_CODE (op) == PARALLEL)
18106 op = XVECEXP (op, 0, 0);
18109 early_op = XEXP (op, 0);
18110 /* This is either an actual independent shift, or a shift applied to
18111 the first operand of another operation. We want the whole shift
18113 if (GET_CODE (early_op) == REG)
18116 return !reg_overlap_mentioned_p (value, early_op);
18119 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18120 have an early register shift value dependency on the result of
18124 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18126 rtx value = PATTERN (producer);
18127 rtx op = PATTERN (consumer);
18130 if (GET_CODE (value) == COND_EXEC)
18131 value = COND_EXEC_CODE (value);
18132 if (GET_CODE (value) == PARALLEL)
18133 value = XVECEXP (value, 0, 0);
18134 value = XEXP (value, 0);
18135 if (GET_CODE (op) == COND_EXEC)
18136 op = COND_EXEC_CODE (op);
18137 if (GET_CODE (op) == PARALLEL)
18138 op = XVECEXP (op, 0, 0);
18141 early_op = XEXP (op, 0);
18143 /* This is either an actual independent shift, or a shift applied to
18144 the first operand of another operation. We want the value being
18145 shifted, in either case. */
18146 if (GET_CODE (early_op) != REG)
18147 early_op = XEXP (early_op, 0);
18149 return !reg_overlap_mentioned_p (value, early_op);
18152 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18153 have an early register mult dependency on the result of
18157 arm_no_early_mul_dep (rtx producer, rtx consumer)
18159 rtx value = PATTERN (producer);
18160 rtx op = PATTERN (consumer);
18162 if (GET_CODE (value) == COND_EXEC)
18163 value = COND_EXEC_CODE (value);
18164 if (GET_CODE (value) == PARALLEL)
18165 value = XVECEXP (value, 0, 0);
18166 value = XEXP (value, 0);
18167 if (GET_CODE (op) == COND_EXEC)
18168 op = COND_EXEC_CODE (op);
18169 if (GET_CODE (op) == PARALLEL)
18170 op = XVECEXP (op, 0, 0);
18173 return (GET_CODE (op) == PLUS
18174 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18177 /* We can't rely on the caller doing the proper promotion when
18178 using APCS or ATPCS. */
18181 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
18183 return !TARGET_AAPCS_BASED;
18187 /* AAPCS based ABIs use short enums by default. */
18190 arm_default_short_enums (void)
18192 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18196 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18199 arm_align_anon_bitfield (void)
18201 return TARGET_AAPCS_BASED;
18205 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18208 arm_cxx_guard_type (void)
18210 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18213 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18214 has an accumulator dependency on the result of the producer (a
18215 multiplication instruction) and no other dependency on that result. */
18217 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18219 rtx mul = PATTERN (producer);
18220 rtx mac = PATTERN (consumer);
18222 rtx mac_op0, mac_op1, mac_acc;
18224 if (GET_CODE (mul) == COND_EXEC)
18225 mul = COND_EXEC_CODE (mul);
18226 if (GET_CODE (mac) == COND_EXEC)
18227 mac = COND_EXEC_CODE (mac);
18229 /* Check that mul is of the form (set (...) (mult ...))
18230 and mla is of the form (set (...) (plus (mult ...) (...))). */
18231 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18232 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18233 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18236 mul_result = XEXP (mul, 0);
18237 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18238 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18239 mac_acc = XEXP (XEXP (mac, 1), 1);
18241 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18242 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18243 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18247 /* The EABI says test the least significant bit of a guard variable. */
18250 arm_cxx_guard_mask_bit (void)
18252 return TARGET_AAPCS_BASED;
18256 /* The EABI specifies that all array cookies are 8 bytes long. */
18259 arm_get_cookie_size (tree type)
18263 if (!TARGET_AAPCS_BASED)
18264 return default_cxx_get_cookie_size (type);
18266 size = build_int_cst (sizetype, 8);
18271 /* The EABI says that array cookies should also contain the element size. */
18274 arm_cookie_has_size (void)
18276 return TARGET_AAPCS_BASED;
18280 /* The EABI says constructors and destructors should return a pointer to
18281 the object constructed/destroyed. */
18284 arm_cxx_cdtor_returns_this (void)
18286 return TARGET_AAPCS_BASED;
18289 /* The EABI says that an inline function may never be the key
18293 arm_cxx_key_method_may_be_inline (void)
18295 return !TARGET_AAPCS_BASED;
18299 arm_cxx_determine_class_data_visibility (tree decl)
18301 if (!TARGET_AAPCS_BASED)
18304 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18305 is exported. However, on systems without dynamic vague linkage,
18306 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18307 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18308 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18310 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18311 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18315 arm_cxx_class_data_always_comdat (void)
18317 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18318 vague linkage if the class has no key function. */
18319 return !TARGET_AAPCS_BASED;
18323 /* The EABI says __aeabi_atexit should be used to register static
18327 arm_cxx_use_aeabi_atexit (void)
18329 return TARGET_AAPCS_BASED;
18334 arm_set_return_address (rtx source, rtx scratch)
18336 arm_stack_offsets *offsets;
18337 HOST_WIDE_INT delta;
18339 unsigned long saved_regs;
18341 saved_regs = arm_compute_save_reg_mask ();
18343 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18344 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18347 if (frame_pointer_needed)
18348 addr = plus_constant(hard_frame_pointer_rtx, -4);
18351 /* LR will be the first saved register. */
18352 offsets = arm_get_frame_offsets ();
18353 delta = offsets->outgoing_args - (offsets->frame + 4);
18358 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18359 GEN_INT (delta & ~4095)));
18364 addr = stack_pointer_rtx;
18366 addr = plus_constant (addr, delta);
18368 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18374 thumb_set_return_address (rtx source, rtx scratch)
18376 arm_stack_offsets *offsets;
18377 HOST_WIDE_INT delta;
18378 HOST_WIDE_INT limit;
18381 unsigned long mask;
18383 emit_insn (gen_rtx_USE (VOIDmode, source));
18385 mask = thumb1_compute_save_reg_mask ();
18386 if (mask & (1 << LR_REGNUM))
18388 offsets = arm_get_frame_offsets ();
18391 /* Find the saved regs. */
18392 if (frame_pointer_needed)
18394 delta = offsets->soft_frame - offsets->saved_args;
18395 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18401 delta = offsets->outgoing_args - offsets->saved_args;
18404 /* Allow for the stack frame. */
18405 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18407 /* The link register is always the first saved register. */
18410 /* Construct the address. */
18411 addr = gen_rtx_REG (SImode, reg);
18414 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18415 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18419 addr = plus_constant (addr, delta);
18421 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18424 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18427 /* Implements target hook vector_mode_supported_p. */
18429 arm_vector_mode_supported_p (enum machine_mode mode)
18431 /* Neon also supports V2SImode, etc. listed in the clause below. */
18432 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18433 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18436 if ((mode == V2SImode)
18437 || (mode == V4HImode)
18438 || (mode == V8QImode))
18444 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18445 ARM insns and therefore guarantee that the shift count is modulo 256.
18446 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18447 guarantee no particular behavior for out-of-range counts. */
18449 static unsigned HOST_WIDE_INT
18450 arm_shift_truncation_mask (enum machine_mode mode)
18452 return mode == SImode ? 255 : 0;
18456 /* Map internal gcc register numbers to DWARF2 register numbers. */
18459 arm_dbx_register_number (unsigned int regno)
18464 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18465 compatibility. The EABI defines them as registers 96-103. */
18466 if (IS_FPA_REGNUM (regno))
18467 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18469 /* FIXME: VFPv3 register numbering. */
18470 if (IS_VFP_REGNUM (regno))
18471 return 64 + regno - FIRST_VFP_REGNUM;
18473 if (IS_IWMMXT_GR_REGNUM (regno))
18474 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18476 if (IS_IWMMXT_REGNUM (regno))
18477 return 112 + regno - FIRST_IWMMXT_REGNUM;
18479 gcc_unreachable ();
18483 #ifdef TARGET_UNWIND_INFO
18484 /* Emit unwind directives for a store-multiple instruction or stack pointer
18485 push during alignment.
18486 These should only ever be generated by the function prologue code, so
18487 expect them to have a particular form. */
18490 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18493 HOST_WIDE_INT offset;
18494 HOST_WIDE_INT nregs;
18500 e = XVECEXP (p, 0, 0);
18501 if (GET_CODE (e) != SET)
18504 /* First insn will adjust the stack pointer. */
18505 if (GET_CODE (e) != SET
18506 || GET_CODE (XEXP (e, 0)) != REG
18507 || REGNO (XEXP (e, 0)) != SP_REGNUM
18508 || GET_CODE (XEXP (e, 1)) != PLUS)
18511 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18512 nregs = XVECLEN (p, 0) - 1;
18514 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18517 /* The function prologue may also push pc, but not annotate it as it is
18518 never restored. We turn this into a stack pointer adjustment. */
18519 if (nregs * 4 == offset - 4)
18521 fprintf (asm_out_file, "\t.pad #4\n");
18525 fprintf (asm_out_file, "\t.save {");
18527 else if (IS_VFP_REGNUM (reg))
18530 fprintf (asm_out_file, "\t.vsave {");
18532 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18534 /* FPA registers are done differently. */
18535 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18539 /* Unknown register type. */
18542 /* If the stack increment doesn't match the size of the saved registers,
18543 something has gone horribly wrong. */
18544 if (offset != nregs * reg_size)
18549 /* The remaining insns will describe the stores. */
18550 for (i = 1; i <= nregs; i++)
18552 /* Expect (set (mem <addr>) (reg)).
18553 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18554 e = XVECEXP (p, 0, i);
18555 if (GET_CODE (e) != SET
18556 || GET_CODE (XEXP (e, 0)) != MEM
18557 || GET_CODE (XEXP (e, 1)) != REG)
18560 reg = REGNO (XEXP (e, 1));
18565 fprintf (asm_out_file, ", ");
18566 /* We can't use %r for vfp because we need to use the
18567 double precision register names. */
18568 if (IS_VFP_REGNUM (reg))
18569 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18571 asm_fprintf (asm_out_file, "%r", reg);
18573 #ifdef ENABLE_CHECKING
18574 /* Check that the addresses are consecutive. */
18575 e = XEXP (XEXP (e, 0), 0);
18576 if (GET_CODE (e) == PLUS)
18578 offset += reg_size;
18579 if (GET_CODE (XEXP (e, 0)) != REG
18580 || REGNO (XEXP (e, 0)) != SP_REGNUM
18581 || GET_CODE (XEXP (e, 1)) != CONST_INT
18582 || offset != INTVAL (XEXP (e, 1)))
18586 || GET_CODE (e) != REG
18587 || REGNO (e) != SP_REGNUM)
18591 fprintf (asm_out_file, "}\n");
18594 /* Emit unwind directives for a SET. */
18597 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18605 switch (GET_CODE (e0))
18608 /* Pushing a single register. */
18609 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18610 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18611 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18614 asm_fprintf (asm_out_file, "\t.save ");
18615 if (IS_VFP_REGNUM (REGNO (e1)))
18616 asm_fprintf(asm_out_file, "{d%d}\n",
18617 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18619 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18623 if (REGNO (e0) == SP_REGNUM)
18625 /* A stack increment. */
18626 if (GET_CODE (e1) != PLUS
18627 || GET_CODE (XEXP (e1, 0)) != REG
18628 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18629 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18632 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18633 -INTVAL (XEXP (e1, 1)));
18635 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18637 HOST_WIDE_INT offset;
18639 if (GET_CODE (e1) == PLUS)
18641 if (GET_CODE (XEXP (e1, 0)) != REG
18642 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18644 reg = REGNO (XEXP (e1, 0));
18645 offset = INTVAL (XEXP (e1, 1));
18646 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18647 HARD_FRAME_POINTER_REGNUM, reg,
18648 INTVAL (XEXP (e1, 1)));
18650 else if (GET_CODE (e1) == REG)
18653 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18654 HARD_FRAME_POINTER_REGNUM, reg);
18659 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18661 /* Move from sp to reg. */
18662 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18664 else if (GET_CODE (e1) == PLUS
18665 && GET_CODE (XEXP (e1, 0)) == REG
18666 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18667 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18669 /* Set reg to offset from sp. */
18670 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18671 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18673 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18675 /* Stack pointer save before alignment. */
18677 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18690 /* Emit unwind directives for the given insn. */
18693 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18697 if (!ARM_EABI_UNWIND_TABLES)
18700 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18703 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18705 pat = XEXP (pat, 0);
18707 pat = PATTERN (insn);
18709 switch (GET_CODE (pat))
18712 arm_unwind_emit_set (asm_out_file, pat);
18716 /* Store multiple. */
18717 arm_unwind_emit_sequence (asm_out_file, pat);
18726 /* Output a reference from a function exception table to the type_info
18727 object X. The EABI specifies that the symbol should be relocated by
18728 an R_ARM_TARGET2 relocation. */
18731 arm_output_ttype (rtx x)
18733 fputs ("\t.word\t", asm_out_file);
18734 output_addr_const (asm_out_file, x);
18735 /* Use special relocations for symbol references. */
18736 if (GET_CODE (x) != CONST_INT)
18737 fputs ("(TARGET2)", asm_out_file);
18738 fputc ('\n', asm_out_file);
18742 #endif /* TARGET_UNWIND_INFO */
18745 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18746 stack alignment. */
18749 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18751 rtx unspec = SET_SRC (pattern);
18752 gcc_assert (GET_CODE (unspec) == UNSPEC);
18756 case UNSPEC_STACK_ALIGN:
18757 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18758 put anything on the stack, so hopefully it won't matter.
18759 CFA = SP will be correct after alignment. */
18760 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18761 SET_DEST (pattern));
18764 gcc_unreachable ();
18769 /* Output unwind directives for the start/end of a function. */
18772 arm_output_fn_unwind (FILE * f, bool prologue)
18774 if (!ARM_EABI_UNWIND_TABLES)
18778 fputs ("\t.fnstart\n", f);
18780 fputs ("\t.fnend\n", f);
18784 arm_emit_tls_decoration (FILE *fp, rtx x)
18786 enum tls_reloc reloc;
18789 val = XVECEXP (x, 0, 0);
18790 reloc = INTVAL (XVECEXP (x, 0, 1));
18792 output_addr_const (fp, val);
18797 fputs ("(tlsgd)", fp);
18800 fputs ("(tlsldm)", fp);
18803 fputs ("(tlsldo)", fp);
18806 fputs ("(gottpoff)", fp);
18809 fputs ("(tpoff)", fp);
18812 gcc_unreachable ();
18820 fputs (" + (. - ", fp);
18821 output_addr_const (fp, XVECEXP (x, 0, 2));
18823 output_addr_const (fp, XVECEXP (x, 0, 3));
18833 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18836 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18838 gcc_assert (size == 4);
18839 fputs ("\t.word\t", file);
18840 output_addr_const (file, x);
18841 fputs ("(tlsldo)", file);
18845 arm_output_addr_const_extra (FILE *fp, rtx x)
18847 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18848 return arm_emit_tls_decoration (fp, x);
18849 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18852 int labelno = INTVAL (XVECEXP (x, 0, 0));
18854 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18855 assemble_name_raw (fp, label);
18859 else if (GET_CODE (x) == CONST_VECTOR)
18860 return arm_emit_vector_const (fp, x);
18865 /* Output assembly for a shift instruction.
18866 SET_FLAGS determines how the instruction modifies the condition codes.
18867 0 - Do not set condition codes.
18868 1 - Set condition codes.
18869 2 - Use smallest instruction. */
18871 arm_output_shift(rtx * operands, int set_flags)
18874 static const char flag_chars[3] = {'?', '.', '!'};
18879 c = flag_chars[set_flags];
18880 if (TARGET_UNIFIED_ASM)
18882 shift = shift_op(operands[3], &val);
18886 operands[2] = GEN_INT(val);
18887 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18890 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18893 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18894 output_asm_insn (pattern, operands);
18898 /* Output a Thumb-2 casesi instruction. */
18900 thumb2_output_casesi (rtx *operands)
18902 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18904 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18906 output_asm_insn ("cmp\t%0, %1", operands);
18907 output_asm_insn ("bhi\t%l3", operands);
18908 switch (GET_MODE(diff_vec))
18911 return "tbb\t[%|pc, %0]";
18913 return "tbh\t[%|pc, %0, lsl #1]";
18917 output_asm_insn ("adr\t%4, %l2", operands);
18918 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18919 output_asm_insn ("add\t%4, %4, %5", operands);
18924 output_asm_insn ("adr\t%4, %l2", operands);
18925 return "ldr\t%|pc, [%4, %0, lsl #2]";
18928 gcc_unreachable ();
18932 /* A table and a function to perform ARM-specific name mangling for
18933 NEON vector types in order to conform to the AAPCS (see "Procedure
18934 Call Standard for the ARM Architecture", Appendix A). To qualify
18935 for emission with the mangled names defined in that document, a
18936 vector type must not only be of the correct mode but also be
18937 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18940 enum machine_mode mode;
18941 const char *element_type_name;
18942 const char *aapcs_name;
18943 } arm_mangle_map_entry;
18945 static arm_mangle_map_entry arm_mangle_map[] = {
18946 /* 64-bit containerized types. */
18947 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18948 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18949 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18950 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18951 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18952 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18953 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18954 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18955 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18956 /* 128-bit containerized types. */
18957 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18958 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18959 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18960 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18961 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18962 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18963 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18964 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18965 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18966 { VOIDmode, NULL, NULL }
18970 arm_mangle_type (tree type)
18972 arm_mangle_map_entry *pos = arm_mangle_map;
18974 if (TREE_CODE (type) != VECTOR_TYPE)
18977 /* Check the mode of the vector type, and the name of the vector
18978 element type, against the table. */
18979 while (pos->mode != VOIDmode)
18981 tree elt_type = TREE_TYPE (type);
18983 if (pos->mode == TYPE_MODE (type)
18984 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18985 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18986 pos->element_type_name))
18987 return pos->aapcs_name;
18992 /* Use the default mangling for unrecognized (possibly user-defined)
18997 #include "gt-arm.h"