1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 Boston, MA 02110-1301, USA. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
57 /* Forward definitions of types. */
58 typedef struct minipool_node Mnode;
59 typedef struct minipool_fixup Mfix;
61 const struct attribute_spec arm_attribute_table[];
63 /* Forward function declarations. */
64 static arm_stack_offsets *arm_get_frame_offsets (void);
65 static void arm_add_gc_roots (void);
66 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
67 HOST_WIDE_INT, rtx, rtx, int, int);
68 static unsigned bit_count (unsigned long);
69 static int arm_address_register_rtx_p (rtx, int);
70 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
71 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
72 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
73 inline static int thumb1_index_register_rtx_p (rtx, int);
74 static int thumb_far_jump_used_p (void);
75 static bool thumb_force_lr_save (void);
76 static unsigned long thumb1_compute_save_reg_mask (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static int arm_size_return_regs (void);
81 static bool arm_assemble_integer (rtx, unsigned int, int);
83 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
84 static arm_cc get_arm_condition_code (rtx);
85 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
86 static rtx is_jump_table (rtx);
87 static const char *output_multi_immediate (rtx *, const char *, const char *,
89 static const char *shift_op (rtx, HOST_WIDE_INT *);
90 static struct machine_function *arm_init_machine_status (void);
91 static void thumb_exit (FILE *, int);
92 static rtx is_jump_table (rtx);
93 static HOST_WIDE_INT get_jump_table_size (rtx);
94 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_forward_ref (Mfix *);
96 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
97 static Mnode *add_minipool_backward_ref (Mfix *);
98 static void assign_minipool_offsets (Mfix *);
99 static void arm_print_value (FILE *, rtx);
100 static void dump_minipool (rtx);
101 static int arm_barrier_cost (rtx);
102 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
103 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
104 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
106 static void arm_reorg (void);
107 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
108 static unsigned long arm_compute_save_reg0_reg12_mask (void);
109 static unsigned long arm_compute_save_reg_mask (void);
110 static unsigned long arm_isr_value (tree);
111 static unsigned long arm_compute_func_type (void);
112 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
113 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
114 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
115 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
117 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
118 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
119 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
120 static int arm_comp_type_attributes (tree, tree);
121 static void arm_set_default_type_attributes (tree);
122 static int arm_adjust_cost (rtx, rtx, rtx, int);
123 static int count_insns_for_constant (HOST_WIDE_INT, int);
124 static int arm_get_strip_length (int);
125 static bool arm_function_ok_for_sibcall (tree, tree);
126 static void arm_internal_label (FILE *, const char *, unsigned long);
127 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
129 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
130 static bool arm_size_rtx_costs (rtx, int, int, int *);
131 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
132 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
133 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
134 static bool arm_9e_rtx_costs (rtx, int, int, int *);
135 static int arm_address_cost (rtx);
136 static bool arm_memory_load_p (rtx);
137 static bool arm_cirrus_insn_p (rtx);
138 static void cirrus_reorg (rtx);
139 static void arm_init_builtins (void);
140 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
141 static void arm_init_iwmmxt_builtins (void);
142 static rtx safe_vector_operand (rtx, enum machine_mode);
143 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
144 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
145 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
146 static void emit_constant_insn (rtx cond, rtx pattern);
147 static rtx emit_set_insn (rtx, rtx);
148 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
151 #ifdef OBJECT_FORMAT_ELF
152 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
153 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
156 static void arm_encode_section_info (tree, rtx, int);
159 static void arm_file_end (void);
160 static void arm_file_start (void);
163 static void aof_globalize_label (FILE *, const char *);
164 static void aof_dump_imports (FILE *);
165 static void aof_dump_pic_table (FILE *);
166 static void aof_file_start (void);
167 static void aof_file_end (void);
168 static void aof_asm_init_sections (void);
170 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
172 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
173 enum machine_mode, tree, bool);
174 static bool arm_promote_prototypes (tree);
175 static bool arm_default_short_enums (void);
176 static bool arm_align_anon_bitfield (void);
177 static bool arm_return_in_msb (tree);
178 static bool arm_must_pass_in_stack (enum machine_mode, tree);
179 #ifdef TARGET_UNWIND_INFO
180 static void arm_unwind_emit (FILE *, rtx);
181 static bool arm_output_ttype (rtx);
183 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
185 static tree arm_cxx_guard_type (void);
186 static bool arm_cxx_guard_mask_bit (void);
187 static tree arm_get_cookie_size (tree);
188 static bool arm_cookie_has_size (void);
189 static bool arm_cxx_cdtor_returns_this (void);
190 static bool arm_cxx_key_method_may_be_inline (void);
191 static void arm_cxx_determine_class_data_visibility (tree);
192 static bool arm_cxx_class_data_always_comdat (void);
193 static bool arm_cxx_use_aeabi_atexit (void);
194 static void arm_init_libfuncs (void);
195 static bool arm_handle_option (size_t, const char *, int);
196 static void arm_target_help (void);
197 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
198 static bool arm_cannot_copy_insn_p (rtx);
199 static bool arm_tls_symbol_p (rtx x);
200 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
203 /* Initialize the GCC target structure. */
204 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
205 #undef TARGET_MERGE_DECL_ATTRIBUTES
206 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
209 #undef TARGET_ATTRIBUTE_TABLE
210 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
212 #undef TARGET_ASM_FILE_START
213 #define TARGET_ASM_FILE_START arm_file_start
214 #undef TARGET_ASM_FILE_END
215 #define TARGET_ASM_FILE_END arm_file_end
218 #undef TARGET_ASM_BYTE_OP
219 #define TARGET_ASM_BYTE_OP "\tDCB\t"
220 #undef TARGET_ASM_ALIGNED_HI_OP
221 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
222 #undef TARGET_ASM_ALIGNED_SI_OP
223 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
224 #undef TARGET_ASM_GLOBALIZE_LABEL
225 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
226 #undef TARGET_ASM_FILE_START
227 #define TARGET_ASM_FILE_START aof_file_start
228 #undef TARGET_ASM_FILE_END
229 #define TARGET_ASM_FILE_END aof_file_end
231 #undef TARGET_ASM_ALIGNED_SI_OP
232 #define TARGET_ASM_ALIGNED_SI_OP NULL
233 #undef TARGET_ASM_INTEGER
234 #define TARGET_ASM_INTEGER arm_assemble_integer
237 #undef TARGET_ASM_FUNCTION_PROLOGUE
238 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
240 #undef TARGET_ASM_FUNCTION_EPILOGUE
241 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
243 #undef TARGET_DEFAULT_TARGET_FLAGS
244 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
245 #undef TARGET_HANDLE_OPTION
246 #define TARGET_HANDLE_OPTION arm_handle_option
248 #define TARGET_HELP arm_target_help
250 #undef TARGET_COMP_TYPE_ATTRIBUTES
251 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
253 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
254 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
256 #undef TARGET_SCHED_ADJUST_COST
257 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
259 #undef TARGET_ENCODE_SECTION_INFO
261 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
263 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
266 #undef TARGET_STRIP_NAME_ENCODING
267 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
269 #undef TARGET_ASM_INTERNAL_LABEL
270 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
272 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
273 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
275 #undef TARGET_ASM_OUTPUT_MI_THUNK
276 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
277 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
278 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
280 /* This will be overridden in arm_override_options. */
281 #undef TARGET_RTX_COSTS
282 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
283 #undef TARGET_ADDRESS_COST
284 #define TARGET_ADDRESS_COST arm_address_cost
286 #undef TARGET_SHIFT_TRUNCATION_MASK
287 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
288 #undef TARGET_VECTOR_MODE_SUPPORTED_P
289 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
291 #undef TARGET_MACHINE_DEPENDENT_REORG
292 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS arm_init_builtins
296 #undef TARGET_EXPAND_BUILTIN
297 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
299 #undef TARGET_INIT_LIBFUNCS
300 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
302 #undef TARGET_PROMOTE_FUNCTION_ARGS
303 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
304 #undef TARGET_PROMOTE_FUNCTION_RETURN
305 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
306 #undef TARGET_PROMOTE_PROTOTYPES
307 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
308 #undef TARGET_PASS_BY_REFERENCE
309 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
310 #undef TARGET_ARG_PARTIAL_BYTES
311 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
313 #undef TARGET_SETUP_INCOMING_VARARGS
314 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
316 #undef TARGET_DEFAULT_SHORT_ENUMS
317 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
319 #undef TARGET_ALIGN_ANON_BITFIELD
320 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
322 #undef TARGET_NARROW_VOLATILE_BITFIELD
323 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
325 #undef TARGET_CXX_GUARD_TYPE
326 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
328 #undef TARGET_CXX_GUARD_MASK_BIT
329 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
331 #undef TARGET_CXX_GET_COOKIE_SIZE
332 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
334 #undef TARGET_CXX_COOKIE_HAS_SIZE
335 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
337 #undef TARGET_CXX_CDTOR_RETURNS_THIS
338 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
340 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
341 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
343 #undef TARGET_CXX_USE_AEABI_ATEXIT
344 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
346 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
347 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
348 arm_cxx_determine_class_data_visibility
350 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
351 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
353 #undef TARGET_RETURN_IN_MSB
354 #define TARGET_RETURN_IN_MSB arm_return_in_msb
356 #undef TARGET_MUST_PASS_IN_STACK
357 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
359 #ifdef TARGET_UNWIND_INFO
360 #undef TARGET_UNWIND_EMIT
361 #define TARGET_UNWIND_EMIT arm_unwind_emit
363 /* EABI unwinding tables use a different format for the typeinfo tables. */
364 #undef TARGET_ASM_TTYPE
365 #define TARGET_ASM_TTYPE arm_output_ttype
367 #undef TARGET_ARM_EABI_UNWINDER
368 #define TARGET_ARM_EABI_UNWINDER true
369 #endif /* TARGET_UNWIND_INFO */
371 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
372 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
374 #undef TARGET_CANNOT_COPY_INSN_P
375 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
378 #undef TARGET_HAVE_TLS
379 #define TARGET_HAVE_TLS true
382 #undef TARGET_CANNOT_FORCE_CONST_MEM
383 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
385 #undef TARGET_MANGLE_TYPE
386 #define TARGET_MANGLE_TYPE arm_mangle_type
389 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
390 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
393 struct gcc_target targetm = TARGET_INITIALIZER;
395 /* Obstack for minipool constant handling. */
396 static struct obstack minipool_obstack;
397 static char * minipool_startobj;
399 /* The maximum number of insns skipped which
400 will be conditionalised if possible. */
401 static int max_insns_skipped = 5;
403 extern FILE * asm_out_file;
405 /* True if we are currently building a constant table. */
406 int making_const_table;
408 /* Define the information needed to generate branch insns. This is
409 stored from the compare operation. */
410 rtx arm_compare_op0, arm_compare_op1;
412 /* The processor for which instructions should be scheduled. */
413 enum processor_type arm_tune = arm_none;
415 /* The default processor used if not overridden by commandline. */
416 static enum processor_type arm_default_cpu = arm_none;
418 /* Which floating point model to use. */
419 enum arm_fp_model arm_fp_model;
421 /* Which floating point hardware is available. */
422 enum fputype arm_fpu_arch;
424 /* Which floating point hardware to schedule for. */
425 enum fputype arm_fpu_tune;
427 /* Whether to use floating point hardware. */
428 enum float_abi_type arm_float_abi;
430 /* Which ABI to use. */
431 enum arm_abi_type arm_abi;
433 /* Which thread pointer model to use. */
434 enum arm_tp_type target_thread_pointer = TP_AUTO;
436 /* Used to parse -mstructure_size_boundary command line option. */
437 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
439 /* Used for Thumb call_via trampolines. */
440 rtx thumb_call_via_label[14];
441 static int thumb_call_reg_needed;
443 /* Bit values used to identify processor capabilities. */
444 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
445 #define FL_ARCH3M (1 << 1) /* Extended multiply */
446 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
447 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
448 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
449 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
450 #define FL_THUMB (1 << 6) /* Thumb aware */
451 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
452 #define FL_STRONG (1 << 8) /* StrongARM */
453 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
454 #define FL_XSCALE (1 << 10) /* XScale */
455 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
456 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
457 media instructions. */
458 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
459 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
460 Note: ARM6 & 7 derivatives only. */
461 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
462 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
463 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
465 #define FL_DIV (1 << 18) /* Hardware divide. */
466 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
467 #define FL_NEON (1 << 20) /* Neon instructions. */
469 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
471 #define FL_FOR_ARCH2 FL_NOTM
472 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
473 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
474 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
475 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
476 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
477 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
478 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
479 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
480 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
481 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
482 #define FL_FOR_ARCH6J FL_FOR_ARCH6
483 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
484 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
485 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
486 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
487 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
488 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
489 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
490 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
492 /* The bits in this mask specify which
493 instructions we are allowed to generate. */
494 static unsigned long insn_flags = 0;
496 /* The bits in this mask specify which instruction scheduling options should
498 static unsigned long tune_flags = 0;
500 /* The following are used in the arm.md file as equivalents to bits
501 in the above two flag variables. */
503 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
506 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
509 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
512 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
515 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
518 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
521 /* Nonzero if this chip supports the ARM 6K extensions. */
524 /* Nonzero if instructions not present in the 'M' profile can be used. */
525 int arm_arch_notm = 0;
527 /* Nonzero if this chip can benefit from load scheduling. */
528 int arm_ld_sched = 0;
530 /* Nonzero if this chip is a StrongARM. */
531 int arm_tune_strongarm = 0;
533 /* Nonzero if this chip is a Cirrus variant. */
534 int arm_arch_cirrus = 0;
536 /* Nonzero if this chip supports Intel Wireless MMX technology. */
537 int arm_arch_iwmmxt = 0;
539 /* Nonzero if this chip is an XScale. */
540 int arm_arch_xscale = 0;
542 /* Nonzero if tuning for XScale */
543 int arm_tune_xscale = 0;
545 /* Nonzero if we want to tune for stores that access the write-buffer.
546 This typically means an ARM6 or ARM7 with MMU or MPU. */
547 int arm_tune_wbuf = 0;
549 /* Nonzero if generating Thumb instructions. */
552 /* Nonzero if we should define __THUMB_INTERWORK__ in the
554 XXX This is a bit of a hack, it's intended to help work around
555 problems in GLD which doesn't understand that armv5t code is
556 interworking clean. */
557 int arm_cpp_interwork = 0;
559 /* Nonzero if chip supports Thumb 2. */
562 /* Nonzero if chip supports integer division instruction. */
565 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
566 must report the mode of the memory reference from PRINT_OPERAND to
567 PRINT_OPERAND_ADDRESS. */
568 enum machine_mode output_memory_reference_mode;
570 /* The register number to be used for the PIC offset register. */
571 unsigned arm_pic_register = INVALID_REGNUM;
573 /* Set to 1 when a return insn is output, this means that the epilogue
575 int return_used_this_function;
577 /* Set to 1 after arm_reorg has started. Reset to start at the start of
578 the next function. */
579 static int after_arm_reorg = 0;
581 /* The maximum number of insns to be used when loading a constant. */
582 static int arm_constant_limit = 3;
584 /* For an explanation of these variables, see final_prescan_insn below. */
586 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
587 enum arm_cond_code arm_current_cc;
589 int arm_target_label;
590 /* The number of conditionally executed insns, including the current insn. */
591 int arm_condexec_count = 0;
592 /* A bitmask specifying the patterns for the IT block.
593 Zero means do not output an IT block before this insn. */
594 int arm_condexec_mask = 0;
595 /* The number of bits used in arm_condexec_mask. */
596 int arm_condexec_masklen = 0;
598 /* The condition codes of the ARM, and the inverse function. */
599 static const char * const arm_condition_codes[] =
601 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
602 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
605 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
606 #define streq(string1, string2) (strcmp (string1, string2) == 0)
608 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
609 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
610 | (1 << PIC_OFFSET_TABLE_REGNUM)))
612 /* Initialization code. */
616 const char *const name;
617 enum processor_type core;
619 const unsigned long flags;
620 bool (* rtx_costs) (rtx, int, int, int *);
623 /* Not all of these give usefully different compilation alternatives,
624 but there is no simple way of generalizing them. */
625 static const struct processors all_cores[] =
628 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
629 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
630 #include "arm-cores.def"
632 {NULL, arm_none, NULL, 0, NULL}
635 static const struct processors all_architectures[] =
637 /* ARM Architectures */
638 /* We don't specify rtx_costs here as it will be figured out
641 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
642 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
643 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
644 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
645 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
646 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
647 implementations that support it, so we will leave it out for now. */
648 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
649 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
650 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
651 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
652 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
653 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
654 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
655 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
656 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
657 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
658 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
659 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
660 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
661 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
662 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
663 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
664 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
665 {NULL, arm_none, NULL, 0 , NULL}
668 struct arm_cpu_select
672 const struct processors * processors;
675 /* This is a magic structure. The 'string' field is magically filled in
676 with a pointer to the value specified by the user on the command line
677 assuming that the user has specified such a value. */
679 static struct arm_cpu_select arm_select[] =
681 /* string name processors */
682 { NULL, "-mcpu=", all_cores },
683 { NULL, "-march=", all_architectures },
684 { NULL, "-mtune=", all_cores }
687 /* Defines representing the indexes into the above table. */
688 #define ARM_OPT_SET_CPU 0
689 #define ARM_OPT_SET_ARCH 1
690 #define ARM_OPT_SET_TUNE 2
692 /* The name of the preprocessor macro to define for this architecture. */
694 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
703 /* Available values for -mfpu=. */
705 static const struct fpu_desc all_fpus[] =
707 {"fpa", FPUTYPE_FPA},
708 {"fpe2", FPUTYPE_FPA_EMU2},
709 {"fpe3", FPUTYPE_FPA_EMU2},
710 {"maverick", FPUTYPE_MAVERICK},
711 {"vfp", FPUTYPE_VFP},
712 {"vfp3", FPUTYPE_VFP3},
713 {"neon", FPUTYPE_NEON}
717 /* Floating point models used by the different hardware.
718 See fputype in arm.h. */
720 static const enum fputype fp_model_for_fpu[] =
722 /* No FP hardware. */
723 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
724 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
725 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
726 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
727 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
728 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
729 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
730 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
737 enum float_abi_type abi_type;
741 /* Available values for -mfloat-abi=. */
743 static const struct float_abi all_float_abis[] =
745 {"soft", ARM_FLOAT_ABI_SOFT},
746 {"softfp", ARM_FLOAT_ABI_SOFTFP},
747 {"hard", ARM_FLOAT_ABI_HARD}
754 enum arm_abi_type abi_type;
758 /* Available values for -mabi=. */
760 static const struct abi_name arm_all_abis[] =
762 {"apcs-gnu", ARM_ABI_APCS},
763 {"atpcs", ARM_ABI_ATPCS},
764 {"aapcs", ARM_ABI_AAPCS},
765 {"iwmmxt", ARM_ABI_IWMMXT},
766 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
769 /* Supported TLS relocations. */
779 /* Emit an insn that's a simple single-set. Both the operands must be known
782 emit_set_insn (rtx x, rtx y)
784 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
787 /* Return the number of bits set in VALUE. */
789 bit_count (unsigned long value)
791 unsigned long count = 0;
796 value &= value - 1; /* Clear the least-significant set bit. */
802 /* Set up library functions unique to ARM. */
805 arm_init_libfuncs (void)
807 /* There are no special library functions unless we are using the
812 /* The functions below are described in Section 4 of the "Run-Time
813 ABI for the ARM architecture", Version 1.0. */
815 /* Double-precision floating-point arithmetic. Table 2. */
816 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
817 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
818 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
819 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
820 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
822 /* Double-precision comparisons. Table 3. */
823 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
824 set_optab_libfunc (ne_optab, DFmode, NULL);
825 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
826 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
827 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
828 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
829 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
831 /* Single-precision floating-point arithmetic. Table 4. */
832 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
833 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
834 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
835 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
836 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
838 /* Single-precision comparisons. Table 5. */
839 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
840 set_optab_libfunc (ne_optab, SFmode, NULL);
841 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
842 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
843 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
844 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
845 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
847 /* Floating-point to integer conversions. Table 6. */
848 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
849 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
850 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
851 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
852 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
853 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
854 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
855 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
857 /* Conversions between floating types. Table 7. */
858 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
859 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
861 /* Integer to floating-point conversions. Table 8. */
862 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
863 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
864 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
865 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
866 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
867 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
868 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
869 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
871 /* Long long. Table 9. */
872 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
873 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
874 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
875 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
876 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
877 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
878 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
879 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
881 /* Integer (32/32->32) division. \S 4.3.1. */
882 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
883 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
885 /* The divmod functions are designed so that they can be used for
886 plain division, even though they return both the quotient and the
887 remainder. The quotient is returned in the usual location (i.e.,
888 r0 for SImode, {r0, r1} for DImode), just as would be expected
889 for an ordinary division routine. Because the AAPCS calling
890 conventions specify that all of { r0, r1, r2, r3 } are
891 callee-saved registers, there is no need to tell the compiler
892 explicitly that those registers are clobbered by these
894 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
895 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
897 /* For SImode division the ABI provides div-without-mod routines,
899 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
900 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
902 /* We don't have mod libcalls. Fortunately gcc knows how to use the
903 divmod libcalls instead. */
904 set_optab_libfunc (smod_optab, DImode, NULL);
905 set_optab_libfunc (umod_optab, DImode, NULL);
906 set_optab_libfunc (smod_optab, SImode, NULL);
907 set_optab_libfunc (umod_optab, SImode, NULL);
910 /* Implement TARGET_HANDLE_OPTION. */
913 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
918 arm_select[1].string = arg;
922 arm_select[0].string = arg;
925 case OPT_mhard_float:
926 target_float_abi_name = "hard";
929 case OPT_msoft_float:
930 target_float_abi_name = "soft";
934 arm_select[2].string = arg;
943 arm_target_help (void)
946 static int columns = 0;
949 /* If we have not done so already, obtain the desired maximum width of
950 the output. Note - this is a duplication of the code at the start of
951 gcc/opts.c:print_specific_help() - the two copies should probably be
952 replaced by a single function. */
957 GET_ENVIRONMENT (p, "COLUMNS");
960 int value = atoi (p);
967 /* Use a reasonable default. */
971 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
973 /* The - 2 is because we know that the last entry in the array is NULL. */
974 i = ARRAY_SIZE (all_cores) - 2;
976 printf (" %s", all_cores[i].name);
977 remaining = columns - (strlen (all_cores[i].name) + 4);
978 gcc_assert (remaining >= 0);
982 int len = strlen (all_cores[i].name);
984 if (remaining > len + 2)
986 printf (", %s", all_cores[i].name);
987 remaining -= len + 2;
993 printf ("\n %s", all_cores[i].name);
994 remaining = columns - (len + 4);
998 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1000 i = ARRAY_SIZE (all_architectures) - 2;
1003 printf (" %s", all_architectures[i].name);
1004 remaining = columns - (strlen (all_architectures[i].name) + 4);
1005 gcc_assert (remaining >= 0);
1009 int len = strlen (all_architectures[i].name);
1011 if (remaining > len + 2)
1013 printf (", %s", all_architectures[i].name);
1014 remaining -= len + 2;
1020 printf ("\n %s", all_architectures[i].name);
1021 remaining = columns - (len + 4);
1028 /* Fix up any incompatible options that the user has specified.
1029 This has now turned into a maze. */
1031 arm_override_options (void)
1034 enum processor_type target_arch_cpu = arm_none;
1036 /* Set up the flags based on the cpu/architecture selected by the user. */
1037 for (i = ARRAY_SIZE (arm_select); i--;)
1039 struct arm_cpu_select * ptr = arm_select + i;
1041 if (ptr->string != NULL && ptr->string[0] != '\0')
1043 const struct processors * sel;
1045 for (sel = ptr->processors; sel->name != NULL; sel++)
1046 if (streq (ptr->string, sel->name))
1048 /* Set the architecture define. */
1049 if (i != ARM_OPT_SET_TUNE)
1050 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1052 /* Determine the processor core for which we should
1053 tune code-generation. */
1054 if (/* -mcpu= is a sensible default. */
1055 i == ARM_OPT_SET_CPU
1056 /* -mtune= overrides -mcpu= and -march=. */
1057 || i == ARM_OPT_SET_TUNE)
1058 arm_tune = (enum processor_type) (sel - ptr->processors);
1060 /* Remember the CPU associated with this architecture.
1061 If no other option is used to set the CPU type,
1062 we'll use this to guess the most suitable tuning
1064 if (i == ARM_OPT_SET_ARCH)
1065 target_arch_cpu = sel->core;
1067 if (i != ARM_OPT_SET_TUNE)
1069 /* If we have been given an architecture and a processor
1070 make sure that they are compatible. We only generate
1071 a warning though, and we prefer the CPU over the
1073 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1074 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1077 insn_flags = sel->flags;
1083 if (sel->name == NULL)
1084 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1088 /* Guess the tuning options from the architecture if necessary. */
1089 if (arm_tune == arm_none)
1090 arm_tune = target_arch_cpu;
1092 /* If the user did not specify a processor, choose one for them. */
1093 if (insn_flags == 0)
1095 const struct processors * sel;
1096 unsigned int sought;
1097 enum processor_type cpu;
1099 cpu = TARGET_CPU_DEFAULT;
1100 if (cpu == arm_none)
1102 #ifdef SUBTARGET_CPU_DEFAULT
1103 /* Use the subtarget default CPU if none was specified by
1105 cpu = SUBTARGET_CPU_DEFAULT;
1107 /* Default to ARM6. */
1108 if (cpu == arm_none)
1111 sel = &all_cores[cpu];
1113 insn_flags = sel->flags;
1115 /* Now check to see if the user has specified some command line
1116 switch that require certain abilities from the cpu. */
1119 if (TARGET_INTERWORK || TARGET_THUMB)
1121 sought |= (FL_THUMB | FL_MODE32);
1123 /* There are no ARM processors that support both APCS-26 and
1124 interworking. Therefore we force FL_MODE26 to be removed
1125 from insn_flags here (if it was set), so that the search
1126 below will always be able to find a compatible processor. */
1127 insn_flags &= ~FL_MODE26;
1130 if (sought != 0 && ((sought & insn_flags) != sought))
1132 /* Try to locate a CPU type that supports all of the abilities
1133 of the default CPU, plus the extra abilities requested by
1135 for (sel = all_cores; sel->name != NULL; sel++)
1136 if ((sel->flags & sought) == (sought | insn_flags))
1139 if (sel->name == NULL)
1141 unsigned current_bit_count = 0;
1142 const struct processors * best_fit = NULL;
1144 /* Ideally we would like to issue an error message here
1145 saying that it was not possible to find a CPU compatible
1146 with the default CPU, but which also supports the command
1147 line options specified by the programmer, and so they
1148 ought to use the -mcpu=<name> command line option to
1149 override the default CPU type.
1151 If we cannot find a cpu that has both the
1152 characteristics of the default cpu and the given
1153 command line options we scan the array again looking
1154 for a best match. */
1155 for (sel = all_cores; sel->name != NULL; sel++)
1156 if ((sel->flags & sought) == sought)
1160 count = bit_count (sel->flags & insn_flags);
1162 if (count >= current_bit_count)
1165 current_bit_count = count;
1169 gcc_assert (best_fit);
1173 insn_flags = sel->flags;
1175 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1176 arm_default_cpu = (enum processor_type) (sel - all_cores);
1177 if (arm_tune == arm_none)
1178 arm_tune = arm_default_cpu;
1181 /* The processor for which we should tune should now have been
1183 gcc_assert (arm_tune != arm_none);
1185 tune_flags = all_cores[(int)arm_tune].flags;
1187 targetm.rtx_costs = arm_size_rtx_costs;
1189 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1191 /* Make sure that the processor choice does not conflict with any of the
1192 other command line choices. */
1193 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1194 error ("target CPU does not support ARM mode");
1196 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1198 warning (0, "target CPU does not support interworking" );
1199 target_flags &= ~MASK_INTERWORK;
1202 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1204 warning (0, "target CPU does not support THUMB instructions");
1205 target_flags &= ~MASK_THUMB;
1208 if (TARGET_APCS_FRAME && TARGET_THUMB)
1210 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1211 target_flags &= ~MASK_APCS_FRAME;
1214 /* Callee super interworking implies thumb interworking. Adding
1215 this to the flags here simplifies the logic elsewhere. */
1216 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1217 target_flags |= MASK_INTERWORK;
1219 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1220 from here where no function is being compiled currently. */
1221 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1222 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1224 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1225 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1227 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1228 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1230 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1232 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1233 target_flags |= MASK_APCS_FRAME;
1236 if (TARGET_POKE_FUNCTION_NAME)
1237 target_flags |= MASK_APCS_FRAME;
1239 if (TARGET_APCS_REENT && flag_pic)
1240 error ("-fpic and -mapcs-reent are incompatible");
1242 if (TARGET_APCS_REENT)
1243 warning (0, "APCS reentrant code not supported. Ignored");
1245 /* If this target is normally configured to use APCS frames, warn if they
1246 are turned off and debugging is turned on. */
1248 && write_symbols != NO_DEBUG
1249 && !TARGET_APCS_FRAME
1250 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1251 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1253 if (TARGET_APCS_FLOAT)
1254 warning (0, "passing floating point arguments in fp regs not yet supported");
1256 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1257 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1258 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1259 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1260 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1261 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1262 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1263 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1264 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1265 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1266 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1267 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1269 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1270 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1271 thumb_code = (TARGET_ARM == 0);
1272 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1273 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1274 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1275 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1277 /* V5 code we generate is completely interworking capable, so we turn off
1278 TARGET_INTERWORK here to avoid many tests later on. */
1280 /* XXX However, we must pass the right pre-processor defines to CPP
1281 or GLD can get confused. This is a hack. */
1282 if (TARGET_INTERWORK)
1283 arm_cpp_interwork = 1;
1286 target_flags &= ~MASK_INTERWORK;
1288 if (target_abi_name)
1290 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1292 if (streq (arm_all_abis[i].name, target_abi_name))
1294 arm_abi = arm_all_abis[i].abi_type;
1298 if (i == ARRAY_SIZE (arm_all_abis))
1299 error ("invalid ABI option: -mabi=%s", target_abi_name);
1302 arm_abi = ARM_DEFAULT_ABI;
1304 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1305 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1307 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1308 error ("iwmmxt abi requires an iwmmxt capable cpu");
1310 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1311 if (target_fpu_name == NULL && target_fpe_name != NULL)
1313 if (streq (target_fpe_name, "2"))
1314 target_fpu_name = "fpe2";
1315 else if (streq (target_fpe_name, "3"))
1316 target_fpu_name = "fpe3";
1318 error ("invalid floating point emulation option: -mfpe=%s",
1321 if (target_fpu_name != NULL)
1323 /* The user specified a FPU. */
1324 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1326 if (streq (all_fpus[i].name, target_fpu_name))
1328 arm_fpu_arch = all_fpus[i].fpu;
1329 arm_fpu_tune = arm_fpu_arch;
1330 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1334 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1335 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1339 #ifdef FPUTYPE_DEFAULT
1340 /* Use the default if it is specified for this platform. */
1341 arm_fpu_arch = FPUTYPE_DEFAULT;
1342 arm_fpu_tune = FPUTYPE_DEFAULT;
1344 /* Pick one based on CPU type. */
1345 /* ??? Some targets assume FPA is the default.
1346 if ((insn_flags & FL_VFP) != 0)
1347 arm_fpu_arch = FPUTYPE_VFP;
1350 if (arm_arch_cirrus)
1351 arm_fpu_arch = FPUTYPE_MAVERICK;
1353 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1355 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1356 arm_fpu_tune = FPUTYPE_FPA;
1358 arm_fpu_tune = arm_fpu_arch;
1359 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1360 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1363 if (target_float_abi_name != NULL)
1365 /* The user specified a FP ABI. */
1366 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1368 if (streq (all_float_abis[i].name, target_float_abi_name))
1370 arm_float_abi = all_float_abis[i].abi_type;
1374 if (i == ARRAY_SIZE (all_float_abis))
1375 error ("invalid floating point abi: -mfloat-abi=%s",
1376 target_float_abi_name);
1379 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1381 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1382 sorry ("-mfloat-abi=hard and VFP");
1384 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1385 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1386 will ever exist. GCC makes no attempt to support this combination. */
1387 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1388 sorry ("iWMMXt and hardware floating point");
1390 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1391 if (TARGET_THUMB2 && TARGET_IWMMXT)
1392 sorry ("Thumb-2 iWMMXt");
1394 /* If soft-float is specified then don't use FPU. */
1395 if (TARGET_SOFT_FLOAT)
1396 arm_fpu_arch = FPUTYPE_NONE;
1398 /* For arm2/3 there is no need to do any scheduling if there is only
1399 a floating point emulator, or we are doing software floating-point. */
1400 if ((TARGET_SOFT_FLOAT
1401 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1402 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1403 && (tune_flags & FL_MODE32) == 0)
1404 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1406 if (target_thread_switch)
1408 if (strcmp (target_thread_switch, "soft") == 0)
1409 target_thread_pointer = TP_SOFT;
1410 else if (strcmp (target_thread_switch, "auto") == 0)
1411 target_thread_pointer = TP_AUTO;
1412 else if (strcmp (target_thread_switch, "cp15") == 0)
1413 target_thread_pointer = TP_CP15;
1415 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1418 /* Use the cp15 method if it is available. */
1419 if (target_thread_pointer == TP_AUTO)
1421 if (arm_arch6k && !TARGET_THUMB)
1422 target_thread_pointer = TP_CP15;
1424 target_thread_pointer = TP_SOFT;
1427 if (TARGET_HARD_TP && TARGET_THUMB1)
1428 error ("can not use -mtp=cp15 with 16-bit Thumb");
1430 /* Override the default structure alignment for AAPCS ABI. */
1431 if (TARGET_AAPCS_BASED)
1432 arm_structure_size_boundary = 8;
1434 if (structure_size_string != NULL)
1436 int size = strtol (structure_size_string, NULL, 0);
1438 if (size == 8 || size == 32
1439 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1440 arm_structure_size_boundary = size;
1442 warning (0, "structure size boundary can only be set to %s",
1443 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1446 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1448 error ("RTP PIC is incompatible with Thumb");
1452 /* If stack checking is disabled, we can use r10 as the PIC register,
1453 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1454 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1456 if (TARGET_VXWORKS_RTP)
1457 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1458 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1461 if (flag_pic && TARGET_VXWORKS_RTP)
1462 arm_pic_register = 9;
1464 if (arm_pic_register_string != NULL)
1466 int pic_register = decode_reg_name (arm_pic_register_string);
1469 warning (0, "-mpic-register= is useless without -fpic");
1471 /* Prevent the user from choosing an obviously stupid PIC register. */
1472 else if (pic_register < 0 || call_used_regs[pic_register]
1473 || pic_register == HARD_FRAME_POINTER_REGNUM
1474 || pic_register == STACK_POINTER_REGNUM
1475 || pic_register >= PC_REGNUM
1476 || (TARGET_VXWORKS_RTP
1477 && (unsigned int) pic_register != arm_pic_register))
1478 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1480 arm_pic_register = pic_register;
1483 /* ??? We might want scheduling for thumb2. */
1484 if (TARGET_THUMB && flag_schedule_insns)
1486 /* Don't warn since it's on by default in -O2. */
1487 flag_schedule_insns = 0;
1492 arm_constant_limit = 1;
1494 /* If optimizing for size, bump the number of instructions that we
1495 are prepared to conditionally execute (even on a StrongARM). */
1496 max_insns_skipped = 6;
1500 /* For processors with load scheduling, it never costs more than
1501 2 cycles to load a constant, and the load scheduler may well
1502 reduce that to 1. */
1504 arm_constant_limit = 1;
1506 /* On XScale the longer latency of a load makes it more difficult
1507 to achieve a good schedule, so it's faster to synthesize
1508 constants that can be done in two insns. */
1509 if (arm_tune_xscale)
1510 arm_constant_limit = 2;
1512 /* StrongARM has early execution of branches, so a sequence
1513 that is worth skipping is shorter. */
1514 if (arm_tune_strongarm)
1515 max_insns_skipped = 3;
1518 /* Register global variables with the garbage collector. */
1519 arm_add_gc_roots ();
1523 arm_add_gc_roots (void)
1525 gcc_obstack_init(&minipool_obstack);
1526 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1529 /* A table of known ARM exception types.
1530 For use with the interrupt function attribute. */
1534 const char *const arg;
1535 const unsigned long return_value;
1539 static const isr_attribute_arg isr_attribute_args [] =
1541 { "IRQ", ARM_FT_ISR },
1542 { "irq", ARM_FT_ISR },
1543 { "FIQ", ARM_FT_FIQ },
1544 { "fiq", ARM_FT_FIQ },
1545 { "ABORT", ARM_FT_ISR },
1546 { "abort", ARM_FT_ISR },
1547 { "ABORT", ARM_FT_ISR },
1548 { "abort", ARM_FT_ISR },
1549 { "UNDEF", ARM_FT_EXCEPTION },
1550 { "undef", ARM_FT_EXCEPTION },
1551 { "SWI", ARM_FT_EXCEPTION },
1552 { "swi", ARM_FT_EXCEPTION },
1553 { NULL, ARM_FT_NORMAL }
1556 /* Returns the (interrupt) function type of the current
1557 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1559 static unsigned long
1560 arm_isr_value (tree argument)
1562 const isr_attribute_arg * ptr;
1566 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1568 /* No argument - default to IRQ. */
1569 if (argument == NULL_TREE)
1572 /* Get the value of the argument. */
1573 if (TREE_VALUE (argument) == NULL_TREE
1574 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1575 return ARM_FT_UNKNOWN;
1577 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1579 /* Check it against the list of known arguments. */
1580 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1581 if (streq (arg, ptr->arg))
1582 return ptr->return_value;
1584 /* An unrecognized interrupt type. */
1585 return ARM_FT_UNKNOWN;
1588 /* Computes the type of the current function. */
1590 static unsigned long
1591 arm_compute_func_type (void)
1593 unsigned long type = ARM_FT_UNKNOWN;
1597 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1599 /* Decide if the current function is volatile. Such functions
1600 never return, and many memory cycles can be saved by not storing
1601 register values that will never be needed again. This optimization
1602 was added to speed up context switching in a kernel application. */
1604 && (TREE_NOTHROW (current_function_decl)
1605 || !(flag_unwind_tables
1606 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1607 && TREE_THIS_VOLATILE (current_function_decl))
1608 type |= ARM_FT_VOLATILE;
1610 if (cfun->static_chain_decl != NULL)
1611 type |= ARM_FT_NESTED;
1613 attr = DECL_ATTRIBUTES (current_function_decl);
1615 a = lookup_attribute ("naked", attr);
1617 type |= ARM_FT_NAKED;
1619 a = lookup_attribute ("isr", attr);
1621 a = lookup_attribute ("interrupt", attr);
1624 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1626 type |= arm_isr_value (TREE_VALUE (a));
1631 /* Returns the type of the current function. */
1634 arm_current_func_type (void)
1636 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1637 cfun->machine->func_type = arm_compute_func_type ();
1639 return cfun->machine->func_type;
1642 /* Return 1 if it is possible to return using a single instruction.
1643 If SIBLING is non-null, this is a test for a return before a sibling
1644 call. SIBLING is the call insn, so we can examine its register usage. */
1647 use_return_insn (int iscond, rtx sibling)
1650 unsigned int func_type;
1651 unsigned long saved_int_regs;
1652 unsigned HOST_WIDE_INT stack_adjust;
1653 arm_stack_offsets *offsets;
1655 /* Never use a return instruction before reload has run. */
1656 if (!reload_completed)
1659 func_type = arm_current_func_type ();
1661 /* Naked, volatile and stack alignment functions need special
1663 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1666 /* So do interrupt functions that use the frame pointer and Thumb
1667 interrupt functions. */
1668 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1671 offsets = arm_get_frame_offsets ();
1672 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1674 /* As do variadic functions. */
1675 if (current_function_pretend_args_size
1676 || cfun->machine->uses_anonymous_args
1677 /* Or if the function calls __builtin_eh_return () */
1678 || current_function_calls_eh_return
1679 /* Or if the function calls alloca */
1680 || current_function_calls_alloca
1681 /* Or if there is a stack adjustment. However, if the stack pointer
1682 is saved on the stack, we can use a pre-incrementing stack load. */
1683 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1686 saved_int_regs = arm_compute_save_reg_mask ();
1688 /* Unfortunately, the insn
1690 ldmib sp, {..., sp, ...}
1692 triggers a bug on most SA-110 based devices, such that the stack
1693 pointer won't be correctly restored if the instruction takes a
1694 page fault. We work around this problem by popping r3 along with
1695 the other registers, since that is never slower than executing
1696 another instruction.
1698 We test for !arm_arch5 here, because code for any architecture
1699 less than this could potentially be run on one of the buggy
1701 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1703 /* Validate that r3 is a call-clobbered register (always true in
1704 the default abi) ... */
1705 if (!call_used_regs[3])
1708 /* ... that it isn't being used for a return value ... */
1709 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1712 /* ... or for a tail-call argument ... */
1715 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1717 if (find_regno_fusage (sibling, USE, 3))
1721 /* ... and that there are no call-saved registers in r0-r2
1722 (always true in the default ABI). */
1723 if (saved_int_regs & 0x7)
1727 /* Can't be done if interworking with Thumb, and any registers have been
1729 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1732 /* On StrongARM, conditional returns are expensive if they aren't
1733 taken and multiple registers have been stacked. */
1734 if (iscond && arm_tune_strongarm)
1736 /* Conditional return when just the LR is stored is a simple
1737 conditional-load instruction, that's not expensive. */
1738 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1742 && arm_pic_register != INVALID_REGNUM
1743 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1747 /* If there are saved registers but the LR isn't saved, then we need
1748 two instructions for the return. */
1749 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1752 /* Can't be done if any of the FPA regs are pushed,
1753 since this also requires an insn. */
1754 if (TARGET_HARD_FLOAT && TARGET_FPA)
1755 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1756 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1759 /* Likewise VFP regs. */
1760 if (TARGET_HARD_FLOAT && TARGET_VFP)
1761 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1762 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1765 if (TARGET_REALLY_IWMMXT)
1766 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1767 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1773 /* Return TRUE if int I is a valid immediate ARM constant. */
1776 const_ok_for_arm (HOST_WIDE_INT i)
1780 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1781 be all zero, or all one. */
1782 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1783 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1784 != ((~(unsigned HOST_WIDE_INT) 0)
1785 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1788 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1790 /* Fast return for 0 and small values. We must do this for zero, since
1791 the code below can't handle that one case. */
1792 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1795 /* Get the number of trailing zeros. */
1796 lowbit = ffs((int) i) - 1;
1798 /* Only even shifts are allowed in ARM mode so round down to the
1799 nearest even number. */
1803 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1808 /* Allow rotated constants in ARM mode. */
1810 && ((i & ~0xc000003f) == 0
1811 || (i & ~0xf000000f) == 0
1812 || (i & ~0xfc000003) == 0))
1819 /* Allow repeated pattern. */
1822 if (i == v || i == (v | (v << 8)))
1829 /* Return true if I is a valid constant for the operation CODE. */
1831 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1833 if (const_ok_for_arm (i))
1839 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1841 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1847 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1854 /* Emit a sequence of insns to handle a large constant.
1855 CODE is the code of the operation required, it can be any of SET, PLUS,
1856 IOR, AND, XOR, MINUS;
1857 MODE is the mode in which the operation is being performed;
1858 VAL is the integer to operate on;
1859 SOURCE is the other operand (a register, or a null-pointer for SET);
1860 SUBTARGETS means it is safe to create scratch registers if that will
1861 either produce a simpler sequence, or we will want to cse the values.
1862 Return value is the number of insns emitted. */
1864 /* ??? Tweak this for thumb2. */
1866 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1867 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1871 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1872 cond = COND_EXEC_TEST (PATTERN (insn));
1876 if (subtargets || code == SET
1877 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1878 && REGNO (target) != REGNO (source)))
1880 /* After arm_reorg has been called, we can't fix up expensive
1881 constants by pushing them into memory so we must synthesize
1882 them in-line, regardless of the cost. This is only likely to
1883 be more costly on chips that have load delay slots and we are
1884 compiling without running the scheduler (so no splitting
1885 occurred before the final instruction emission).
1887 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1889 if (!after_arm_reorg
1891 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1893 > arm_constant_limit + (code != SET)))
1897 /* Currently SET is the only monadic value for CODE, all
1898 the rest are diadic. */
1899 emit_set_insn (target, GEN_INT (val));
1904 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1906 emit_set_insn (temp, GEN_INT (val));
1907 /* For MINUS, the value is subtracted from, since we never
1908 have subtraction of a constant. */
1910 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1912 emit_set_insn (target,
1913 gen_rtx_fmt_ee (code, mode, source, temp));
1919 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1923 /* Return the number of ARM instructions required to synthesize the given
1926 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1928 HOST_WIDE_INT temp1;
1936 if (remainder & (3 << (i - 2)))
1941 temp1 = remainder & ((0x0ff << end)
1942 | ((i < end) ? (0xff >> (32 - end)) : 0));
1943 remainder &= ~temp1;
1948 } while (remainder);
1952 /* Emit an instruction with the indicated PATTERN. If COND is
1953 non-NULL, conditionalize the execution of the instruction on COND
1957 emit_constant_insn (rtx cond, rtx pattern)
1960 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1961 emit_insn (pattern);
1964 /* As above, but extra parameter GENERATE which, if clear, suppresses
1966 /* ??? This needs more work for thumb2. */
1969 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1970 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1975 int can_negate_initial = 0;
1978 int num_bits_set = 0;
1979 int set_sign_bit_copies = 0;
1980 int clear_sign_bit_copies = 0;
1981 int clear_zero_bit_copies = 0;
1982 int set_zero_bit_copies = 0;
1984 unsigned HOST_WIDE_INT temp1, temp2;
1985 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1987 /* Find out which operations are safe for a given CODE. Also do a quick
1988 check for degenerate cases; these can occur when DImode operations
2000 can_negate_initial = 1;
2004 if (remainder == 0xffffffff)
2007 emit_constant_insn (cond,
2008 gen_rtx_SET (VOIDmode, target,
2009 GEN_INT (ARM_SIGN_EXTEND (val))));
2014 if (reload_completed && rtx_equal_p (target, source))
2017 emit_constant_insn (cond,
2018 gen_rtx_SET (VOIDmode, target, source));
2027 emit_constant_insn (cond,
2028 gen_rtx_SET (VOIDmode, target, const0_rtx));
2031 if (remainder == 0xffffffff)
2033 if (reload_completed && rtx_equal_p (target, source))
2036 emit_constant_insn (cond,
2037 gen_rtx_SET (VOIDmode, target, source));
2046 if (reload_completed && rtx_equal_p (target, source))
2049 emit_constant_insn (cond,
2050 gen_rtx_SET (VOIDmode, target, source));
2054 /* We don't know how to handle other cases yet. */
2055 gcc_assert (remainder == 0xffffffff);
2058 emit_constant_insn (cond,
2059 gen_rtx_SET (VOIDmode, target,
2060 gen_rtx_NOT (mode, source)));
2064 /* We treat MINUS as (val - source), since (source - val) is always
2065 passed as (source + (-val)). */
2069 emit_constant_insn (cond,
2070 gen_rtx_SET (VOIDmode, target,
2071 gen_rtx_NEG (mode, source)));
2074 if (const_ok_for_arm (val))
2077 emit_constant_insn (cond,
2078 gen_rtx_SET (VOIDmode, target,
2079 gen_rtx_MINUS (mode, GEN_INT (val),
2091 /* If we can do it in one insn get out quickly. */
2092 if (const_ok_for_arm (val)
2093 || (can_negate_initial && const_ok_for_arm (-val))
2094 || (can_invert && const_ok_for_arm (~val)))
2097 emit_constant_insn (cond,
2098 gen_rtx_SET (VOIDmode, target,
2100 ? gen_rtx_fmt_ee (code, mode, source,
2106 /* Calculate a few attributes that may be useful for specific
2108 for (i = 31; i >= 0; i--)
2110 if ((remainder & (1 << i)) == 0)
2111 clear_sign_bit_copies++;
2116 for (i = 31; i >= 0; i--)
2118 if ((remainder & (1 << i)) != 0)
2119 set_sign_bit_copies++;
2124 for (i = 0; i <= 31; i++)
2126 if ((remainder & (1 << i)) == 0)
2127 clear_zero_bit_copies++;
2132 for (i = 0; i <= 31; i++)
2134 if ((remainder & (1 << i)) != 0)
2135 set_zero_bit_copies++;
2143 /* See if we can use movw. */
2144 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2147 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2152 /* See if we can do this by sign_extending a constant that is known
2153 to be negative. This is a good, way of doing it, since the shift
2154 may well merge into a subsequent insn. */
2155 if (set_sign_bit_copies > 1)
2157 if (const_ok_for_arm
2158 (temp1 = ARM_SIGN_EXTEND (remainder
2159 << (set_sign_bit_copies - 1))))
2163 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2164 emit_constant_insn (cond,
2165 gen_rtx_SET (VOIDmode, new_src,
2167 emit_constant_insn (cond,
2168 gen_ashrsi3 (target, new_src,
2169 GEN_INT (set_sign_bit_copies - 1)));
2173 /* For an inverted constant, we will need to set the low bits,
2174 these will be shifted out of harm's way. */
2175 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2176 if (const_ok_for_arm (~temp1))
2180 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2181 emit_constant_insn (cond,
2182 gen_rtx_SET (VOIDmode, new_src,
2184 emit_constant_insn (cond,
2185 gen_ashrsi3 (target, new_src,
2186 GEN_INT (set_sign_bit_copies - 1)));
2192 /* See if we can calculate the value as the difference between two
2193 valid immediates. */
2194 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2196 int topshift = clear_sign_bit_copies & ~1;
2198 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2199 & (0xff000000 >> topshift));
2201 /* If temp1 is zero, then that means the 9 most significant
2202 bits of remainder were 1 and we've caused it to overflow.
2203 When topshift is 0 we don't need to do anything since we
2204 can borrow from 'bit 32'. */
2205 if (temp1 == 0 && topshift != 0)
2206 temp1 = 0x80000000 >> (topshift - 1);
2208 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2210 if (const_ok_for_arm (temp2))
2214 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2215 emit_constant_insn (cond,
2216 gen_rtx_SET (VOIDmode, new_src,
2218 emit_constant_insn (cond,
2219 gen_addsi3 (target, new_src,
2227 /* See if we can generate this by setting the bottom (or the top)
2228 16 bits, and then shifting these into the other half of the
2229 word. We only look for the simplest cases, to do more would cost
2230 too much. Be careful, however, not to generate this when the
2231 alternative would take fewer insns. */
2232 if (val & 0xffff0000)
2234 temp1 = remainder & 0xffff0000;
2235 temp2 = remainder & 0x0000ffff;
2237 /* Overlaps outside this range are best done using other methods. */
2238 for (i = 9; i < 24; i++)
2240 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2241 && !const_ok_for_arm (temp2))
2243 rtx new_src = (subtargets
2244 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2246 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2247 source, subtargets, generate);
2255 gen_rtx_ASHIFT (mode, source,
2262 /* Don't duplicate cases already considered. */
2263 for (i = 17; i < 24; i++)
2265 if (((temp1 | (temp1 >> i)) == remainder)
2266 && !const_ok_for_arm (temp1))
2268 rtx new_src = (subtargets
2269 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2271 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2272 source, subtargets, generate);
2277 gen_rtx_SET (VOIDmode, target,
2280 gen_rtx_LSHIFTRT (mode, source,
2291 /* If we have IOR or XOR, and the constant can be loaded in a
2292 single instruction, and we can find a temporary to put it in,
2293 then this can be done in two instructions instead of 3-4. */
2295 /* TARGET can't be NULL if SUBTARGETS is 0 */
2296 || (reload_completed && !reg_mentioned_p (target, source)))
2298 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2302 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2304 emit_constant_insn (cond,
2305 gen_rtx_SET (VOIDmode, sub,
2307 emit_constant_insn (cond,
2308 gen_rtx_SET (VOIDmode, target,
2309 gen_rtx_fmt_ee (code, mode,
2319 if (set_sign_bit_copies > 8
2320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2325 rtx shift = GEN_INT (set_sign_bit_copies);
2329 gen_rtx_SET (VOIDmode, sub,
2331 gen_rtx_ASHIFT (mode,
2336 gen_rtx_SET (VOIDmode, target,
2338 gen_rtx_LSHIFTRT (mode, sub,
2344 if (set_zero_bit_copies > 8
2345 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2349 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2350 rtx shift = GEN_INT (set_zero_bit_copies);
2354 gen_rtx_SET (VOIDmode, sub,
2356 gen_rtx_LSHIFTRT (mode,
2361 gen_rtx_SET (VOIDmode, target,
2363 gen_rtx_ASHIFT (mode, sub,
2369 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2373 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2374 emit_constant_insn (cond,
2375 gen_rtx_SET (VOIDmode, sub,
2376 gen_rtx_NOT (mode, source)));
2379 sub = gen_reg_rtx (mode);
2380 emit_constant_insn (cond,
2381 gen_rtx_SET (VOIDmode, sub,
2382 gen_rtx_AND (mode, source,
2384 emit_constant_insn (cond,
2385 gen_rtx_SET (VOIDmode, target,
2386 gen_rtx_NOT (mode, sub)));
2393 /* See if two shifts will do 2 or more insn's worth of work. */
2394 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2396 HOST_WIDE_INT shift_mask = ((0xffffffff
2397 << (32 - clear_sign_bit_copies))
2400 if ((remainder | shift_mask) != 0xffffffff)
2404 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2405 insns = arm_gen_constant (AND, mode, cond,
2406 remainder | shift_mask,
2407 new_src, source, subtargets, 1);
2412 rtx targ = subtargets ? NULL_RTX : target;
2413 insns = arm_gen_constant (AND, mode, cond,
2414 remainder | shift_mask,
2415 targ, source, subtargets, 0);
2421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2422 rtx shift = GEN_INT (clear_sign_bit_copies);
2424 emit_insn (gen_ashlsi3 (new_src, source, shift));
2425 emit_insn (gen_lshrsi3 (target, new_src, shift));
2431 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2433 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2435 if ((remainder | shift_mask) != 0xffffffff)
2439 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2441 insns = arm_gen_constant (AND, mode, cond,
2442 remainder | shift_mask,
2443 new_src, source, subtargets, 1);
2448 rtx targ = subtargets ? NULL_RTX : target;
2450 insns = arm_gen_constant (AND, mode, cond,
2451 remainder | shift_mask,
2452 targ, source, subtargets, 0);
2458 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2459 rtx shift = GEN_INT (clear_zero_bit_copies);
2461 emit_insn (gen_lshrsi3 (new_src, source, shift));
2462 emit_insn (gen_ashlsi3 (target, new_src, shift));
2474 for (i = 0; i < 32; i++)
2475 if (remainder & (1 << i))
2478 if (code == AND || (can_invert && num_bits_set > 16))
2479 remainder = (~remainder) & 0xffffffff;
2480 else if (code == PLUS && num_bits_set > 16)
2481 remainder = (-remainder) & 0xffffffff;
2488 /* Now try and find a way of doing the job in either two or three
2490 We start by looking for the largest block of zeros that are aligned on
2491 a 2-bit boundary, we then fill up the temps, wrapping around to the
2492 top of the word when we drop off the bottom.
2493 In the worst case this code should produce no more than four insns.
2494 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2495 best place to start. */
2497 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2503 int best_consecutive_zeros = 0;
2505 for (i = 0; i < 32; i += 2)
2507 int consecutive_zeros = 0;
2509 if (!(remainder & (3 << i)))
2511 while ((i < 32) && !(remainder & (3 << i)))
2513 consecutive_zeros += 2;
2516 if (consecutive_zeros > best_consecutive_zeros)
2518 best_consecutive_zeros = consecutive_zeros;
2519 best_start = i - consecutive_zeros;
2525 /* So long as it won't require any more insns to do so, it's
2526 desirable to emit a small constant (in bits 0...9) in the last
2527 insn. This way there is more chance that it can be combined with
2528 a later addressing insn to form a pre-indexed load or store
2529 operation. Consider:
2531 *((volatile int *)0xe0000100) = 1;
2532 *((volatile int *)0xe0000110) = 2;
2534 We want this to wind up as:
2538 str rB, [rA, #0x100]
2540 str rB, [rA, #0x110]
2542 rather than having to synthesize both large constants from scratch.
2544 Therefore, we calculate how many insns would be required to emit
2545 the constant starting from `best_start', and also starting from
2546 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2547 yield a shorter sequence, we may as well use zero. */
2549 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2550 && (count_insns_for_constant (remainder, 0) <=
2551 count_insns_for_constant (remainder, best_start)))
2555 /* Now start emitting the insns. */
2563 if (remainder & (3 << (i - 2)))
2568 temp1 = remainder & ((0x0ff << end)
2569 | ((i < end) ? (0xff >> (32 - end)) : 0));
2570 remainder &= ~temp1;
2574 rtx new_src, temp1_rtx;
2576 if (code == SET || code == MINUS)
2578 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2579 if (can_invert && code != MINUS)
2584 if (remainder && subtargets)
2585 new_src = gen_reg_rtx (mode);
2590 else if (can_negate)
2594 temp1 = trunc_int_for_mode (temp1, mode);
2595 temp1_rtx = GEN_INT (temp1);
2599 else if (code == MINUS)
2600 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2602 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2604 emit_constant_insn (cond,
2605 gen_rtx_SET (VOIDmode, new_src,
2615 else if (code == MINUS)
2624 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2637 /* Canonicalize a comparison so that we are more likely to recognize it.
2638 This can be done for a few constant compares, where we can make the
2639 immediate value easier to load. */
2642 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2645 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2646 unsigned HOST_WIDE_INT maxval;
2647 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2658 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2660 *op1 = GEN_INT (i + 1);
2661 return code == GT ? GE : LT;
2668 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2670 *op1 = GEN_INT (i - 1);
2671 return code == GE ? GT : LE;
2677 if (i != ~((unsigned HOST_WIDE_INT) 0)
2678 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2680 *op1 = GEN_INT (i + 1);
2681 return code == GTU ? GEU : LTU;
2688 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2690 *op1 = GEN_INT (i - 1);
2691 return code == GEU ? GTU : LEU;
2703 /* Define how to find the value returned by a function. */
2706 arm_function_value(tree type, tree func ATTRIBUTE_UNUSED)
2708 enum machine_mode mode;
2709 int unsignedp ATTRIBUTE_UNUSED;
2710 rtx r ATTRIBUTE_UNUSED;
2712 mode = TYPE_MODE (type);
2713 /* Promote integer types. */
2714 if (INTEGRAL_TYPE_P (type))
2715 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2717 /* Promotes small structs returned in a register to full-word size
2718 for big-endian AAPCS. */
2719 if (arm_return_in_msb (type))
2721 HOST_WIDE_INT size = int_size_in_bytes (type);
2722 if (size % UNITS_PER_WORD != 0)
2724 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2725 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2729 return LIBCALL_VALUE(mode);
2732 /* Determine the amount of memory needed to store the possible return
2733 registers of an untyped call. */
2735 arm_apply_result_size (void)
2741 if (TARGET_HARD_FLOAT_ABI)
2745 if (TARGET_MAVERICK)
2748 if (TARGET_IWMMXT_ABI)
2755 /* Decide whether a type should be returned in memory (true)
2756 or in a register (false). This is called by the macro
2757 RETURN_IN_MEMORY. */
2759 arm_return_in_memory (tree type)
2763 size = int_size_in_bytes (type);
2765 /* Vector values should be returned using ARM registers, not memory (unless
2766 they're over 16 bytes, which will break since we only have four
2767 call-clobbered registers to play with). */
2768 if (TREE_CODE (type) == VECTOR_TYPE)
2769 return (size < 0 || size > (4 * UNITS_PER_WORD));
2771 if (!AGGREGATE_TYPE_P (type) &&
2772 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2773 /* All simple types are returned in registers.
2774 For AAPCS, complex types are treated the same as aggregates. */
2777 if (arm_abi != ARM_ABI_APCS)
2779 /* ATPCS and later return aggregate types in memory only if they are
2780 larger than a word (or are variable size). */
2781 return (size < 0 || size > UNITS_PER_WORD);
2784 /* For the arm-wince targets we choose to be compatible with Microsoft's
2785 ARM and Thumb compilers, which always return aggregates in memory. */
2787 /* All structures/unions bigger than one word are returned in memory.
2788 Also catch the case where int_size_in_bytes returns -1. In this case
2789 the aggregate is either huge or of variable size, and in either case
2790 we will want to return it via memory and not in a register. */
2791 if (size < 0 || size > UNITS_PER_WORD)
2794 if (TREE_CODE (type) == RECORD_TYPE)
2798 /* For a struct the APCS says that we only return in a register
2799 if the type is 'integer like' and every addressable element
2800 has an offset of zero. For practical purposes this means
2801 that the structure can have at most one non bit-field element
2802 and that this element must be the first one in the structure. */
2804 /* Find the first field, ignoring non FIELD_DECL things which will
2805 have been created by C++. */
2806 for (field = TYPE_FIELDS (type);
2807 field && TREE_CODE (field) != FIELD_DECL;
2808 field = TREE_CHAIN (field))
2812 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2814 /* Check that the first field is valid for returning in a register. */
2816 /* ... Floats are not allowed */
2817 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2820 /* ... Aggregates that are not themselves valid for returning in
2821 a register are not allowed. */
2822 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2825 /* Now check the remaining fields, if any. Only bitfields are allowed,
2826 since they are not addressable. */
2827 for (field = TREE_CHAIN (field);
2829 field = TREE_CHAIN (field))
2831 if (TREE_CODE (field) != FIELD_DECL)
2834 if (!DECL_BIT_FIELD_TYPE (field))
2841 if (TREE_CODE (type) == UNION_TYPE)
2845 /* Unions can be returned in registers if every element is
2846 integral, or can be returned in an integer register. */
2847 for (field = TYPE_FIELDS (type);
2849 field = TREE_CHAIN (field))
2851 if (TREE_CODE (field) != FIELD_DECL)
2854 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2857 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2863 #endif /* not ARM_WINCE */
2865 /* Return all other types in memory. */
2869 /* Indicate whether or not words of a double are in big-endian order. */
2872 arm_float_words_big_endian (void)
2874 if (TARGET_MAVERICK)
2877 /* For FPA, float words are always big-endian. For VFP, floats words
2878 follow the memory system mode. */
2886 return (TARGET_BIG_END ? 1 : 0);
2891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2892 for a call to a function whose data type is FNTYPE.
2893 For a library call, FNTYPE is NULL. */
2895 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2896 rtx libname ATTRIBUTE_UNUSED,
2897 tree fndecl ATTRIBUTE_UNUSED)
2899 /* On the ARM, the offset starts at 0. */
2901 pcum->iwmmxt_nregs = 0;
2902 pcum->can_split = true;
2904 /* Varargs vectors are treated the same as long long.
2905 named_count avoids having to change the way arm handles 'named' */
2906 pcum->named_count = 0;
2909 if (TARGET_REALLY_IWMMXT && fntype)
2913 for (fn_arg = TYPE_ARG_TYPES (fntype);
2915 fn_arg = TREE_CHAIN (fn_arg))
2916 pcum->named_count += 1;
2918 if (! pcum->named_count)
2919 pcum->named_count = INT_MAX;
2924 /* Return true if mode/type need doubleword alignment. */
2926 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2928 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2929 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2933 /* Determine where to put an argument to a function.
2934 Value is zero to push the argument on the stack,
2935 or a hard register in which to store the argument.
2937 MODE is the argument's machine mode.
2938 TYPE is the data type of the argument (as a tree).
2939 This is null for libcalls where that information may
2941 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2942 the preceding args and about the function being called.
2943 NAMED is nonzero if this argument is a named parameter
2944 (otherwise it is an extra parameter matching an ellipsis). */
2947 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2948 tree type, int named)
2952 /* Varargs vectors are treated the same as long long.
2953 named_count avoids having to change the way arm handles 'named' */
2954 if (TARGET_IWMMXT_ABI
2955 && arm_vector_mode_supported_p (mode)
2956 && pcum->named_count > pcum->nargs + 1)
2958 if (pcum->iwmmxt_nregs <= 9)
2959 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2962 pcum->can_split = false;
2967 /* Put doubleword aligned quantities in even register pairs. */
2969 && ARM_DOUBLEWORD_ALIGN
2970 && arm_needs_doubleword_align (mode, type))
2973 if (mode == VOIDmode)
2974 /* Pick an arbitrary value for operand 2 of the call insn. */
2977 /* Only allow splitting an arg between regs and memory if all preceding
2978 args were allocated to regs. For args passed by reference we only count
2979 the reference pointer. */
2980 if (pcum->can_split)
2983 nregs = ARM_NUM_REGS2 (mode, type);
2985 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2988 return gen_rtx_REG (mode, pcum->nregs);
2992 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2993 tree type, bool named ATTRIBUTE_UNUSED)
2995 int nregs = pcum->nregs;
2997 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3000 if (NUM_ARG_REGS > nregs
3001 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3003 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3008 /* Variable sized types are passed by reference. This is a GCC
3009 extension to the ARM ABI. */
3012 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3013 enum machine_mode mode ATTRIBUTE_UNUSED,
3014 tree type, bool named ATTRIBUTE_UNUSED)
3016 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3019 /* Encode the current state of the #pragma [no_]long_calls. */
3022 OFF, /* No #pragma [no_]long_calls is in effect. */
3023 LONG, /* #pragma long_calls is in effect. */
3024 SHORT /* #pragma no_long_calls is in effect. */
3027 static arm_pragma_enum arm_pragma_long_calls = OFF;
3030 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3032 arm_pragma_long_calls = LONG;
3036 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3038 arm_pragma_long_calls = SHORT;
3042 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3044 arm_pragma_long_calls = OFF;
3047 /* Table of machine attributes. */
3048 const struct attribute_spec arm_attribute_table[] =
3050 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3051 /* Function calls made to this symbol must be done indirectly, because
3052 it may lie outside of the 26 bit addressing range of a normal function
3054 { "long_call", 0, 0, false, true, true, NULL },
3055 /* Whereas these functions are always known to reside within the 26 bit
3056 addressing range. */
3057 { "short_call", 0, 0, false, true, true, NULL },
3058 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3059 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3060 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3061 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3063 /* ARM/PE has three new attributes:
3065 dllexport - for exporting a function/variable that will live in a dll
3066 dllimport - for importing a function/variable from a dll
3068 Microsoft allows multiple declspecs in one __declspec, separating
3069 them with spaces. We do NOT support this. Instead, use __declspec
3072 { "dllimport", 0, 0, true, false, false, NULL },
3073 { "dllexport", 0, 0, true, false, false, NULL },
3074 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3075 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3076 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3077 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3078 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3080 { NULL, 0, 0, false, false, false, NULL }
3083 /* Handle an attribute requiring a FUNCTION_DECL;
3084 arguments as in struct attribute_spec.handler. */
3086 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3087 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3089 if (TREE_CODE (*node) != FUNCTION_DECL)
3091 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3092 IDENTIFIER_POINTER (name));
3093 *no_add_attrs = true;
3099 /* Handle an "interrupt" or "isr" attribute;
3100 arguments as in struct attribute_spec.handler. */
3102 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3107 if (TREE_CODE (*node) != FUNCTION_DECL)
3109 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3110 IDENTIFIER_POINTER (name));
3111 *no_add_attrs = true;
3113 /* FIXME: the argument if any is checked for type attributes;
3114 should it be checked for decl ones? */
3118 if (TREE_CODE (*node) == FUNCTION_TYPE
3119 || TREE_CODE (*node) == METHOD_TYPE)
3121 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3123 warning (OPT_Wattributes, "%qs attribute ignored",
3124 IDENTIFIER_POINTER (name));
3125 *no_add_attrs = true;
3128 else if (TREE_CODE (*node) == POINTER_TYPE
3129 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3130 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3131 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3133 *node = build_variant_type_copy (*node);
3134 TREE_TYPE (*node) = build_type_attribute_variant
3136 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3137 *no_add_attrs = true;
3141 /* Possibly pass this attribute on from the type to a decl. */
3142 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3143 | (int) ATTR_FLAG_FUNCTION_NEXT
3144 | (int) ATTR_FLAG_ARRAY_NEXT))
3146 *no_add_attrs = true;
3147 return tree_cons (name, args, NULL_TREE);
3151 warning (OPT_Wattributes, "%qs attribute ignored",
3152 IDENTIFIER_POINTER (name));
3160 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3161 /* Handle the "notshared" attribute. This attribute is another way of
3162 requesting hidden visibility. ARM's compiler supports
3163 "__declspec(notshared)"; we support the same thing via an
3167 arm_handle_notshared_attribute (tree *node,
3168 tree name ATTRIBUTE_UNUSED,
3169 tree args ATTRIBUTE_UNUSED,
3170 int flags ATTRIBUTE_UNUSED,
3173 tree decl = TYPE_NAME (*node);
3177 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3178 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3179 *no_add_attrs = false;
3185 /* Return 0 if the attributes for two types are incompatible, 1 if they
3186 are compatible, and 2 if they are nearly compatible (which causes a
3187 warning to be generated). */
3189 arm_comp_type_attributes (tree type1, tree type2)
3193 /* Check for mismatch of non-default calling convention. */
3194 if (TREE_CODE (type1) != FUNCTION_TYPE)
3197 /* Check for mismatched call attributes. */
3198 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3199 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3200 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3201 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3203 /* Only bother to check if an attribute is defined. */
3204 if (l1 | l2 | s1 | s2)
3206 /* If one type has an attribute, the other must have the same attribute. */
3207 if ((l1 != l2) || (s1 != s2))
3210 /* Disallow mixed attributes. */
3211 if ((l1 & s2) || (l2 & s1))
3215 /* Check for mismatched ISR attribute. */
3216 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3218 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3219 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3221 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3228 /* Assigns default attributes to newly defined type. This is used to
3229 set short_call/long_call attributes for function types of
3230 functions defined inside corresponding #pragma scopes. */
3232 arm_set_default_type_attributes (tree type)
3234 /* Add __attribute__ ((long_call)) to all functions, when
3235 inside #pragma long_calls or __attribute__ ((short_call)),
3236 when inside #pragma no_long_calls. */
3237 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3239 tree type_attr_list, attr_name;
3240 type_attr_list = TYPE_ATTRIBUTES (type);
3242 if (arm_pragma_long_calls == LONG)
3243 attr_name = get_identifier ("long_call");
3244 else if (arm_pragma_long_calls == SHORT)
3245 attr_name = get_identifier ("short_call");
3249 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3250 TYPE_ATTRIBUTES (type) = type_attr_list;
3254 /* Return true if DECL is known to be linked into section SECTION. */
3257 arm_function_in_section_p (tree decl, section *section)
3259 /* We can only be certain about functions defined in the same
3260 compilation unit. */
3261 if (!TREE_STATIC (decl))
3264 /* Make sure that SYMBOL always binds to the definition in this
3265 compilation unit. */
3266 if (!targetm.binds_local_p (decl))
3269 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3270 if (!DECL_SECTION_NAME (decl))
3272 /* Only cater for unit-at-a-time mode, where we know that the user
3273 cannot later specify a section for DECL. */
3274 if (!flag_unit_at_a_time)
3277 /* Make sure that we will not create a unique section for DECL. */
3278 if (flag_function_sections || DECL_ONE_ONLY (decl))
3282 return function_section (decl) == section;
3285 /* Return nonzero if a 32-bit "long_call" should be generated for
3286 a call from the current function to DECL. We generate a long_call
3289 a. has an __attribute__((long call))
3290 or b. is within the scope of a #pragma long_calls
3291 or c. the -mlong-calls command line switch has been specified
3293 However we do not generate a long call if the function:
3295 d. has an __attribute__ ((short_call))
3296 or e. is inside the scope of a #pragma no_long_calls
3297 or f. is defined in the same section as the current function. */
3300 arm_is_long_call_p (tree decl)
3305 return TARGET_LONG_CALLS;
3307 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3308 if (lookup_attribute ("short_call", attrs))
3311 /* For "f", be conservative, and only cater for cases in which the
3312 whole of the current function is placed in the same section. */
3313 if (!flag_reorder_blocks_and_partition
3314 && arm_function_in_section_p (decl, current_function_section ()))
3317 if (lookup_attribute ("long_call", attrs))
3320 return TARGET_LONG_CALLS;
3323 /* Return nonzero if it is ok to make a tail-call to DECL. */
3325 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3327 unsigned long func_type;
3329 if (cfun->machine->sibcall_blocked)
3332 /* Never tailcall something for which we have no decl, or if we
3333 are in Thumb mode. */
3334 if (decl == NULL || TARGET_THUMB)
3337 /* The PIC register is live on entry to VxWorks PLT entries, so we
3338 must make the call before restoring the PIC register. */
3339 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3342 /* Cannot tail-call to long calls, since these are out of range of
3343 a branch instruction. */
3344 if (arm_is_long_call_p (decl))
3347 /* If we are interworking and the function is not declared static
3348 then we can't tail-call it unless we know that it exists in this
3349 compilation unit (since it might be a Thumb routine). */
3350 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3353 func_type = arm_current_func_type ();
3354 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3355 if (IS_INTERRUPT (func_type))
3358 /* Never tailcall if function may be called with a misaligned SP. */
3359 if (IS_STACKALIGN (func_type))
3362 /* Everything else is ok. */
3367 /* Addressing mode support functions. */
3369 /* Return nonzero if X is a legitimate immediate operand when compiling
3370 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3372 legitimate_pic_operand_p (rtx x)
3374 if (GET_CODE (x) == SYMBOL_REF
3375 || (GET_CODE (x) == CONST
3376 && GET_CODE (XEXP (x, 0)) == PLUS
3377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3383 /* Record that the current function needs a PIC register. Initialize
3384 cfun->machine->pic_reg if we have not already done so. */
3387 require_pic_register (void)
3389 /* A lot of the logic here is made obscure by the fact that this
3390 routine gets called as part of the rtx cost estimation process.
3391 We don't want those calls to affect any assumptions about the real
3392 function; and further, we can't call entry_of_function() until we
3393 start the real expansion process. */
3394 if (!current_function_uses_pic_offset_table)
3396 gcc_assert (can_create_pseudo_p ());
3397 if (arm_pic_register != INVALID_REGNUM)
3399 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3401 /* Play games to avoid marking the function as needing pic
3402 if we are being called as part of the cost-estimation
3404 if (current_ir_type () != IR_GIMPLE)
3405 current_function_uses_pic_offset_table = 1;
3411 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3413 /* Play games to avoid marking the function as needing pic
3414 if we are being called as part of the cost-estimation
3416 if (current_ir_type () != IR_GIMPLE)
3418 current_function_uses_pic_offset_table = 1;
3421 arm_load_pic_register (0UL);
3425 emit_insn_after (seq, entry_of_function ());
3432 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3434 if (GET_CODE (orig) == SYMBOL_REF
3435 || GET_CODE (orig) == LABEL_REF)
3437 #ifndef AOF_ASSEMBLER
3438 rtx pic_ref, address;
3443 /* If this function doesn't have a pic register, create one now. */
3444 require_pic_register ();
3448 gcc_assert (can_create_pseudo_p ());
3449 reg = gen_reg_rtx (Pmode);
3454 #ifdef AOF_ASSEMBLER
3455 /* The AOF assembler can generate relocations for these directly, and
3456 understands that the PIC register has to be added into the offset. */
3457 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3460 address = gen_reg_rtx (Pmode);
3465 emit_insn (gen_pic_load_addr_arm (address, orig));
3466 else if (TARGET_THUMB2)
3467 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3468 else /* TARGET_THUMB1 */
3469 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3471 /* VxWorks does not impose a fixed gap between segments; the run-time
3472 gap can be different from the object-file gap. We therefore can't
3473 use GOTOFF unless we are absolutely sure that the symbol is in the
3474 same segment as the GOT. Unfortunately, the flexibility of linker
3475 scripts means that we can't be sure of that in general, so assume
3476 that GOTOFF is never valid on VxWorks. */
3477 if ((GET_CODE (orig) == LABEL_REF
3478 || (GET_CODE (orig) == SYMBOL_REF &&
3479 SYMBOL_REF_LOCAL_P (orig)))
3481 && !TARGET_VXWORKS_RTP)
3482 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3485 pic_ref = gen_const_mem (Pmode,
3486 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3490 insn = emit_move_insn (reg, pic_ref);
3492 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3494 set_unique_reg_note (insn, REG_EQUAL, orig);
3498 else if (GET_CODE (orig) == CONST)
3502 if (GET_CODE (XEXP (orig, 0)) == PLUS
3503 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3506 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3507 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3512 gcc_assert (can_create_pseudo_p ());
3513 reg = gen_reg_rtx (Pmode);
3516 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3518 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3519 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3520 base == reg ? 0 : reg);
3522 if (GET_CODE (offset) == CONST_INT)
3524 /* The base register doesn't really matter, we only want to
3525 test the index for the appropriate mode. */
3526 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3528 gcc_assert (can_create_pseudo_p ());
3529 offset = force_reg (Pmode, offset);
3532 if (GET_CODE (offset) == CONST_INT)
3533 return plus_constant (base, INTVAL (offset));
3536 if (GET_MODE_SIZE (mode) > 4
3537 && (GET_MODE_CLASS (mode) == MODE_INT
3538 || TARGET_SOFT_FLOAT))
3540 emit_insn (gen_addsi3 (reg, base, offset));
3544 return gen_rtx_PLUS (Pmode, base, offset);
3551 /* Find a spare register to use during the prolog of a function. */
3554 thumb_find_work_register (unsigned long pushed_regs_mask)
3558 /* Check the argument registers first as these are call-used. The
3559 register allocation order means that sometimes r3 might be used
3560 but earlier argument registers might not, so check them all. */
3561 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3562 if (!df_regs_ever_live_p (reg))
3565 /* Before going on to check the call-saved registers we can try a couple
3566 more ways of deducing that r3 is available. The first is when we are
3567 pushing anonymous arguments onto the stack and we have less than 4
3568 registers worth of fixed arguments(*). In this case r3 will be part of
3569 the variable argument list and so we can be sure that it will be
3570 pushed right at the start of the function. Hence it will be available
3571 for the rest of the prologue.
3572 (*): ie current_function_pretend_args_size is greater than 0. */
3573 if (cfun->machine->uses_anonymous_args
3574 && current_function_pretend_args_size > 0)
3575 return LAST_ARG_REGNUM;
3577 /* The other case is when we have fixed arguments but less than 4 registers
3578 worth. In this case r3 might be used in the body of the function, but
3579 it is not being used to convey an argument into the function. In theory
3580 we could just check current_function_args_size to see how many bytes are
3581 being passed in argument registers, but it seems that it is unreliable.
3582 Sometimes it will have the value 0 when in fact arguments are being
3583 passed. (See testcase execute/20021111-1.c for an example). So we also
3584 check the args_info.nregs field as well. The problem with this field is
3585 that it makes no allowances for arguments that are passed to the
3586 function but which are not used. Hence we could miss an opportunity
3587 when a function has an unused argument in r3. But it is better to be
3588 safe than to be sorry. */
3589 if (! cfun->machine->uses_anonymous_args
3590 && current_function_args_size >= 0
3591 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3592 && cfun->args_info.nregs < 4)
3593 return LAST_ARG_REGNUM;
3595 /* Otherwise look for a call-saved register that is going to be pushed. */
3596 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3597 if (pushed_regs_mask & (1 << reg))
3602 /* Thumb-2 can use high regs. */
3603 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3604 if (pushed_regs_mask & (1 << reg))
3607 /* Something went wrong - thumb_compute_save_reg_mask()
3608 should have arranged for a suitable register to be pushed. */
3612 static GTY(()) int pic_labelno;
3614 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3618 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3620 #ifndef AOF_ASSEMBLER
3621 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3622 rtx global_offset_table;
3624 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3627 gcc_assert (flag_pic);
3629 pic_reg = cfun->machine->pic_reg;
3630 if (TARGET_VXWORKS_RTP)
3632 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3633 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3634 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3636 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3638 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3639 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3643 /* We use an UNSPEC rather than a LABEL_REF because this label
3644 never appears in the code stream. */
3646 labelno = GEN_INT (pic_labelno++);
3647 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3648 l1 = gen_rtx_CONST (VOIDmode, l1);
3651 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3652 /* On the ARM the PC register contains 'dot + 8' at the time of the
3653 addition, on the Thumb it is 'dot + 4'. */
3654 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3657 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3658 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3661 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3663 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3664 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3668 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3669 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3671 else if (TARGET_THUMB2)
3673 /* Thumb-2 only allows very limited access to the PC. Calculate the
3674 address in a temporary register. */
3675 if (arm_pic_register != INVALID_REGNUM)
3677 pic_tmp = gen_rtx_REG (SImode,
3678 thumb_find_work_register (saved_regs));
3682 gcc_assert (can_create_pseudo_p ());
3683 pic_tmp = gen_reg_rtx (Pmode);
3686 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3687 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3688 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3690 else /* TARGET_THUMB1 */
3692 if (arm_pic_register != INVALID_REGNUM
3693 && REGNO (pic_reg) > LAST_LO_REGNUM)
3695 /* We will have pushed the pic register, so we should always be
3696 able to find a work register. */
3697 pic_tmp = gen_rtx_REG (SImode,
3698 thumb_find_work_register (saved_regs));
3699 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3700 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3703 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3704 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3708 /* Need to emit this whether or not we obey regdecls,
3709 since setjmp/longjmp can cause life info to screw up. */
3710 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3711 #endif /* AOF_ASSEMBLER */
3715 /* Return nonzero if X is valid as an ARM state addressing register. */
3717 arm_address_register_rtx_p (rtx x, int strict_p)
3721 if (GET_CODE (x) != REG)
3727 return ARM_REGNO_OK_FOR_BASE_P (regno);
3729 return (regno <= LAST_ARM_REGNUM
3730 || regno >= FIRST_PSEUDO_REGISTER
3731 || regno == FRAME_POINTER_REGNUM
3732 || regno == ARG_POINTER_REGNUM);
3735 /* Return TRUE if this rtx is the difference of a symbol and a label,
3736 and will reduce to a PC-relative relocation in the object file.
3737 Expressions like this can be left alone when generating PIC, rather
3738 than forced through the GOT. */
3740 pcrel_constant_p (rtx x)
3742 if (GET_CODE (x) == MINUS)
3743 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3748 /* Return nonzero if X is a valid ARM state address operand. */
3750 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3754 enum rtx_code code = GET_CODE (x);
3756 if (arm_address_register_rtx_p (x, strict_p))
3759 use_ldrd = (TARGET_LDRD
3761 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3763 if (code == POST_INC || code == PRE_DEC
3764 || ((code == PRE_INC || code == POST_DEC)
3765 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3766 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3768 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3769 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3770 && GET_CODE (XEXP (x, 1)) == PLUS
3771 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3773 rtx addend = XEXP (XEXP (x, 1), 1);
3775 /* Don't allow ldrd post increment by register because it's hard
3776 to fixup invalid register choices. */
3778 && GET_CODE (x) == POST_MODIFY
3779 && GET_CODE (addend) == REG)
3782 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3783 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3786 /* After reload constants split into minipools will have addresses
3787 from a LABEL_REF. */
3788 else if (reload_completed
3789 && (code == LABEL_REF
3791 && GET_CODE (XEXP (x, 0)) == PLUS
3792 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3793 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3796 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3799 else if (code == PLUS)
3801 rtx xop0 = XEXP (x, 0);
3802 rtx xop1 = XEXP (x, 1);
3804 return ((arm_address_register_rtx_p (xop0, strict_p)
3805 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3806 || (arm_address_register_rtx_p (xop1, strict_p)
3807 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3811 /* Reload currently can't handle MINUS, so disable this for now */
3812 else if (GET_CODE (x) == MINUS)
3814 rtx xop0 = XEXP (x, 0);
3815 rtx xop1 = XEXP (x, 1);
3817 return (arm_address_register_rtx_p (xop0, strict_p)
3818 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3822 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3823 && code == SYMBOL_REF
3824 && CONSTANT_POOL_ADDRESS_P (x)
3826 && symbol_mentioned_p (get_pool_constant (x))
3827 && ! pcrel_constant_p (get_pool_constant (x))))
3833 /* Return nonzero if X is a valid Thumb-2 address operand. */
3835 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3838 enum rtx_code code = GET_CODE (x);
3840 if (arm_address_register_rtx_p (x, strict_p))
3843 use_ldrd = (TARGET_LDRD
3845 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3847 if (code == POST_INC || code == PRE_DEC
3848 || ((code == PRE_INC || code == POST_DEC)
3849 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3850 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3852 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3853 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3854 && GET_CODE (XEXP (x, 1)) == PLUS
3855 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3857 /* Thumb-2 only has autoincrement by constant. */
3858 rtx addend = XEXP (XEXP (x, 1), 1);
3859 HOST_WIDE_INT offset;
3861 if (GET_CODE (addend) != CONST_INT)
3864 offset = INTVAL(addend);
3865 if (GET_MODE_SIZE (mode) <= 4)
3866 return (offset > -256 && offset < 256);
3868 return (use_ldrd && offset > -1024 && offset < 1024
3869 && (offset & 3) == 0);
3872 /* After reload constants split into minipools will have addresses
3873 from a LABEL_REF. */
3874 else if (reload_completed
3875 && (code == LABEL_REF
3877 && GET_CODE (XEXP (x, 0)) == PLUS
3878 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3879 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3882 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3885 else if (code == PLUS)
3887 rtx xop0 = XEXP (x, 0);
3888 rtx xop1 = XEXP (x, 1);
3890 return ((arm_address_register_rtx_p (xop0, strict_p)
3891 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3892 || (arm_address_register_rtx_p (xop1, strict_p)
3893 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3896 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3897 && code == SYMBOL_REF
3898 && CONSTANT_POOL_ADDRESS_P (x)
3900 && symbol_mentioned_p (get_pool_constant (x))
3901 && ! pcrel_constant_p (get_pool_constant (x))))
3907 /* Return nonzero if INDEX is valid for an address index operand in
3910 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3913 HOST_WIDE_INT range;
3914 enum rtx_code code = GET_CODE (index);
3916 /* Standard coprocessor addressing modes. */
3917 if (TARGET_HARD_FLOAT
3918 && (TARGET_FPA || TARGET_MAVERICK)
3919 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3920 || (TARGET_MAVERICK && mode == DImode)))
3921 return (code == CONST_INT && INTVAL (index) < 1024
3922 && INTVAL (index) > -1024
3923 && (INTVAL (index) & 3) == 0);
3926 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3927 return (code == CONST_INT
3928 && INTVAL (index) < 1016
3929 && INTVAL (index) > -1024
3930 && (INTVAL (index) & 3) == 0);
3932 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3933 return (code == CONST_INT
3934 && INTVAL (index) < 1024
3935 && INTVAL (index) > -1024
3936 && (INTVAL (index) & 3) == 0);
3938 if (arm_address_register_rtx_p (index, strict_p)
3939 && (GET_MODE_SIZE (mode) <= 4))
3942 if (mode == DImode || mode == DFmode)
3944 if (code == CONST_INT)
3946 HOST_WIDE_INT val = INTVAL (index);
3949 return val > -256 && val < 256;
3951 return val > -4096 && val < 4092;
3954 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3957 if (GET_MODE_SIZE (mode) <= 4
3960 || (mode == QImode && outer == SIGN_EXTEND))))
3964 rtx xiop0 = XEXP (index, 0);
3965 rtx xiop1 = XEXP (index, 1);
3967 return ((arm_address_register_rtx_p (xiop0, strict_p)
3968 && power_of_two_operand (xiop1, SImode))
3969 || (arm_address_register_rtx_p (xiop1, strict_p)
3970 && power_of_two_operand (xiop0, SImode)));
3972 else if (code == LSHIFTRT || code == ASHIFTRT
3973 || code == ASHIFT || code == ROTATERT)
3975 rtx op = XEXP (index, 1);
3977 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3978 && GET_CODE (op) == CONST_INT
3980 && INTVAL (op) <= 31);
3984 /* For ARM v4 we may be doing a sign-extend operation during the
3988 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3994 range = (mode == HImode) ? 4095 : 4096;
3996 return (code == CONST_INT
3997 && INTVAL (index) < range
3998 && INTVAL (index) > -range);
4001 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4002 index operand. i.e. 1, 2, 4 or 8. */
4004 thumb2_index_mul_operand (rtx op)
4008 if (GET_CODE(op) != CONST_INT)
4012 return (val == 1 || val == 2 || val == 4 || val == 8);
4015 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4017 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4019 enum rtx_code code = GET_CODE (index);
4021 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4022 /* Standard coprocessor addressing modes. */
4023 if (TARGET_HARD_FLOAT
4024 && (TARGET_FPA || TARGET_MAVERICK)
4025 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4026 || (TARGET_MAVERICK && mode == DImode)))
4027 return (code == CONST_INT && INTVAL (index) < 1024
4028 && INTVAL (index) > -1024
4029 && (INTVAL (index) & 3) == 0);
4031 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4033 /* For DImode assume values will usually live in core regs
4034 and only allow LDRD addressing modes. */
4035 if (!TARGET_LDRD || mode != DImode)
4036 return (code == CONST_INT
4037 && INTVAL (index) < 1024
4038 && INTVAL (index) > -1024
4039 && (INTVAL (index) & 3) == 0);
4043 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4044 return (code == CONST_INT
4045 && INTVAL (index) < 1016
4046 && INTVAL (index) > -1024
4047 && (INTVAL (index) & 3) == 0);
4049 if (arm_address_register_rtx_p (index, strict_p)
4050 && (GET_MODE_SIZE (mode) <= 4))
4053 if (mode == DImode || mode == DFmode)
4055 HOST_WIDE_INT val = INTVAL (index);
4056 /* ??? Can we assume ldrd for thumb2? */
4057 /* Thumb-2 ldrd only has reg+const addressing modes. */
4058 if (code != CONST_INT)
4061 /* ldrd supports offsets of +-1020.
4062 However the ldr fallback does not. */
4063 return val > -256 && val < 256 && (val & 3) == 0;
4068 rtx xiop0 = XEXP (index, 0);
4069 rtx xiop1 = XEXP (index, 1);
4071 return ((arm_address_register_rtx_p (xiop0, strict_p)
4072 && thumb2_index_mul_operand (xiop1))
4073 || (arm_address_register_rtx_p (xiop1, strict_p)
4074 && thumb2_index_mul_operand (xiop0)));
4076 else if (code == ASHIFT)
4078 rtx op = XEXP (index, 1);
4080 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4081 && GET_CODE (op) == CONST_INT
4083 && INTVAL (op) <= 3);
4086 return (code == CONST_INT
4087 && INTVAL (index) < 4096
4088 && INTVAL (index) > -256);
4091 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4093 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4097 if (GET_CODE (x) != REG)
4103 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4105 return (regno <= LAST_LO_REGNUM
4106 || regno > LAST_VIRTUAL_REGISTER
4107 || regno == FRAME_POINTER_REGNUM
4108 || (GET_MODE_SIZE (mode) >= 4
4109 && (regno == STACK_POINTER_REGNUM
4110 || regno >= FIRST_PSEUDO_REGISTER
4111 || x == hard_frame_pointer_rtx
4112 || x == arg_pointer_rtx)));
4115 /* Return nonzero if x is a legitimate index register. This is the case
4116 for any base register that can access a QImode object. */
4118 thumb1_index_register_rtx_p (rtx x, int strict_p)
4120 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4123 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4125 The AP may be eliminated to either the SP or the FP, so we use the
4126 least common denominator, e.g. SImode, and offsets from 0 to 64.
4128 ??? Verify whether the above is the right approach.
4130 ??? Also, the FP may be eliminated to the SP, so perhaps that
4131 needs special handling also.
4133 ??? Look at how the mips16 port solves this problem. It probably uses
4134 better ways to solve some of these problems.
4136 Although it is not incorrect, we don't accept QImode and HImode
4137 addresses based on the frame pointer or arg pointer until the
4138 reload pass starts. This is so that eliminating such addresses
4139 into stack based ones won't produce impossible code. */
4141 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4143 /* ??? Not clear if this is right. Experiment. */
4144 if (GET_MODE_SIZE (mode) < 4
4145 && !(reload_in_progress || reload_completed)
4146 && (reg_mentioned_p (frame_pointer_rtx, x)
4147 || reg_mentioned_p (arg_pointer_rtx, x)
4148 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4149 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4150 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4151 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4154 /* Accept any base register. SP only in SImode or larger. */
4155 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4158 /* This is PC relative data before arm_reorg runs. */
4159 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4160 && GET_CODE (x) == SYMBOL_REF
4161 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4164 /* This is PC relative data after arm_reorg runs. */
4165 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4166 && (GET_CODE (x) == LABEL_REF
4167 || (GET_CODE (x) == CONST
4168 && GET_CODE (XEXP (x, 0)) == PLUS
4169 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4170 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4173 /* Post-inc indexing only supported for SImode and larger. */
4174 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4175 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4178 else if (GET_CODE (x) == PLUS)
4180 /* REG+REG address can be any two index registers. */
4181 /* We disallow FRAME+REG addressing since we know that FRAME
4182 will be replaced with STACK, and SP relative addressing only
4183 permits SP+OFFSET. */
4184 if (GET_MODE_SIZE (mode) <= 4
4185 && XEXP (x, 0) != frame_pointer_rtx
4186 && XEXP (x, 1) != frame_pointer_rtx
4187 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4188 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4191 /* REG+const has 5-7 bit offset for non-SP registers. */
4192 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4193 || XEXP (x, 0) == arg_pointer_rtx)
4194 && GET_CODE (XEXP (x, 1)) == CONST_INT
4195 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4198 /* REG+const has 10-bit offset for SP, but only SImode and
4199 larger is supported. */
4200 /* ??? Should probably check for DI/DFmode overflow here
4201 just like GO_IF_LEGITIMATE_OFFSET does. */
4202 else if (GET_CODE (XEXP (x, 0)) == REG
4203 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4204 && GET_MODE_SIZE (mode) >= 4
4205 && GET_CODE (XEXP (x, 1)) == CONST_INT
4206 && INTVAL (XEXP (x, 1)) >= 0
4207 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4208 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4211 else if (GET_CODE (XEXP (x, 0)) == REG
4212 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4213 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4214 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4215 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4216 && GET_MODE_SIZE (mode) >= 4
4217 && GET_CODE (XEXP (x, 1)) == CONST_INT
4218 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4222 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4223 && GET_MODE_SIZE (mode) == 4
4224 && GET_CODE (x) == SYMBOL_REF
4225 && CONSTANT_POOL_ADDRESS_P (x)
4227 && symbol_mentioned_p (get_pool_constant (x))
4228 && ! pcrel_constant_p (get_pool_constant (x))))
4234 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4235 instruction of mode MODE. */
4237 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4239 switch (GET_MODE_SIZE (mode))
4242 return val >= 0 && val < 32;
4245 return val >= 0 && val < 64 && (val & 1) == 0;
4249 && (val + GET_MODE_SIZE (mode)) <= 128
4254 /* Build the SYMBOL_REF for __tls_get_addr. */
4256 static GTY(()) rtx tls_get_addr_libfunc;
4259 get_tls_get_addr (void)
4261 if (!tls_get_addr_libfunc)
4262 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4263 return tls_get_addr_libfunc;
4267 arm_load_tp (rtx target)
4270 target = gen_reg_rtx (SImode);
4274 /* Can return in any reg. */
4275 emit_insn (gen_load_tp_hard (target));
4279 /* Always returned in r0. Immediately copy the result into a pseudo,
4280 otherwise other uses of r0 (e.g. setting up function arguments) may
4281 clobber the value. */
4285 emit_insn (gen_load_tp_soft ());
4287 tmp = gen_rtx_REG (SImode, 0);
4288 emit_move_insn (target, tmp);
4294 load_tls_operand (rtx x, rtx reg)
4298 if (reg == NULL_RTX)
4299 reg = gen_reg_rtx (SImode);
4301 tmp = gen_rtx_CONST (SImode, x);
4303 emit_move_insn (reg, tmp);
4309 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4311 rtx insns, label, labelno, sum;
4315 labelno = GEN_INT (pic_labelno++);
4316 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4317 label = gen_rtx_CONST (VOIDmode, label);
4319 sum = gen_rtx_UNSPEC (Pmode,
4320 gen_rtvec (4, x, GEN_INT (reloc), label,
4321 GEN_INT (TARGET_ARM ? 8 : 4)),
4323 reg = load_tls_operand (sum, reg);
4326 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4327 else if (TARGET_THUMB2)
4330 /* Thumb-2 only allows very limited access to the PC. Calculate
4331 the address in a temporary register. */
4332 tmp = gen_reg_rtx (SImode);
4333 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4334 emit_insn (gen_addsi3(reg, reg, tmp));
4336 else /* TARGET_THUMB1 */
4337 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4339 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4340 Pmode, 1, reg, Pmode);
4342 insns = get_insns ();
4349 legitimize_tls_address (rtx x, rtx reg)
4351 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4352 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4356 case TLS_MODEL_GLOBAL_DYNAMIC:
4357 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4358 dest = gen_reg_rtx (Pmode);
4359 emit_libcall_block (insns, dest, ret, x);
4362 case TLS_MODEL_LOCAL_DYNAMIC:
4363 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4365 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4366 share the LDM result with other LD model accesses. */
4367 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4369 dest = gen_reg_rtx (Pmode);
4370 emit_libcall_block (insns, dest, ret, eqv);
4372 /* Load the addend. */
4373 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4375 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4376 return gen_rtx_PLUS (Pmode, dest, addend);
4378 case TLS_MODEL_INITIAL_EXEC:
4379 labelno = GEN_INT (pic_labelno++);
4380 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4381 label = gen_rtx_CONST (VOIDmode, label);
4382 sum = gen_rtx_UNSPEC (Pmode,
4383 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4384 GEN_INT (TARGET_ARM ? 8 : 4)),
4386 reg = load_tls_operand (sum, reg);
4389 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4390 else if (TARGET_THUMB2)
4393 /* Thumb-2 only allows very limited access to the PC. Calculate
4394 the address in a temporary register. */
4395 tmp = gen_reg_rtx (SImode);
4396 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4397 emit_insn (gen_addsi3(reg, reg, tmp));
4398 emit_move_insn (reg, gen_const_mem (SImode, reg));
4402 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4403 emit_move_insn (reg, gen_const_mem (SImode, reg));
4406 tp = arm_load_tp (NULL_RTX);
4408 return gen_rtx_PLUS (Pmode, tp, reg);
4410 case TLS_MODEL_LOCAL_EXEC:
4411 tp = arm_load_tp (NULL_RTX);
4413 reg = gen_rtx_UNSPEC (Pmode,
4414 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4416 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4418 return gen_rtx_PLUS (Pmode, tp, reg);
4425 /* Try machine-dependent ways of modifying an illegitimate address
4426 to be legitimate. If we find one, return the new, valid address. */
4428 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4430 if (arm_tls_symbol_p (x))
4431 return legitimize_tls_address (x, NULL_RTX);
4433 if (GET_CODE (x) == PLUS)
4435 rtx xop0 = XEXP (x, 0);
4436 rtx xop1 = XEXP (x, 1);
4438 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4439 xop0 = force_reg (SImode, xop0);
4441 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4442 xop1 = force_reg (SImode, xop1);
4444 if (ARM_BASE_REGISTER_RTX_P (xop0)
4445 && GET_CODE (xop1) == CONST_INT)
4447 HOST_WIDE_INT n, low_n;
4451 /* VFP addressing modes actually allow greater offsets, but for
4452 now we just stick with the lowest common denominator. */
4454 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4466 low_n = ((mode) == TImode ? 0
4467 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4471 base_reg = gen_reg_rtx (SImode);
4472 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4473 emit_move_insn (base_reg, val);
4474 x = plus_constant (base_reg, low_n);
4476 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4477 x = gen_rtx_PLUS (SImode, xop0, xop1);
4480 /* XXX We don't allow MINUS any more -- see comment in
4481 arm_legitimate_address_p (). */
4482 else if (GET_CODE (x) == MINUS)
4484 rtx xop0 = XEXP (x, 0);
4485 rtx xop1 = XEXP (x, 1);
4487 if (CONSTANT_P (xop0))
4488 xop0 = force_reg (SImode, xop0);
4490 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4491 xop1 = force_reg (SImode, xop1);
4493 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4494 x = gen_rtx_MINUS (SImode, xop0, xop1);
4497 /* Make sure to take full advantage of the pre-indexed addressing mode
4498 with absolute addresses which often allows for the base register to
4499 be factorized for multiple adjacent memory references, and it might
4500 even allows for the mini pool to be avoided entirely. */
4501 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4504 HOST_WIDE_INT mask, base, index;
4507 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4508 use a 8-bit index. So let's use a 12-bit index for SImode only and
4509 hope that arm_gen_constant will enable ldrb to use more bits. */
4510 bits = (mode == SImode) ? 12 : 8;
4511 mask = (1 << bits) - 1;
4512 base = INTVAL (x) & ~mask;
4513 index = INTVAL (x) & mask;
4514 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4516 /* It'll most probably be more efficient to generate the base
4517 with more bits set and use a negative index instead. */
4521 base_reg = force_reg (SImode, GEN_INT (base));
4522 x = plus_constant (base_reg, index);
4527 /* We need to find and carefully transform any SYMBOL and LABEL
4528 references; so go back to the original address expression. */
4529 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4531 if (new_x != orig_x)
4539 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4540 to be legitimate. If we find one, return the new, valid address. */
4542 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4544 if (arm_tls_symbol_p (x))
4545 return legitimize_tls_address (x, NULL_RTX);
4547 if (GET_CODE (x) == PLUS
4548 && GET_CODE (XEXP (x, 1)) == CONST_INT
4549 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4550 || INTVAL (XEXP (x, 1)) < 0))
4552 rtx xop0 = XEXP (x, 0);
4553 rtx xop1 = XEXP (x, 1);
4554 HOST_WIDE_INT offset = INTVAL (xop1);
4556 /* Try and fold the offset into a biasing of the base register and
4557 then offsetting that. Don't do this when optimizing for space
4558 since it can cause too many CSEs. */
4559 if (optimize_size && offset >= 0
4560 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4562 HOST_WIDE_INT delta;
4565 delta = offset - (256 - GET_MODE_SIZE (mode));
4566 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4567 delta = 31 * GET_MODE_SIZE (mode);
4569 delta = offset & (~31 * GET_MODE_SIZE (mode));
4571 xop0 = force_operand (plus_constant (xop0, offset - delta),
4573 x = plus_constant (xop0, delta);
4575 else if (offset < 0 && offset > -256)
4576 /* Small negative offsets are best done with a subtract before the
4577 dereference, forcing these into a register normally takes two
4579 x = force_operand (x, NULL_RTX);
4582 /* For the remaining cases, force the constant into a register. */
4583 xop1 = force_reg (SImode, xop1);
4584 x = gen_rtx_PLUS (SImode, xop0, xop1);
4587 else if (GET_CODE (x) == PLUS
4588 && s_register_operand (XEXP (x, 1), SImode)
4589 && !s_register_operand (XEXP (x, 0), SImode))
4591 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4593 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4598 /* We need to find and carefully transform any SYMBOL and LABEL
4599 references; so go back to the original address expression. */
4600 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4602 if (new_x != orig_x)
4610 thumb_legitimize_reload_address (rtx *x_p,
4611 enum machine_mode mode,
4612 int opnum, int type,
4613 int ind_levels ATTRIBUTE_UNUSED)
4617 if (GET_CODE (x) == PLUS
4618 && GET_MODE_SIZE (mode) < 4
4619 && REG_P (XEXP (x, 0))
4620 && XEXP (x, 0) == stack_pointer_rtx
4621 && GET_CODE (XEXP (x, 1)) == CONST_INT
4622 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4627 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4628 Pmode, VOIDmode, 0, 0, opnum, type);
4632 /* If both registers are hi-regs, then it's better to reload the
4633 entire expression rather than each register individually. That
4634 only requires one reload register rather than two. */
4635 if (GET_CODE (x) == PLUS
4636 && REG_P (XEXP (x, 0))
4637 && REG_P (XEXP (x, 1))
4638 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4639 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4644 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4645 Pmode, VOIDmode, 0, 0, opnum, type);
4652 /* Test for various thread-local symbols. */
4654 /* Return TRUE if X is a thread-local symbol. */
4657 arm_tls_symbol_p (rtx x)
4659 if (! TARGET_HAVE_TLS)
4662 if (GET_CODE (x) != SYMBOL_REF)
4665 return SYMBOL_REF_TLS_MODEL (x) != 0;
4668 /* Helper for arm_tls_referenced_p. */
4671 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4673 if (GET_CODE (*x) == SYMBOL_REF)
4674 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4676 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4677 TLS offsets, not real symbol references. */
4678 if (GET_CODE (*x) == UNSPEC
4679 && XINT (*x, 1) == UNSPEC_TLS)
4685 /* Return TRUE if X contains any TLS symbol references. */
4688 arm_tls_referenced_p (rtx x)
4690 if (! TARGET_HAVE_TLS)
4693 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4696 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4699 arm_cannot_force_const_mem (rtx x)
4703 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4705 split_const (x, &base, &offset);
4706 if (GET_CODE (base) == SYMBOL_REF
4707 && !offset_within_block_p (base, INTVAL (offset)))
4710 return arm_tls_referenced_p (x);
4713 #define REG_OR_SUBREG_REG(X) \
4714 (GET_CODE (X) == REG \
4715 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4717 #define REG_OR_SUBREG_RTX(X) \
4718 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4720 #ifndef COSTS_N_INSNS
4721 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4724 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4726 enum machine_mode mode = GET_MODE (x);
4739 return COSTS_N_INSNS (1);
4742 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4745 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4752 return COSTS_N_INSNS (2) + cycles;
4754 return COSTS_N_INSNS (1) + 16;
4757 return (COSTS_N_INSNS (1)
4758 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4759 + GET_CODE (SET_DEST (x)) == MEM));
4764 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4766 if (thumb_shiftable_const (INTVAL (x)))
4767 return COSTS_N_INSNS (2);
4768 return COSTS_N_INSNS (3);
4770 else if ((outer == PLUS || outer == COMPARE)
4771 && INTVAL (x) < 256 && INTVAL (x) > -256)
4773 else if (outer == AND
4774 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4775 return COSTS_N_INSNS (1);
4776 else if (outer == ASHIFT || outer == ASHIFTRT
4777 || outer == LSHIFTRT)
4779 return COSTS_N_INSNS (2);
4785 return COSTS_N_INSNS (3);
4803 /* XXX another guess. */
4804 /* Memory costs quite a lot for the first word, but subsequent words
4805 load at the equivalent of a single insn each. */
4806 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4807 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4812 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4817 /* XXX still guessing. */
4818 switch (GET_MODE (XEXP (x, 0)))
4821 return (1 + (mode == DImode ? 4 : 0)
4822 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4825 return (4 + (mode == DImode ? 4 : 0)
4826 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4829 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4841 /* Worker routine for arm_rtx_costs. */
4842 /* ??? This needs updating for thumb2. */
4844 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4846 enum machine_mode mode = GET_MODE (x);
4847 enum rtx_code subcode;
4853 /* Memory costs quite a lot for the first word, but subsequent words
4854 load at the equivalent of a single insn each. */
4855 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4856 + (GET_CODE (x) == SYMBOL_REF
4857 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4863 return optimize_size ? COSTS_N_INSNS (2) : 100;
4866 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4873 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4875 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4876 + ((GET_CODE (XEXP (x, 0)) == REG
4877 || (GET_CODE (XEXP (x, 0)) == SUBREG
4878 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4880 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4881 || (GET_CODE (XEXP (x, 0)) == SUBREG
4882 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4884 + ((GET_CODE (XEXP (x, 1)) == REG
4885 || (GET_CODE (XEXP (x, 1)) == SUBREG
4886 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4887 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4891 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4893 extra_cost = rtx_cost (XEXP (x, 1), code);
4894 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4895 extra_cost += 4 * ARM_NUM_REGS (mode);
4900 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4901 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4902 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4903 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4906 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4907 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4908 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4909 && arm_const_double_rtx (XEXP (x, 1))))
4911 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4912 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4913 && arm_const_double_rtx (XEXP (x, 0))))
4916 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4917 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4918 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4919 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4920 || subcode == ASHIFTRT || subcode == LSHIFTRT
4921 || subcode == ROTATE || subcode == ROTATERT
4923 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4924 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4925 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4926 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4927 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4928 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4929 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4934 if (GET_CODE (XEXP (x, 0)) == MULT)
4936 extra_cost = rtx_cost (XEXP (x, 0), code);
4937 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4938 extra_cost += 4 * ARM_NUM_REGS (mode);
4942 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4943 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4944 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4945 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4946 && arm_const_double_rtx (XEXP (x, 1))))
4950 case AND: case XOR: case IOR:
4953 /* Normally the frame registers will be spilt into reg+const during
4954 reload, so it is a bad idea to combine them with other instructions,
4955 since then they might not be moved outside of loops. As a compromise
4956 we allow integration with ops that have a constant as their second
4958 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4959 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4960 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4961 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4962 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4966 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4967 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4968 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4969 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4972 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4973 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4974 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4975 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4976 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4979 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4980 return (1 + extra_cost
4981 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4982 || subcode == LSHIFTRT || subcode == ASHIFTRT
4983 || subcode == ROTATE || subcode == ROTATERT
4985 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4986 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4987 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4988 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4989 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4990 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4996 /* This should have been handled by the CPU specific routines. */
5000 if (arm_arch3m && mode == SImode
5001 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5002 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5003 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5004 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5005 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5006 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5011 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5012 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5016 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5018 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5021 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5029 return 4 + (mode == DImode ? 4 : 0);
5032 /* ??? value extensions are cheaper on armv6. */
5033 if (GET_MODE (XEXP (x, 0)) == QImode)
5034 return (4 + (mode == DImode ? 4 : 0)
5035 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5038 switch (GET_MODE (XEXP (x, 0)))
5041 return (1 + (mode == DImode ? 4 : 0)
5042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5045 return (4 + (mode == DImode ? 4 : 0)
5046 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5049 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5064 if (const_ok_for_arm (INTVAL (x)))
5065 return outer == SET ? 2 : -1;
5066 else if (outer == AND
5067 && const_ok_for_arm (~INTVAL (x)))
5069 else if ((outer == COMPARE
5070 || outer == PLUS || outer == MINUS)
5071 && const_ok_for_arm (-INTVAL (x)))
5082 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5083 return outer == SET ? 2 : -1;
5084 else if ((outer == COMPARE || outer == PLUS)
5085 && neg_const_double_rtx_ok_for_fpa (x))
5094 /* RTX costs when optimizing for size. */
5096 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5098 enum machine_mode mode = GET_MODE (x);
5102 /* XXX TBD. For now, use the standard costs. */
5103 *total = thumb1_rtx_costs (x, code, outer_code);
5110 /* A memory access costs 1 insn if the mode is small, or the address is
5111 a single register, otherwise it costs one insn per word. */
5112 if (REG_P (XEXP (x, 0)))
5113 *total = COSTS_N_INSNS (1);
5115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5122 /* Needs a libcall, so it costs about this. */
5123 *total = COSTS_N_INSNS (2);
5127 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5129 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5137 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5139 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5142 else if (mode == SImode)
5144 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5145 /* Slightly disparage register shifts, but not by much. */
5146 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5147 *total += 1 + rtx_cost (XEXP (x, 1), code);
5151 /* Needs a libcall. */
5152 *total = COSTS_N_INSNS (2);
5156 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5158 *total = COSTS_N_INSNS (1);
5164 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5165 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5167 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5168 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5169 || subcode1 == ROTATE || subcode1 == ROTATERT
5170 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5171 || subcode1 == ASHIFTRT)
5173 /* It's just the cost of the two operands. */
5178 *total = COSTS_N_INSNS (1);
5182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5186 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5188 *total = COSTS_N_INSNS (1);
5193 case AND: case XOR: case IOR:
5196 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5198 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5199 || subcode == LSHIFTRT || subcode == ASHIFTRT
5200 || (code == AND && subcode == NOT))
5202 /* It's just the cost of the two operands. */
5208 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5212 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5216 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5217 *total = COSTS_N_INSNS (1);
5220 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5229 if (cc_register (XEXP (x, 0), VOIDmode))
5232 *total = COSTS_N_INSNS (1);
5236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5237 *total = COSTS_N_INSNS (1);
5239 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5244 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5246 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5247 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5250 *total += COSTS_N_INSNS (1);
5255 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5257 switch (GET_MODE (XEXP (x, 0)))
5260 *total += COSTS_N_INSNS (1);
5264 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5270 *total += COSTS_N_INSNS (2);
5275 *total += COSTS_N_INSNS (1);
5280 if (const_ok_for_arm (INTVAL (x)))
5281 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5282 else if (const_ok_for_arm (~INTVAL (x)))
5283 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5284 else if (const_ok_for_arm (-INTVAL (x)))
5286 if (outer_code == COMPARE || outer_code == PLUS
5287 || outer_code == MINUS)
5290 *total = COSTS_N_INSNS (1);
5293 *total = COSTS_N_INSNS (2);
5299 *total = COSTS_N_INSNS (2);
5303 *total = COSTS_N_INSNS (4);
5307 if (mode != VOIDmode)
5308 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5310 *total = COSTS_N_INSNS (4); /* How knows? */
5315 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5316 supported on any "slowmul" cores, so it can be ignored. */
5319 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5321 enum machine_mode mode = GET_MODE (x);
5325 *total = thumb1_rtx_costs (x, code, outer_code);
5332 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5339 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5341 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5342 & (unsigned HOST_WIDE_INT) 0xffffffff);
5343 int cost, const_ok = const_ok_for_arm (i);
5344 int j, booth_unit_size;
5346 /* Tune as appropriate. */
5347 cost = const_ok ? 4 : 8;
5348 booth_unit_size = 2;
5349 for (j = 0; i && j < 32; j += booth_unit_size)
5351 i >>= booth_unit_size;
5359 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5360 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5364 *total = arm_rtx_costs_1 (x, code, outer_code);
5370 /* RTX cost for cores with a fast multiply unit (M variants). */
5373 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5375 enum machine_mode mode = GET_MODE (x);
5379 *total = thumb1_rtx_costs (x, code, outer_code);
5383 /* ??? should thumb2 use different costs? */
5387 /* There is no point basing this on the tuning, since it is always the
5388 fast variant if it exists at all. */
5390 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5391 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5392 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5399 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5406 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5408 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5409 & (unsigned HOST_WIDE_INT) 0xffffffff);
5410 int cost, const_ok = const_ok_for_arm (i);
5411 int j, booth_unit_size;
5413 /* Tune as appropriate. */
5414 cost = const_ok ? 4 : 8;
5415 booth_unit_size = 8;
5416 for (j = 0; i && j < 32; j += booth_unit_size)
5418 i >>= booth_unit_size;
5426 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5427 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5431 *total = arm_rtx_costs_1 (x, code, outer_code);
5437 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5438 so it can be ignored. */
5441 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5443 enum machine_mode mode = GET_MODE (x);
5447 *total = thumb1_rtx_costs (x, code, outer_code);
5454 /* There is no point basing this on the tuning, since it is always the
5455 fast variant if it exists at all. */
5457 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5458 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5459 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5466 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5473 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5475 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5476 & (unsigned HOST_WIDE_INT) 0xffffffff);
5477 int cost, const_ok = const_ok_for_arm (i);
5478 unsigned HOST_WIDE_INT masked_const;
5480 /* The cost will be related to two insns.
5481 First a load of the constant (MOV or LDR), then a multiply. */
5484 cost += 1; /* LDR is probably more expensive because
5485 of longer result latency. */
5486 masked_const = i & 0xffff8000;
5487 if (masked_const != 0 && masked_const != 0xffff8000)
5489 masked_const = i & 0xf8000000;
5490 if (masked_const == 0 || masked_const == 0xf8000000)
5499 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5500 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5504 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5505 will stall until the multiplication is complete. */
5506 if (GET_CODE (XEXP (x, 0)) == MULT)
5507 *total = 4 + rtx_cost (XEXP (x, 0), code);
5509 *total = arm_rtx_costs_1 (x, code, outer_code);
5513 *total = arm_rtx_costs_1 (x, code, outer_code);
5519 /* RTX costs for 9e (and later) cores. */
5522 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5524 enum machine_mode mode = GET_MODE (x);
5533 *total = COSTS_N_INSNS (3);
5537 *total = thumb1_rtx_costs (x, code, outer_code);
5545 /* There is no point basing this on the tuning, since it is always the
5546 fast variant if it exists at all. */
5548 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5549 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5550 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5575 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5579 *total = arm_rtx_costs_1 (x, code, outer_code);
5583 /* All address computations that can be done are free, but rtx cost returns
5584 the same for practically all of them. So we weight the different types
5585 of address here in the order (most pref first):
5586 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5588 arm_arm_address_cost (rtx x)
5590 enum rtx_code c = GET_CODE (x);
5592 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5594 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5597 if (c == PLUS || c == MINUS)
5599 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5602 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5612 arm_thumb_address_cost (rtx x)
5614 enum rtx_code c = GET_CODE (x);
5619 && GET_CODE (XEXP (x, 0)) == REG
5620 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5627 arm_address_cost (rtx x)
5629 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5633 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5637 /* Some true dependencies can have a higher cost depending
5638 on precisely how certain input operands are used. */
5640 && REG_NOTE_KIND (link) == 0
5641 && recog_memoized (insn) >= 0
5642 && recog_memoized (dep) >= 0)
5644 int shift_opnum = get_attr_shift (insn);
5645 enum attr_type attr_type = get_attr_type (dep);
5647 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5648 operand for INSN. If we have a shifted input operand and the
5649 instruction we depend on is another ALU instruction, then we may
5650 have to account for an additional stall. */
5651 if (shift_opnum != 0
5652 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5654 rtx shifted_operand;
5657 /* Get the shifted operand. */
5658 extract_insn (insn);
5659 shifted_operand = recog_data.operand[shift_opnum];
5661 /* Iterate over all the operands in DEP. If we write an operand
5662 that overlaps with SHIFTED_OPERAND, then we have increase the
5663 cost of this dependency. */
5665 preprocess_constraints ();
5666 for (opno = 0; opno < recog_data.n_operands; opno++)
5668 /* We can ignore strict inputs. */
5669 if (recog_data.operand_type[opno] == OP_IN)
5672 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5679 /* XXX This is not strictly true for the FPA. */
5680 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5681 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5684 /* Call insns don't incur a stall, even if they follow a load. */
5685 if (REG_NOTE_KIND (link) == 0
5686 && GET_CODE (insn) == CALL_INSN)
5689 if ((i_pat = single_set (insn)) != NULL
5690 && GET_CODE (SET_SRC (i_pat)) == MEM
5691 && (d_pat = single_set (dep)) != NULL
5692 && GET_CODE (SET_DEST (d_pat)) == MEM)
5694 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5695 /* This is a load after a store, there is no conflict if the load reads
5696 from a cached area. Assume that loads from the stack, and from the
5697 constant pool are cached, and that others will miss. This is a
5700 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5701 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5702 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5703 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5710 static int fp_consts_inited = 0;
5712 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5713 static const char * const strings_fp[8] =
5716 "4", "5", "0.5", "10"
5719 static REAL_VALUE_TYPE values_fp[8];
5722 init_fp_table (void)
5728 fp_consts_inited = 1;
5730 fp_consts_inited = 8;
5732 for (i = 0; i < fp_consts_inited; i++)
5734 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5739 /* Return TRUE if rtx X is a valid immediate FP constant. */
5741 arm_const_double_rtx (rtx x)
5746 if (!fp_consts_inited)
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5750 if (REAL_VALUE_MINUS_ZERO (r))
5753 for (i = 0; i < fp_consts_inited; i++)
5754 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5760 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5762 neg_const_double_rtx_ok_for_fpa (rtx x)
5767 if (!fp_consts_inited)
5770 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5771 r = REAL_VALUE_NEGATE (r);
5772 if (REAL_VALUE_MINUS_ZERO (r))
5775 for (i = 0; i < 8; i++)
5776 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5783 /* VFPv3 has a fairly wide range of representable immediates, formed from
5784 "quarter-precision" floating-point values. These can be evaluated using this
5785 formula (with ^ for exponentiation):
5789 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5790 16 <= n <= 31 and 0 <= r <= 7.
5792 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5794 - A (most-significant) is the sign bit.
5795 - BCD are the exponent (encoded as r XOR 3).
5796 - EFGH are the mantissa (encoded as n - 16).
5799 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5800 fconst[sd] instruction, or -1 if X isn't suitable. */
5802 vfp3_const_double_index (rtx x)
5804 REAL_VALUE_TYPE r, m;
5806 unsigned HOST_WIDE_INT mantissa, mant_hi;
5807 unsigned HOST_WIDE_INT mask;
5808 HOST_WIDE_INT m1, m2;
5809 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5811 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5814 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5816 /* We can't represent these things, so detect them first. */
5817 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5820 /* Extract sign, exponent and mantissa. */
5821 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5822 r = REAL_VALUE_ABS (r);
5823 exponent = REAL_EXP (&r);
5824 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5825 highest (sign) bit, with a fixed binary point at bit point_pos.
5826 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5827 bits for the mantissa, this may fail (low bits would be lost). */
5828 real_ldexp (&m, &r, point_pos - exponent);
5829 REAL_VALUE_TO_INT (&m1, &m2, m);
5833 /* If there are bits set in the low part of the mantissa, we can't
5834 represent this value. */
5838 /* Now make it so that mantissa contains the most-significant bits, and move
5839 the point_pos to indicate that the least-significant bits have been
5841 point_pos -= HOST_BITS_PER_WIDE_INT;
5844 /* We can permit four significant bits of mantissa only, plus a high bit
5845 which is always 1. */
5846 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5847 if ((mantissa & mask) != 0)
5850 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5851 mantissa >>= point_pos - 5;
5853 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5854 floating-point immediate zero with Neon using an integer-zero load, but
5855 that case is handled elsewhere.) */
5859 gcc_assert (mantissa >= 16 && mantissa <= 31);
5861 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5862 normalized significands are in the range [1, 2). (Our mantissa is shifted
5863 left 4 places at this point relative to normalized IEEE754 values). GCC
5864 internally uses [0.5, 1) (see real.c), so the exponent returned from
5865 REAL_EXP must be altered. */
5866 exponent = 5 - exponent;
5868 if (exponent < 0 || exponent > 7)
5871 /* Sign, mantissa and exponent are now in the correct form to plug into the
5872 formulae described in the comment above. */
5873 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5876 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5878 vfp3_const_double_rtx (rtx x)
5883 return vfp3_const_double_index (x) != -1;
5886 /* Recognize immediates which can be used in various Neon instructions. Legal
5887 immediates are described by the following table (for VMVN variants, the
5888 bitwise inverse of the constant shown is recognized. In either case, VMOV
5889 is output and the correct instruction to use for a given constant is chosen
5890 by the assembler). The constant shown is replicated across all elements of
5891 the destination vector.
5893 insn elems variant constant (binary)
5894 ---- ----- ------- -----------------
5895 vmov i32 0 00000000 00000000 00000000 abcdefgh
5896 vmov i32 1 00000000 00000000 abcdefgh 00000000
5897 vmov i32 2 00000000 abcdefgh 00000000 00000000
5898 vmov i32 3 abcdefgh 00000000 00000000 00000000
5899 vmov i16 4 00000000 abcdefgh
5900 vmov i16 5 abcdefgh 00000000
5901 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5902 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5903 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5904 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5905 vmvn i16 10 00000000 abcdefgh
5906 vmvn i16 11 abcdefgh 00000000
5907 vmov i32 12 00000000 00000000 abcdefgh 11111111
5908 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5909 vmov i32 14 00000000 abcdefgh 11111111 11111111
5910 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5912 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5913 eeeeeeee ffffffff gggggggg hhhhhhhh
5914 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5916 For case 18, B = !b. Representable values are exactly those accepted by
5917 vfp3_const_double_index, but are output as floating-point numbers rather
5920 Variants 0-5 (inclusive) may also be used as immediates for the second
5921 operand of VORR/VBIC instructions.
5923 The INVERSE argument causes the bitwise inverse of the given operand to be
5924 recognized instead (used for recognizing legal immediates for the VAND/VORN
5925 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5926 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5927 output, rather than the real insns vbic/vorr).
5929 INVERSE makes no difference to the recognition of float vectors.
5931 The return value is the variant of immediate as shown in the above table, or
5932 -1 if the given value doesn't match any of the listed patterns.
5935 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5936 rtx *modconst, int *elementwidth)
5938 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5940 for (i = 0; i < idx; i += (STRIDE)) \
5945 immtype = (CLASS); \
5946 elsize = (ELSIZE); \
5950 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5951 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5952 unsigned char bytes[16];
5953 int immtype = -1, matches;
5954 unsigned int invmask = inverse ? 0xff : 0;
5956 /* Vectors of float constants. */
5957 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5959 rtx el0 = CONST_VECTOR_ELT (op, 0);
5962 if (!vfp3_const_double_rtx (el0))
5965 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5967 for (i = 1; i < n_elts; i++)
5969 rtx elt = CONST_VECTOR_ELT (op, i);
5972 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5974 if (!REAL_VALUES_EQUAL (r0, re))
5979 *modconst = CONST_VECTOR_ELT (op, 0);
5987 /* Splat vector constant out into a byte vector. */
5988 for (i = 0; i < n_elts; i++)
5990 rtx el = CONST_VECTOR_ELT (op, i);
5991 unsigned HOST_WIDE_INT elpart;
5992 unsigned int part, parts;
5994 if (GET_CODE (el) == CONST_INT)
5996 elpart = INTVAL (el);
5999 else if (GET_CODE (el) == CONST_DOUBLE)
6001 elpart = CONST_DOUBLE_LOW (el);
6007 for (part = 0; part < parts; part++)
6010 for (byte = 0; byte < innersize; byte++)
6012 bytes[idx++] = (elpart & 0xff) ^ invmask;
6013 elpart >>= BITS_PER_UNIT;
6015 if (GET_CODE (el) == CONST_DOUBLE)
6016 elpart = CONST_DOUBLE_HIGH (el);
6021 gcc_assert (idx == GET_MODE_SIZE (mode));
6025 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6026 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6028 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6029 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6031 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6034 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6035 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6037 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6039 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6041 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6042 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6044 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6045 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6047 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6048 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6050 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6051 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6053 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6055 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6057 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6058 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6060 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6061 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6063 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6064 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6066 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6069 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6071 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6072 && bytes[i] == bytes[(i + 8) % idx]);
6080 *elementwidth = elsize;
6084 unsigned HOST_WIDE_INT imm = 0;
6086 /* Un-invert bytes of recognized vector, if neccessary. */
6088 for (i = 0; i < idx; i++)
6089 bytes[i] ^= invmask;
6093 /* FIXME: Broken on 32-bit H_W_I hosts. */
6094 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6096 for (i = 0; i < 8; i++)
6097 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6098 << (i * BITS_PER_UNIT);
6100 *modconst = GEN_INT (imm);
6104 unsigned HOST_WIDE_INT imm = 0;
6106 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6107 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6109 *modconst = GEN_INT (imm);
6117 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6118 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6119 float elements), and a modified constant (whatever should be output for a
6120 VMOV) in *MODCONST. */
6123 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6124 rtx *modconst, int *elementwidth)
6128 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6134 *modconst = tmpconst;
6137 *elementwidth = tmpwidth;
6142 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6143 the immediate is valid, write a constant suitable for using as an operand
6144 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6145 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6148 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6149 rtx *modconst, int *elementwidth)
6153 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6155 if (retval < 0 || retval > 5)
6159 *modconst = tmpconst;
6162 *elementwidth = tmpwidth;
6167 /* Return a string suitable for output of Neon immediate logic operation
6171 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6172 int inverse, int quad)
6174 int width, is_valid;
6175 static char templ[40];
6177 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6179 gcc_assert (is_valid != 0);
6182 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6184 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6189 /* Output a sequence of pairwise operations to implement a reduction.
6190 NOTE: We do "too much work" here, because pairwise operations work on two
6191 registers-worth of operands in one go. Unfortunately we can't exploit those
6192 extra calculations to do the full operation in fewer steps, I don't think.
6193 Although all vector elements of the result but the first are ignored, we
6194 actually calculate the same result in each of the elements. An alternative
6195 such as initially loading a vector with zero to use as each of the second
6196 operands would use up an additional register and take an extra instruction,
6197 for no particular gain. */
6200 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6201 rtx (*reduc) (rtx, rtx, rtx))
6203 enum machine_mode inner = GET_MODE_INNER (mode);
6204 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6207 for (i = parts / 2; i >= 1; i /= 2)
6209 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6210 emit_insn (reduc (dest, tmpsum, tmpsum));
6215 /* Initialise a vector with non-constant elements. FIXME: We can do better
6216 than the current implementation (building a vector on the stack and then
6217 loading it) in many cases. See rs6000.c. */
6220 neon_expand_vector_init (rtx target, rtx vals)
6222 enum machine_mode mode = GET_MODE (target);
6223 enum machine_mode inner = GET_MODE_INNER (mode);
6224 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6227 gcc_assert (VECTOR_MODE_P (mode));
6229 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6230 for (i = 0; i < n_elts; i++)
6231 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6232 XVECEXP (vals, 0, i));
6234 emit_move_insn (target, mem);
6238 /* Predicates for `match_operand' and `match_operator'. */
6240 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6242 cirrus_memory_offset (rtx op)
6244 /* Reject eliminable registers. */
6245 if (! (reload_in_progress || reload_completed)
6246 && ( reg_mentioned_p (frame_pointer_rtx, op)
6247 || reg_mentioned_p (arg_pointer_rtx, op)
6248 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6249 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6250 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6251 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6254 if (GET_CODE (op) == MEM)
6260 /* Match: (mem (reg)). */
6261 if (GET_CODE (ind) == REG)
6267 if (GET_CODE (ind) == PLUS
6268 && GET_CODE (XEXP (ind, 0)) == REG
6269 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6270 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6277 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6278 WB is true if full writeback address modes are allowed and is false
6279 if limited writeback address modes (POST_INC and PRE_DEC) are
6283 arm_coproc_mem_operand (rtx op, bool wb)
6287 /* Reject eliminable registers. */
6288 if (! (reload_in_progress || reload_completed)
6289 && ( reg_mentioned_p (frame_pointer_rtx, op)
6290 || reg_mentioned_p (arg_pointer_rtx, op)
6291 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6292 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6293 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6294 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6297 /* Constants are converted into offsets from labels. */
6298 if (GET_CODE (op) != MEM)
6303 if (reload_completed
6304 && (GET_CODE (ind) == LABEL_REF
6305 || (GET_CODE (ind) == CONST
6306 && GET_CODE (XEXP (ind, 0)) == PLUS
6307 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6308 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6311 /* Match: (mem (reg)). */
6312 if (GET_CODE (ind) == REG)
6313 return arm_address_register_rtx_p (ind, 0);
6315 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
6316 acceptable in any case (subject to verification by
6317 arm_address_register_rtx_p). We need WB to be true to accept
6318 PRE_INC and POST_DEC. */
6319 if (GET_CODE (ind) == POST_INC
6320 || GET_CODE (ind) == PRE_DEC
6322 && (GET_CODE (ind) == PRE_INC
6323 || GET_CODE (ind) == POST_DEC)))
6324 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6327 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6328 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6329 && GET_CODE (XEXP (ind, 1)) == PLUS
6330 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6331 ind = XEXP (ind, 1);
6336 if (GET_CODE (ind) == PLUS
6337 && GET_CODE (XEXP (ind, 0)) == REG
6338 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6339 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6340 && INTVAL (XEXP (ind, 1)) > -1024
6341 && INTVAL (XEXP (ind, 1)) < 1024
6342 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6348 /* Return TRUE if OP is a memory operand which we can load or store a vector
6349 to/from. If CORE is true, we're moving from ARM registers not Neon
6352 neon_vector_mem_operand (rtx op, bool core)
6356 /* Reject eliminable registers. */
6357 if (! (reload_in_progress || reload_completed)
6358 && ( reg_mentioned_p (frame_pointer_rtx, op)
6359 || reg_mentioned_p (arg_pointer_rtx, op)
6360 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6361 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6362 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6363 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6366 /* Constants are converted into offsets from labels. */
6367 if (GET_CODE (op) != MEM)
6372 if (reload_completed
6373 && (GET_CODE (ind) == LABEL_REF
6374 || (GET_CODE (ind) == CONST
6375 && GET_CODE (XEXP (ind, 0)) == PLUS
6376 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6377 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6380 /* Match: (mem (reg)). */
6381 if (GET_CODE (ind) == REG)
6382 return arm_address_register_rtx_p (ind, 0);
6384 /* Allow post-increment with Neon registers. */
6385 if (!core && GET_CODE (ind) == POST_INC)
6386 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6389 /* FIXME: We can support this too if we use VLD1/VST1. */
6391 && GET_CODE (ind) == POST_MODIFY
6392 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6393 && GET_CODE (XEXP (ind, 1)) == PLUS
6394 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6395 ind = XEXP (ind, 1);
6402 && GET_CODE (ind) == PLUS
6403 && GET_CODE (XEXP (ind, 0)) == REG
6404 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6405 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6406 && INTVAL (XEXP (ind, 1)) > -1024
6407 && INTVAL (XEXP (ind, 1)) < 1016
6408 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6414 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6417 neon_struct_mem_operand (rtx op)
6421 /* Reject eliminable registers. */
6422 if (! (reload_in_progress || reload_completed)
6423 && ( reg_mentioned_p (frame_pointer_rtx, op)
6424 || reg_mentioned_p (arg_pointer_rtx, op)
6425 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6426 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6427 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6428 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6431 /* Constants are converted into offsets from labels. */
6432 if (GET_CODE (op) != MEM)
6437 if (reload_completed
6438 && (GET_CODE (ind) == LABEL_REF
6439 || (GET_CODE (ind) == CONST
6440 && GET_CODE (XEXP (ind, 0)) == PLUS
6441 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6442 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6445 /* Match: (mem (reg)). */
6446 if (GET_CODE (ind) == REG)
6447 return arm_address_register_rtx_p (ind, 0);
6452 /* Return true if X is a register that will be eliminated later on. */
6454 arm_eliminable_register (rtx x)
6456 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6457 || REGNO (x) == ARG_POINTER_REGNUM
6458 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6459 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6462 /* Return GENERAL_REGS if a scratch register required to reload x to/from
6463 coprocessor registers. Otherwise return NO_REGS. */
6466 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6469 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6470 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6471 && neon_vector_mem_operand (x, FALSE))
6474 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6477 return GENERAL_REGS;
6480 /* Values which must be returned in the most-significant end of the return
6484 arm_return_in_msb (tree valtype)
6486 return (TARGET_AAPCS_BASED
6488 && (AGGREGATE_TYPE_P (valtype)
6489 || TREE_CODE (valtype) == COMPLEX_TYPE));
6492 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6493 Use by the Cirrus Maverick code which has to workaround
6494 a hardware bug triggered by such instructions. */
6496 arm_memory_load_p (rtx insn)
6498 rtx body, lhs, rhs;;
6500 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6503 body = PATTERN (insn);
6505 if (GET_CODE (body) != SET)
6508 lhs = XEXP (body, 0);
6509 rhs = XEXP (body, 1);
6511 lhs = REG_OR_SUBREG_RTX (lhs);
6513 /* If the destination is not a general purpose
6514 register we do not have to worry. */
6515 if (GET_CODE (lhs) != REG
6516 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6519 /* As well as loads from memory we also have to react
6520 to loads of invalid constants which will be turned
6521 into loads from the minipool. */
6522 return (GET_CODE (rhs) == MEM
6523 || GET_CODE (rhs) == SYMBOL_REF
6524 || note_invalid_constants (insn, -1, false));
6527 /* Return TRUE if INSN is a Cirrus instruction. */
6529 arm_cirrus_insn_p (rtx insn)
6531 enum attr_cirrus attr;
6533 /* get_attr cannot accept USE or CLOBBER. */
6535 || GET_CODE (insn) != INSN
6536 || GET_CODE (PATTERN (insn)) == USE
6537 || GET_CODE (PATTERN (insn)) == CLOBBER)
6540 attr = get_attr_cirrus (insn);
6542 return attr != CIRRUS_NOT;
6545 /* Cirrus reorg for invalid instruction combinations. */
6547 cirrus_reorg (rtx first)
6549 enum attr_cirrus attr;
6550 rtx body = PATTERN (first);
6554 /* Any branch must be followed by 2 non Cirrus instructions. */
6555 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6558 t = next_nonnote_insn (first);
6560 if (arm_cirrus_insn_p (t))
6563 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6567 emit_insn_after (gen_nop (), first);
6572 /* (float (blah)) is in parallel with a clobber. */
6573 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6574 body = XVECEXP (body, 0, 0);
6576 if (GET_CODE (body) == SET)
6578 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6580 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6581 be followed by a non Cirrus insn. */
6582 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6584 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6585 emit_insn_after (gen_nop (), first);
6589 else if (arm_memory_load_p (first))
6591 unsigned int arm_regno;
6593 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6594 ldr/cfmv64hr combination where the Rd field is the same
6595 in both instructions must be split with a non Cirrus
6602 /* Get Arm register number for ldr insn. */
6603 if (GET_CODE (lhs) == REG)
6604 arm_regno = REGNO (lhs);
6607 gcc_assert (GET_CODE (rhs) == REG);
6608 arm_regno = REGNO (rhs);
6612 first = next_nonnote_insn (first);
6614 if (! arm_cirrus_insn_p (first))
6617 body = PATTERN (first);
6619 /* (float (blah)) is in parallel with a clobber. */
6620 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6621 body = XVECEXP (body, 0, 0);
6623 if (GET_CODE (body) == FLOAT)
6624 body = XEXP (body, 0);
6626 if (get_attr_cirrus (first) == CIRRUS_MOVE
6627 && GET_CODE (XEXP (body, 1)) == REG
6628 && arm_regno == REGNO (XEXP (body, 1)))
6629 emit_insn_after (gen_nop (), first);
6635 /* get_attr cannot accept USE or CLOBBER. */
6637 || GET_CODE (first) != INSN
6638 || GET_CODE (PATTERN (first)) == USE
6639 || GET_CODE (PATTERN (first)) == CLOBBER)
6642 attr = get_attr_cirrus (first);
6644 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6645 must be followed by a non-coprocessor instruction. */
6646 if (attr == CIRRUS_COMPARE)
6650 t = next_nonnote_insn (first);
6652 if (arm_cirrus_insn_p (t))
6655 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6659 emit_insn_after (gen_nop (), first);
6665 /* Return TRUE if X references a SYMBOL_REF. */
6667 symbol_mentioned_p (rtx x)
6672 if (GET_CODE (x) == SYMBOL_REF)
6675 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6676 are constant offsets, not symbols. */
6677 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6680 fmt = GET_RTX_FORMAT (GET_CODE (x));
6682 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6688 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6689 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6692 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6699 /* Return TRUE if X references a LABEL_REF. */
6701 label_mentioned_p (rtx x)
6706 if (GET_CODE (x) == LABEL_REF)
6709 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6710 instruction, but they are constant offsets, not symbols. */
6711 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6714 fmt = GET_RTX_FORMAT (GET_CODE (x));
6715 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6721 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6722 if (label_mentioned_p (XVECEXP (x, i, j)))
6725 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6733 tls_mentioned_p (rtx x)
6735 switch (GET_CODE (x))
6738 return tls_mentioned_p (XEXP (x, 0));
6741 if (XINT (x, 1) == UNSPEC_TLS)
6749 /* Must not copy a SET whose source operand is PC-relative. */
6752 arm_cannot_copy_insn_p (rtx insn)
6754 rtx pat = PATTERN (insn);
6756 if (GET_CODE (pat) == SET)
6758 rtx rhs = SET_SRC (pat);
6760 if (GET_CODE (rhs) == UNSPEC
6761 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6764 if (GET_CODE (rhs) == MEM
6765 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6766 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6776 enum rtx_code code = GET_CODE (x);
6793 /* Return 1 if memory locations are adjacent. */
6795 adjacent_mem_locations (rtx a, rtx b)
6797 /* We don't guarantee to preserve the order of these memory refs. */
6798 if (volatile_refs_p (a) || volatile_refs_p (b))
6801 if ((GET_CODE (XEXP (a, 0)) == REG
6802 || (GET_CODE (XEXP (a, 0)) == PLUS
6803 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6804 && (GET_CODE (XEXP (b, 0)) == REG
6805 || (GET_CODE (XEXP (b, 0)) == PLUS
6806 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6808 HOST_WIDE_INT val0 = 0, val1 = 0;
6812 if (GET_CODE (XEXP (a, 0)) == PLUS)
6814 reg0 = XEXP (XEXP (a, 0), 0);
6815 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6820 if (GET_CODE (XEXP (b, 0)) == PLUS)
6822 reg1 = XEXP (XEXP (b, 0), 0);
6823 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6828 /* Don't accept any offset that will require multiple
6829 instructions to handle, since this would cause the
6830 arith_adjacentmem pattern to output an overlong sequence. */
6831 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6834 /* Don't allow an eliminable register: register elimination can make
6835 the offset too large. */
6836 if (arm_eliminable_register (reg0))
6839 val_diff = val1 - val0;
6843 /* If the target has load delay slots, then there's no benefit
6844 to using an ldm instruction unless the offset is zero and
6845 we are optimizing for size. */
6846 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6847 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6848 && (val_diff == 4 || val_diff == -4));
6851 return ((REGNO (reg0) == REGNO (reg1))
6852 && (val_diff == 4 || val_diff == -4));
6859 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6860 HOST_WIDE_INT *load_offset)
6862 int unsorted_regs[4];
6863 HOST_WIDE_INT unsorted_offsets[4];
6868 /* Can only handle 2, 3, or 4 insns at present,
6869 though could be easily extended if required. */
6870 gcc_assert (nops >= 2 && nops <= 4);
6872 /* Loop over the operands and check that the memory references are
6873 suitable (i.e. immediate offsets from the same base register). At
6874 the same time, extract the target register, and the memory
6876 for (i = 0; i < nops; i++)
6881 /* Convert a subreg of a mem into the mem itself. */
6882 if (GET_CODE (operands[nops + i]) == SUBREG)
6883 operands[nops + i] = alter_subreg (operands + (nops + i));
6885 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6887 /* Don't reorder volatile memory references; it doesn't seem worth
6888 looking for the case where the order is ok anyway. */
6889 if (MEM_VOLATILE_P (operands[nops + i]))
6892 offset = const0_rtx;
6894 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6895 || (GET_CODE (reg) == SUBREG
6896 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6897 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6898 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6900 || (GET_CODE (reg) == SUBREG
6901 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6902 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6907 base_reg = REGNO (reg);
6908 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6909 ? REGNO (operands[i])
6910 : REGNO (SUBREG_REG (operands[i])));
6915 if (base_reg != (int) REGNO (reg))
6916 /* Not addressed from the same base register. */
6919 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6920 ? REGNO (operands[i])
6921 : REGNO (SUBREG_REG (operands[i])));
6922 if (unsorted_regs[i] < unsorted_regs[order[0]])
6926 /* If it isn't an integer register, or if it overwrites the
6927 base register but isn't the last insn in the list, then
6928 we can't do this. */
6929 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6930 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6933 unsorted_offsets[i] = INTVAL (offset);
6936 /* Not a suitable memory address. */
6940 /* All the useful information has now been extracted from the
6941 operands into unsorted_regs and unsorted_offsets; additionally,
6942 order[0] has been set to the lowest numbered register in the
6943 list. Sort the registers into order, and check that the memory
6944 offsets are ascending and adjacent. */
6946 for (i = 1; i < nops; i++)
6950 order[i] = order[i - 1];
6951 for (j = 0; j < nops; j++)
6952 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6953 && (order[i] == order[i - 1]
6954 || unsorted_regs[j] < unsorted_regs[order[i]]))
6957 /* Have we found a suitable register? if not, one must be used more
6959 if (order[i] == order[i - 1])
6962 /* Is the memory address adjacent and ascending? */
6963 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6971 for (i = 0; i < nops; i++)
6972 regs[i] = unsorted_regs[order[i]];
6974 *load_offset = unsorted_offsets[order[0]];
6977 if (unsorted_offsets[order[0]] == 0)
6978 return 1; /* ldmia */
6980 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
6981 return 2; /* ldmib */
6983 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
6984 return 3; /* ldmda */
6986 if (unsorted_offsets[order[nops - 1]] == -4)
6987 return 4; /* ldmdb */
6989 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
6990 if the offset isn't small enough. The reason 2 ldrs are faster
6991 is because these ARMs are able to do more than one cache access
6992 in a single cycle. The ARM9 and StrongARM have Harvard caches,
6993 whilst the ARM8 has a double bandwidth cache. This means that
6994 these cores can do both an instruction fetch and a data fetch in
6995 a single cycle, so the trick of calculating the address into a
6996 scratch register (one of the result regs) and then doing a load
6997 multiple actually becomes slower (and no smaller in code size).
6998 That is the transformation
7000 ldr rd1, [rbase + offset]
7001 ldr rd2, [rbase + offset + 4]
7005 add rd1, rbase, offset
7006 ldmia rd1, {rd1, rd2}
7008 produces worse code -- '3 cycles + any stalls on rd2' instead of
7009 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7010 access per cycle, the first sequence could never complete in less
7011 than 6 cycles, whereas the ldm sequence would only take 5 and
7012 would make better use of sequential accesses if not hitting the
7015 We cheat here and test 'arm_ld_sched' which we currently know to
7016 only be true for the ARM8, ARM9 and StrongARM. If this ever
7017 changes, then the test below needs to be reworked. */
7018 if (nops == 2 && arm_ld_sched)
7021 /* Can't do it without setting up the offset, only do this if it takes
7022 no more than one insn. */
7023 return (const_ok_for_arm (unsorted_offsets[order[0]])
7024 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7028 emit_ldm_seq (rtx *operands, int nops)
7032 HOST_WIDE_INT offset;
7036 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7039 strcpy (buf, "ldm%(ia%)\t");
7043 strcpy (buf, "ldm%(ib%)\t");
7047 strcpy (buf, "ldm%(da%)\t");
7051 strcpy (buf, "ldm%(db%)\t");
7056 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7057 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7060 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7061 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7063 output_asm_insn (buf, operands);
7065 strcpy (buf, "ldm%(ia%)\t");
7072 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7073 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7075 for (i = 1; i < nops; i++)
7076 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7077 reg_names[regs[i]]);
7079 strcat (buf, "}\t%@ phole ldm");
7081 output_asm_insn (buf, operands);
7086 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7087 HOST_WIDE_INT * load_offset)
7089 int unsorted_regs[4];
7090 HOST_WIDE_INT unsorted_offsets[4];
7095 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7096 extended if required. */
7097 gcc_assert (nops >= 2 && nops <= 4);
7099 /* Loop over the operands and check that the memory references are
7100 suitable (i.e. immediate offsets from the same base register). At
7101 the same time, extract the target register, and the memory
7103 for (i = 0; i < nops; i++)
7108 /* Convert a subreg of a mem into the mem itself. */
7109 if (GET_CODE (operands[nops + i]) == SUBREG)
7110 operands[nops + i] = alter_subreg (operands + (nops + i));
7112 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7114 /* Don't reorder volatile memory references; it doesn't seem worth
7115 looking for the case where the order is ok anyway. */
7116 if (MEM_VOLATILE_P (operands[nops + i]))
7119 offset = const0_rtx;
7121 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7122 || (GET_CODE (reg) == SUBREG
7123 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7124 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7125 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7127 || (GET_CODE (reg) == SUBREG
7128 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7129 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7134 base_reg = REGNO (reg);
7135 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7136 ? REGNO (operands[i])
7137 : REGNO (SUBREG_REG (operands[i])));
7142 if (base_reg != (int) REGNO (reg))
7143 /* Not addressed from the same base register. */
7146 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7147 ? REGNO (operands[i])
7148 : REGNO (SUBREG_REG (operands[i])));
7149 if (unsorted_regs[i] < unsorted_regs[order[0]])
7153 /* If it isn't an integer register, then we can't do this. */
7154 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7157 unsorted_offsets[i] = INTVAL (offset);
7160 /* Not a suitable memory address. */
7164 /* All the useful information has now been extracted from the
7165 operands into unsorted_regs and unsorted_offsets; additionally,
7166 order[0] has been set to the lowest numbered register in the
7167 list. Sort the registers into order, and check that the memory
7168 offsets are ascending and adjacent. */
7170 for (i = 1; i < nops; i++)
7174 order[i] = order[i - 1];
7175 for (j = 0; j < nops; j++)
7176 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7177 && (order[i] == order[i - 1]
7178 || unsorted_regs[j] < unsorted_regs[order[i]]))
7181 /* Have we found a suitable register? if not, one must be used more
7183 if (order[i] == order[i - 1])
7186 /* Is the memory address adjacent and ascending? */
7187 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7195 for (i = 0; i < nops; i++)
7196 regs[i] = unsorted_regs[order[i]];
7198 *load_offset = unsorted_offsets[order[0]];
7201 if (unsorted_offsets[order[0]] == 0)
7202 return 1; /* stmia */
7204 if (unsorted_offsets[order[0]] == 4)
7205 return 2; /* stmib */
7207 if (unsorted_offsets[order[nops - 1]] == 0)
7208 return 3; /* stmda */
7210 if (unsorted_offsets[order[nops - 1]] == -4)
7211 return 4; /* stmdb */
7217 emit_stm_seq (rtx *operands, int nops)
7221 HOST_WIDE_INT offset;
7225 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7228 strcpy (buf, "stm%(ia%)\t");
7232 strcpy (buf, "stm%(ib%)\t");
7236 strcpy (buf, "stm%(da%)\t");
7240 strcpy (buf, "stm%(db%)\t");
7247 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7248 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7250 for (i = 1; i < nops; i++)
7251 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7252 reg_names[regs[i]]);
7254 strcat (buf, "}\t%@ phole stm");
7256 output_asm_insn (buf, operands);
7260 /* Routines for use in generating RTL. */
7263 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7264 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7266 HOST_WIDE_INT offset = *offsetp;
7269 int sign = up ? 1 : -1;
7272 /* XScale has load-store double instructions, but they have stricter
7273 alignment requirements than load-store multiple, so we cannot
7276 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7277 the pipeline until completion.
7285 An ldr instruction takes 1-3 cycles, but does not block the
7294 Best case ldr will always win. However, the more ldr instructions
7295 we issue, the less likely we are to be able to schedule them well.
7296 Using ldr instructions also increases code size.
7298 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7299 for counts of 3 or 4 regs. */
7300 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7306 for (i = 0; i < count; i++)
7308 addr = plus_constant (from, i * 4 * sign);
7309 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7310 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7316 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7326 result = gen_rtx_PARALLEL (VOIDmode,
7327 rtvec_alloc (count + (write_back ? 1 : 0)));
7330 XVECEXP (result, 0, 0)
7331 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7336 for (j = 0; i < count; i++, j++)
7338 addr = plus_constant (from, j * 4 * sign);
7339 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7340 XVECEXP (result, 0, i)
7341 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7352 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7353 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7355 HOST_WIDE_INT offset = *offsetp;
7358 int sign = up ? 1 : -1;
7361 /* See arm_gen_load_multiple for discussion of
7362 the pros/cons of ldm/stm usage for XScale. */
7363 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7369 for (i = 0; i < count; i++)
7371 addr = plus_constant (to, i * 4 * sign);
7372 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7373 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7379 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7389 result = gen_rtx_PARALLEL (VOIDmode,
7390 rtvec_alloc (count + (write_back ? 1 : 0)));
7393 XVECEXP (result, 0, 0)
7394 = gen_rtx_SET (VOIDmode, to,
7395 plus_constant (to, count * 4 * sign));
7400 for (j = 0; i < count; i++, j++)
7402 addr = plus_constant (to, j * 4 * sign);
7403 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7404 XVECEXP (result, 0, i)
7405 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7416 arm_gen_movmemqi (rtx *operands)
7418 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7419 HOST_WIDE_INT srcoffset, dstoffset;
7421 rtx src, dst, srcbase, dstbase;
7422 rtx part_bytes_reg = NULL;
7425 if (GET_CODE (operands[2]) != CONST_INT
7426 || GET_CODE (operands[3]) != CONST_INT
7427 || INTVAL (operands[2]) > 64
7428 || INTVAL (operands[3]) & 3)
7431 dstbase = operands[0];
7432 srcbase = operands[1];
7434 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7435 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7437 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7438 out_words_to_go = INTVAL (operands[2]) / 4;
7439 last_bytes = INTVAL (operands[2]) & 3;
7440 dstoffset = srcoffset = 0;
7442 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7443 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7445 for (i = 0; in_words_to_go >= 2; i+=4)
7447 if (in_words_to_go > 4)
7448 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7449 srcbase, &srcoffset));
7451 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7452 FALSE, srcbase, &srcoffset));
7454 if (out_words_to_go)
7456 if (out_words_to_go > 4)
7457 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7458 dstbase, &dstoffset));
7459 else if (out_words_to_go != 1)
7460 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7464 dstbase, &dstoffset));
7467 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7468 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7469 if (last_bytes != 0)
7471 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7477 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7478 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7481 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7482 if (out_words_to_go)
7486 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7487 sreg = copy_to_reg (mem);
7489 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7490 emit_move_insn (mem, sreg);
7493 gcc_assert (!in_words_to_go); /* Sanity check */
7498 gcc_assert (in_words_to_go > 0);
7500 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7501 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7504 gcc_assert (!last_bytes || part_bytes_reg);
7506 if (BYTES_BIG_ENDIAN && last_bytes)
7508 rtx tmp = gen_reg_rtx (SImode);
7510 /* The bytes we want are in the top end of the word. */
7511 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7512 GEN_INT (8 * (4 - last_bytes))));
7513 part_bytes_reg = tmp;
7517 mem = adjust_automodify_address (dstbase, QImode,
7518 plus_constant (dst, last_bytes - 1),
7519 dstoffset + last_bytes - 1);
7520 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7524 tmp = gen_reg_rtx (SImode);
7525 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7526 part_bytes_reg = tmp;
7535 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7536 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7540 rtx tmp = gen_reg_rtx (SImode);
7541 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7542 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7543 part_bytes_reg = tmp;
7550 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7551 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7558 /* Select a dominance comparison mode if possible for a test of the general
7559 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7560 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7561 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7562 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7563 In all cases OP will be either EQ or NE, but we don't need to know which
7564 here. If we are unable to support a dominance comparison we return
7565 CC mode. This will then fail to match for the RTL expressions that
7566 generate this call. */
7568 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7570 enum rtx_code cond1, cond2;
7573 /* Currently we will probably get the wrong result if the individual
7574 comparisons are not simple. This also ensures that it is safe to
7575 reverse a comparison if necessary. */
7576 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7578 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7582 /* The if_then_else variant of this tests the second condition if the
7583 first passes, but is true if the first fails. Reverse the first
7584 condition to get a true "inclusive-or" expression. */
7585 if (cond_or == DOM_CC_NX_OR_Y)
7586 cond1 = reverse_condition (cond1);
7588 /* If the comparisons are not equal, and one doesn't dominate the other,
7589 then we can't do this. */
7591 && !comparison_dominates_p (cond1, cond2)
7592 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7597 enum rtx_code temp = cond1;
7605 if (cond_or == DOM_CC_X_AND_Y)
7610 case EQ: return CC_DEQmode;
7611 case LE: return CC_DLEmode;
7612 case LEU: return CC_DLEUmode;
7613 case GE: return CC_DGEmode;
7614 case GEU: return CC_DGEUmode;
7615 default: gcc_unreachable ();
7619 if (cond_or == DOM_CC_X_AND_Y)
7635 if (cond_or == DOM_CC_X_AND_Y)
7651 if (cond_or == DOM_CC_X_AND_Y)
7667 if (cond_or == DOM_CC_X_AND_Y)
7682 /* The remaining cases only occur when both comparisons are the
7685 gcc_assert (cond1 == cond2);
7689 gcc_assert (cond1 == cond2);
7693 gcc_assert (cond1 == cond2);
7697 gcc_assert (cond1 == cond2);
7701 gcc_assert (cond1 == cond2);
7710 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7712 /* All floating point compares return CCFP if it is an equality
7713 comparison, and CCFPE otherwise. */
7714 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7734 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7743 /* A compare with a shifted operand. Because of canonicalization, the
7744 comparison will have to be swapped when we emit the assembler. */
7745 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7746 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7747 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7748 || GET_CODE (x) == ROTATERT))
7751 /* This operation is performed swapped, but since we only rely on the Z
7752 flag we don't need an additional mode. */
7753 if (GET_MODE (y) == SImode && REG_P (y)
7754 && GET_CODE (x) == NEG
7755 && (op == EQ || op == NE))
7758 /* This is a special case that is used by combine to allow a
7759 comparison of a shifted byte load to be split into a zero-extend
7760 followed by a comparison of the shifted integer (only valid for
7761 equalities and unsigned inequalities). */
7762 if (GET_MODE (x) == SImode
7763 && GET_CODE (x) == ASHIFT
7764 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7765 && GET_CODE (XEXP (x, 0)) == SUBREG
7766 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7767 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7768 && (op == EQ || op == NE
7769 || op == GEU || op == GTU || op == LTU || op == LEU)
7770 && GET_CODE (y) == CONST_INT)
7773 /* A construct for a conditional compare, if the false arm contains
7774 0, then both conditions must be true, otherwise either condition
7775 must be true. Not all conditions are possible, so CCmode is
7776 returned if it can't be done. */
7777 if (GET_CODE (x) == IF_THEN_ELSE
7778 && (XEXP (x, 2) == const0_rtx
7779 || XEXP (x, 2) == const1_rtx)
7780 && COMPARISON_P (XEXP (x, 0))
7781 && COMPARISON_P (XEXP (x, 1)))
7782 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7783 INTVAL (XEXP (x, 2)));
7785 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7786 if (GET_CODE (x) == AND
7787 && COMPARISON_P (XEXP (x, 0))
7788 && COMPARISON_P (XEXP (x, 1)))
7789 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7792 if (GET_CODE (x) == IOR
7793 && COMPARISON_P (XEXP (x, 0))
7794 && COMPARISON_P (XEXP (x, 1)))
7795 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7798 /* An operation (on Thumb) where we want to test for a single bit.
7799 This is done by shifting that bit up into the top bit of a
7800 scratch register; we can then branch on the sign bit. */
7802 && GET_MODE (x) == SImode
7803 && (op == EQ || op == NE)
7804 && GET_CODE (x) == ZERO_EXTRACT
7805 && XEXP (x, 1) == const1_rtx)
7808 /* An operation that sets the condition codes as a side-effect, the
7809 V flag is not set correctly, so we can only use comparisons where
7810 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7812 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7813 if (GET_MODE (x) == SImode
7815 && (op == EQ || op == NE || op == LT || op == GE)
7816 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7817 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7818 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7819 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7820 || GET_CODE (x) == LSHIFTRT
7821 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7822 || GET_CODE (x) == ROTATERT
7823 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7826 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7829 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7830 && GET_CODE (x) == PLUS
7831 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7837 /* X and Y are two things to compare using CODE. Emit the compare insn and
7838 return the rtx for register 0 in the proper mode. FP means this is a
7839 floating point compare: I don't think that it is needed on the arm. */
7841 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7843 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7844 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7846 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7851 /* Generate a sequence of insns that will generate the correct return
7852 address mask depending on the physical architecture that the program
7855 arm_gen_return_addr_mask (void)
7857 rtx reg = gen_reg_rtx (Pmode);
7859 emit_insn (gen_return_addr_mask (reg));
7864 arm_reload_in_hi (rtx *operands)
7866 rtx ref = operands[1];
7868 HOST_WIDE_INT offset = 0;
7870 if (GET_CODE (ref) == SUBREG)
7872 offset = SUBREG_BYTE (ref);
7873 ref = SUBREG_REG (ref);
7876 if (GET_CODE (ref) == REG)
7878 /* We have a pseudo which has been spilt onto the stack; there
7879 are two cases here: the first where there is a simple
7880 stack-slot replacement and a second where the stack-slot is
7881 out of range, or is used as a subreg. */
7882 if (reg_equiv_mem[REGNO (ref)])
7884 ref = reg_equiv_mem[REGNO (ref)];
7885 base = find_replacement (&XEXP (ref, 0));
7888 /* The slot is out of range, or was dressed up in a SUBREG. */
7889 base = reg_equiv_address[REGNO (ref)];
7892 base = find_replacement (&XEXP (ref, 0));
7894 /* Handle the case where the address is too complex to be offset by 1. */
7895 if (GET_CODE (base) == MINUS
7896 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7898 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7900 emit_set_insn (base_plus, base);
7903 else if (GET_CODE (base) == PLUS)
7905 /* The addend must be CONST_INT, or we would have dealt with it above. */
7906 HOST_WIDE_INT hi, lo;
7908 offset += INTVAL (XEXP (base, 1));
7909 base = XEXP (base, 0);
7911 /* Rework the address into a legal sequence of insns. */
7912 /* Valid range for lo is -4095 -> 4095 */
7915 : -((-offset) & 0xfff));
7917 /* Corner case, if lo is the max offset then we would be out of range
7918 once we have added the additional 1 below, so bump the msb into the
7919 pre-loading insn(s). */
7923 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7924 ^ (HOST_WIDE_INT) 0x80000000)
7925 - (HOST_WIDE_INT) 0x80000000);
7927 gcc_assert (hi + lo == offset);
7931 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7933 /* Get the base address; addsi3 knows how to handle constants
7934 that require more than one insn. */
7935 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7941 /* Operands[2] may overlap operands[0] (though it won't overlap
7942 operands[1]), that's why we asked for a DImode reg -- so we can
7943 use the bit that does not overlap. */
7944 if (REGNO (operands[2]) == REGNO (operands[0]))
7945 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7947 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7949 emit_insn (gen_zero_extendqisi2 (scratch,
7950 gen_rtx_MEM (QImode,
7951 plus_constant (base,
7953 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7954 gen_rtx_MEM (QImode,
7955 plus_constant (base,
7957 if (!BYTES_BIG_ENDIAN)
7958 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7959 gen_rtx_IOR (SImode,
7962 gen_rtx_SUBREG (SImode, operands[0], 0),
7966 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7967 gen_rtx_IOR (SImode,
7968 gen_rtx_ASHIFT (SImode, scratch,
7970 gen_rtx_SUBREG (SImode, operands[0], 0)));
7973 /* Handle storing a half-word to memory during reload by synthesizing as two
7974 byte stores. Take care not to clobber the input values until after we
7975 have moved them somewhere safe. This code assumes that if the DImode
7976 scratch in operands[2] overlaps either the input value or output address
7977 in some way, then that value must die in this insn (we absolutely need
7978 two scratch registers for some corner cases). */
7980 arm_reload_out_hi (rtx *operands)
7982 rtx ref = operands[0];
7983 rtx outval = operands[1];
7985 HOST_WIDE_INT offset = 0;
7987 if (GET_CODE (ref) == SUBREG)
7989 offset = SUBREG_BYTE (ref);
7990 ref = SUBREG_REG (ref);
7993 if (GET_CODE (ref) == REG)
7995 /* We have a pseudo which has been spilt onto the stack; there
7996 are two cases here: the first where there is a simple
7997 stack-slot replacement and a second where the stack-slot is
7998 out of range, or is used as a subreg. */
7999 if (reg_equiv_mem[REGNO (ref)])
8001 ref = reg_equiv_mem[REGNO (ref)];
8002 base = find_replacement (&XEXP (ref, 0));
8005 /* The slot is out of range, or was dressed up in a SUBREG. */
8006 base = reg_equiv_address[REGNO (ref)];
8009 base = find_replacement (&XEXP (ref, 0));
8011 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8013 /* Handle the case where the address is too complex to be offset by 1. */
8014 if (GET_CODE (base) == MINUS
8015 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8017 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8019 /* Be careful not to destroy OUTVAL. */
8020 if (reg_overlap_mentioned_p (base_plus, outval))
8022 /* Updating base_plus might destroy outval, see if we can
8023 swap the scratch and base_plus. */
8024 if (!reg_overlap_mentioned_p (scratch, outval))
8027 scratch = base_plus;
8032 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8034 /* Be conservative and copy OUTVAL into the scratch now,
8035 this should only be necessary if outval is a subreg
8036 of something larger than a word. */
8037 /* XXX Might this clobber base? I can't see how it can,
8038 since scratch is known to overlap with OUTVAL, and
8039 must be wider than a word. */
8040 emit_insn (gen_movhi (scratch_hi, outval));
8041 outval = scratch_hi;
8045 emit_set_insn (base_plus, base);
8048 else if (GET_CODE (base) == PLUS)
8050 /* The addend must be CONST_INT, or we would have dealt with it above. */
8051 HOST_WIDE_INT hi, lo;
8053 offset += INTVAL (XEXP (base, 1));
8054 base = XEXP (base, 0);
8056 /* Rework the address into a legal sequence of insns. */
8057 /* Valid range for lo is -4095 -> 4095 */
8060 : -((-offset) & 0xfff));
8062 /* Corner case, if lo is the max offset then we would be out of range
8063 once we have added the additional 1 below, so bump the msb into the
8064 pre-loading insn(s). */
8068 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8069 ^ (HOST_WIDE_INT) 0x80000000)
8070 - (HOST_WIDE_INT) 0x80000000);
8072 gcc_assert (hi + lo == offset);
8076 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8078 /* Be careful not to destroy OUTVAL. */
8079 if (reg_overlap_mentioned_p (base_plus, outval))
8081 /* Updating base_plus might destroy outval, see if we
8082 can swap the scratch and base_plus. */
8083 if (!reg_overlap_mentioned_p (scratch, outval))
8086 scratch = base_plus;
8091 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8093 /* Be conservative and copy outval into scratch now,
8094 this should only be necessary if outval is a
8095 subreg of something larger than a word. */
8096 /* XXX Might this clobber base? I can't see how it
8097 can, since scratch is known to overlap with
8099 emit_insn (gen_movhi (scratch_hi, outval));
8100 outval = scratch_hi;
8104 /* Get the base address; addsi3 knows how to handle constants
8105 that require more than one insn. */
8106 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8112 if (BYTES_BIG_ENDIAN)
8114 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8115 plus_constant (base, offset + 1)),
8116 gen_lowpart (QImode, outval)));
8117 emit_insn (gen_lshrsi3 (scratch,
8118 gen_rtx_SUBREG (SImode, outval, 0),
8120 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8121 gen_lowpart (QImode, scratch)));
8125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8126 gen_lowpart (QImode, outval)));
8127 emit_insn (gen_lshrsi3 (scratch,
8128 gen_rtx_SUBREG (SImode, outval, 0),
8130 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8131 plus_constant (base, offset + 1)),
8132 gen_lowpart (QImode, scratch)));
8136 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8137 (padded to the size of a word) should be passed in a register. */
8140 arm_must_pass_in_stack (enum machine_mode mode, tree type)
8142 if (TARGET_AAPCS_BASED)
8143 return must_pass_in_stack_var_size (mode, type);
8145 return must_pass_in_stack_var_size_or_pad (mode, type);
8149 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8150 Return true if an argument passed on the stack should be padded upwards,
8151 i.e. if the least-significant byte has useful data.
8152 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8153 aggregate types are placed in the lowest memory address. */
8156 arm_pad_arg_upward (enum machine_mode mode, tree type)
8158 if (!TARGET_AAPCS_BASED)
8159 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8161 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8168 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8169 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8170 byte of the register has useful data, and return the opposite if the
8171 most significant byte does.
8172 For AAPCS, small aggregates and small complex types are always padded
8176 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8177 tree type, int first ATTRIBUTE_UNUSED)
8179 if (TARGET_AAPCS_BASED
8181 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8182 && int_size_in_bytes (type) <= 4)
8185 /* Otherwise, use default padding. */
8186 return !BYTES_BIG_ENDIAN;
8190 /* Print a symbolic form of X to the debug file, F. */
8192 arm_print_value (FILE *f, rtx x)
8194 switch (GET_CODE (x))
8197 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8201 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8209 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8211 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8212 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8220 fprintf (f, "\"%s\"", XSTR (x, 0));
8224 fprintf (f, "`%s'", XSTR (x, 0));
8228 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8232 arm_print_value (f, XEXP (x, 0));
8236 arm_print_value (f, XEXP (x, 0));
8238 arm_print_value (f, XEXP (x, 1));
8246 fprintf (f, "????");
8251 /* Routines for manipulation of the constant pool. */
8253 /* Arm instructions cannot load a large constant directly into a
8254 register; they have to come from a pc relative load. The constant
8255 must therefore be placed in the addressable range of the pc
8256 relative load. Depending on the precise pc relative load
8257 instruction the range is somewhere between 256 bytes and 4k. This
8258 means that we often have to dump a constant inside a function, and
8259 generate code to branch around it.
8261 It is important to minimize this, since the branches will slow
8262 things down and make the code larger.
8264 Normally we can hide the table after an existing unconditional
8265 branch so that there is no interruption of the flow, but in the
8266 worst case the code looks like this:
8284 We fix this by performing a scan after scheduling, which notices
8285 which instructions need to have their operands fetched from the
8286 constant table and builds the table.
8288 The algorithm starts by building a table of all the constants that
8289 need fixing up and all the natural barriers in the function (places
8290 where a constant table can be dropped without breaking the flow).
8291 For each fixup we note how far the pc-relative replacement will be
8292 able to reach and the offset of the instruction into the function.
8294 Having built the table we then group the fixes together to form
8295 tables that are as large as possible (subject to addressing
8296 constraints) and emit each table of constants after the last
8297 barrier that is within range of all the instructions in the group.
8298 If a group does not contain a barrier, then we forcibly create one
8299 by inserting a jump instruction into the flow. Once the table has
8300 been inserted, the insns are then modified to reference the
8301 relevant entry in the pool.
8303 Possible enhancements to the algorithm (not implemented) are:
8305 1) For some processors and object formats, there may be benefit in
8306 aligning the pools to the start of cache lines; this alignment
8307 would need to be taken into account when calculating addressability
8310 /* These typedefs are located at the start of this file, so that
8311 they can be used in the prototypes there. This comment is to
8312 remind readers of that fact so that the following structures
8313 can be understood more easily.
8315 typedef struct minipool_node Mnode;
8316 typedef struct minipool_fixup Mfix; */
8318 struct minipool_node
8320 /* Doubly linked chain of entries. */
8323 /* The maximum offset into the code that this entry can be placed. While
8324 pushing fixes for forward references, all entries are sorted in order
8325 of increasing max_address. */
8326 HOST_WIDE_INT max_address;
8327 /* Similarly for an entry inserted for a backwards ref. */
8328 HOST_WIDE_INT min_address;
8329 /* The number of fixes referencing this entry. This can become zero
8330 if we "unpush" an entry. In this case we ignore the entry when we
8331 come to emit the code. */
8333 /* The offset from the start of the minipool. */
8334 HOST_WIDE_INT offset;
8335 /* The value in table. */
8337 /* The mode of value. */
8338 enum machine_mode mode;
8339 /* The size of the value. With iWMMXt enabled
8340 sizes > 4 also imply an alignment of 8-bytes. */
8344 struct minipool_fixup
8348 HOST_WIDE_INT address;
8350 enum machine_mode mode;
8354 HOST_WIDE_INT forwards;
8355 HOST_WIDE_INT backwards;
8358 /* Fixes less than a word need padding out to a word boundary. */
8359 #define MINIPOOL_FIX_SIZE(mode) \
8360 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8362 static Mnode * minipool_vector_head;
8363 static Mnode * minipool_vector_tail;
8364 static rtx minipool_vector_label;
8365 static int minipool_pad;
8367 /* The linked list of all minipool fixes required for this function. */
8368 Mfix * minipool_fix_head;
8369 Mfix * minipool_fix_tail;
8370 /* The fix entry for the current minipool, once it has been placed. */
8371 Mfix * minipool_barrier;
8373 /* Determines if INSN is the start of a jump table. Returns the end
8374 of the TABLE or NULL_RTX. */
8376 is_jump_table (rtx insn)
8380 if (GET_CODE (insn) == JUMP_INSN
8381 && JUMP_LABEL (insn) != NULL
8382 && ((table = next_real_insn (JUMP_LABEL (insn)))
8383 == next_real_insn (insn))
8385 && GET_CODE (table) == JUMP_INSN
8386 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8387 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8393 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8394 #define JUMP_TABLES_IN_TEXT_SECTION 0
8397 static HOST_WIDE_INT
8398 get_jump_table_size (rtx insn)
8400 /* ADDR_VECs only take room if read-only data does into the text
8402 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8404 rtx body = PATTERN (insn);
8405 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8407 HOST_WIDE_INT modesize;
8409 modesize = GET_MODE_SIZE (GET_MODE (body));
8410 size = modesize * XVECLEN (body, elt);
8414 /* Round up size of TBB table to a halfword boundary. */
8415 size = (size + 1) & ~(HOST_WIDE_INT)1;
8418 /* No padding necessary for TBH. */
8421 /* Add two bytes for alignment on Thumb. */
8434 /* Move a minipool fix MP from its current location to before MAX_MP.
8435 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8436 constraints may need updating. */
8438 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8439 HOST_WIDE_INT max_address)
8441 /* The code below assumes these are different. */
8442 gcc_assert (mp != max_mp);
8446 if (max_address < mp->max_address)
8447 mp->max_address = max_address;
8451 if (max_address > max_mp->max_address - mp->fix_size)
8452 mp->max_address = max_mp->max_address - mp->fix_size;
8454 mp->max_address = max_address;
8456 /* Unlink MP from its current position. Since max_mp is non-null,
8457 mp->prev must be non-null. */
8458 mp->prev->next = mp->next;
8459 if (mp->next != NULL)
8460 mp->next->prev = mp->prev;
8462 minipool_vector_tail = mp->prev;
8464 /* Re-insert it before MAX_MP. */
8466 mp->prev = max_mp->prev;
8469 if (mp->prev != NULL)
8470 mp->prev->next = mp;
8472 minipool_vector_head = mp;
8475 /* Save the new entry. */
8478 /* Scan over the preceding entries and adjust their addresses as
8480 while (mp->prev != NULL
8481 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8483 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8490 /* Add a constant to the minipool for a forward reference. Returns the
8491 node added or NULL if the constant will not fit in this pool. */
8493 add_minipool_forward_ref (Mfix *fix)
8495 /* If set, max_mp is the first pool_entry that has a lower
8496 constraint than the one we are trying to add. */
8497 Mnode * max_mp = NULL;
8498 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8501 /* If the minipool starts before the end of FIX->INSN then this FIX
8502 can not be placed into the current pool. Furthermore, adding the
8503 new constant pool entry may cause the pool to start FIX_SIZE bytes
8505 if (minipool_vector_head &&
8506 (fix->address + get_attr_length (fix->insn)
8507 >= minipool_vector_head->max_address - fix->fix_size))
8510 /* Scan the pool to see if a constant with the same value has
8511 already been added. While we are doing this, also note the
8512 location where we must insert the constant if it doesn't already
8514 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8516 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8517 && fix->mode == mp->mode
8518 && (GET_CODE (fix->value) != CODE_LABEL
8519 || (CODE_LABEL_NUMBER (fix->value)
8520 == CODE_LABEL_NUMBER (mp->value)))
8521 && rtx_equal_p (fix->value, mp->value))
8523 /* More than one fix references this entry. */
8525 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8528 /* Note the insertion point if necessary. */
8530 && mp->max_address > max_address)
8533 /* If we are inserting an 8-bytes aligned quantity and
8534 we have not already found an insertion point, then
8535 make sure that all such 8-byte aligned quantities are
8536 placed at the start of the pool. */
8537 if (ARM_DOUBLEWORD_ALIGN
8539 && fix->fix_size >= 8
8540 && mp->fix_size < 8)
8543 max_address = mp->max_address;
8547 /* The value is not currently in the minipool, so we need to create
8548 a new entry for it. If MAX_MP is NULL, the entry will be put on
8549 the end of the list since the placement is less constrained than
8550 any existing entry. Otherwise, we insert the new fix before
8551 MAX_MP and, if necessary, adjust the constraints on the other
8554 mp->fix_size = fix->fix_size;
8555 mp->mode = fix->mode;
8556 mp->value = fix->value;
8558 /* Not yet required for a backwards ref. */
8559 mp->min_address = -65536;
8563 mp->max_address = max_address;
8565 mp->prev = minipool_vector_tail;
8567 if (mp->prev == NULL)
8569 minipool_vector_head = mp;
8570 minipool_vector_label = gen_label_rtx ();
8573 mp->prev->next = mp;
8575 minipool_vector_tail = mp;
8579 if (max_address > max_mp->max_address - mp->fix_size)
8580 mp->max_address = max_mp->max_address - mp->fix_size;
8582 mp->max_address = max_address;
8585 mp->prev = max_mp->prev;
8587 if (mp->prev != NULL)
8588 mp->prev->next = mp;
8590 minipool_vector_head = mp;
8593 /* Save the new entry. */
8596 /* Scan over the preceding entries and adjust their addresses as
8598 while (mp->prev != NULL
8599 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8601 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8609 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8610 HOST_WIDE_INT min_address)
8612 HOST_WIDE_INT offset;
8614 /* The code below assumes these are different. */
8615 gcc_assert (mp != min_mp);
8619 if (min_address > mp->min_address)
8620 mp->min_address = min_address;
8624 /* We will adjust this below if it is too loose. */
8625 mp->min_address = min_address;
8627 /* Unlink MP from its current position. Since min_mp is non-null,
8628 mp->next must be non-null. */
8629 mp->next->prev = mp->prev;
8630 if (mp->prev != NULL)
8631 mp->prev->next = mp->next;
8633 minipool_vector_head = mp->next;
8635 /* Reinsert it after MIN_MP. */
8637 mp->next = min_mp->next;
8639 if (mp->next != NULL)
8640 mp->next->prev = mp;
8642 minipool_vector_tail = mp;
8648 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8650 mp->offset = offset;
8651 if (mp->refcount > 0)
8652 offset += mp->fix_size;
8654 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8655 mp->next->min_address = mp->min_address + mp->fix_size;
8661 /* Add a constant to the minipool for a backward reference. Returns the
8662 node added or NULL if the constant will not fit in this pool.
8664 Note that the code for insertion for a backwards reference can be
8665 somewhat confusing because the calculated offsets for each fix do
8666 not take into account the size of the pool (which is still under
8669 add_minipool_backward_ref (Mfix *fix)
8671 /* If set, min_mp is the last pool_entry that has a lower constraint
8672 than the one we are trying to add. */
8673 Mnode *min_mp = NULL;
8674 /* This can be negative, since it is only a constraint. */
8675 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8678 /* If we can't reach the current pool from this insn, or if we can't
8679 insert this entry at the end of the pool without pushing other
8680 fixes out of range, then we don't try. This ensures that we
8681 can't fail later on. */
8682 if (min_address >= minipool_barrier->address
8683 || (minipool_vector_tail->min_address + fix->fix_size
8684 >= minipool_barrier->address))
8687 /* Scan the pool to see if a constant with the same value has
8688 already been added. While we are doing this, also note the
8689 location where we must insert the constant if it doesn't already
8691 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8693 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8694 && fix->mode == mp->mode
8695 && (GET_CODE (fix->value) != CODE_LABEL
8696 || (CODE_LABEL_NUMBER (fix->value)
8697 == CODE_LABEL_NUMBER (mp->value)))
8698 && rtx_equal_p (fix->value, mp->value)
8699 /* Check that there is enough slack to move this entry to the
8700 end of the table (this is conservative). */
8702 > (minipool_barrier->address
8703 + minipool_vector_tail->offset
8704 + minipool_vector_tail->fix_size)))
8707 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8711 mp->min_address += fix->fix_size;
8714 /* Note the insertion point if necessary. */
8715 if (mp->min_address < min_address)
8717 /* For now, we do not allow the insertion of 8-byte alignment
8718 requiring nodes anywhere but at the start of the pool. */
8719 if (ARM_DOUBLEWORD_ALIGN
8720 && fix->fix_size >= 8 && mp->fix_size < 8)
8725 else if (mp->max_address
8726 < minipool_barrier->address + mp->offset + fix->fix_size)
8728 /* Inserting before this entry would push the fix beyond
8729 its maximum address (which can happen if we have
8730 re-located a forwards fix); force the new fix to come
8733 min_address = mp->min_address + fix->fix_size;
8735 /* If we are inserting an 8-bytes aligned quantity and
8736 we have not already found an insertion point, then
8737 make sure that all such 8-byte aligned quantities are
8738 placed at the start of the pool. */
8739 else if (ARM_DOUBLEWORD_ALIGN
8741 && fix->fix_size >= 8
8742 && mp->fix_size < 8)
8745 min_address = mp->min_address + fix->fix_size;
8750 /* We need to create a new entry. */
8752 mp->fix_size = fix->fix_size;
8753 mp->mode = fix->mode;
8754 mp->value = fix->value;
8756 mp->max_address = minipool_barrier->address + 65536;
8758 mp->min_address = min_address;
8763 mp->next = minipool_vector_head;
8765 if (mp->next == NULL)
8767 minipool_vector_tail = mp;
8768 minipool_vector_label = gen_label_rtx ();
8771 mp->next->prev = mp;
8773 minipool_vector_head = mp;
8777 mp->next = min_mp->next;
8781 if (mp->next != NULL)
8782 mp->next->prev = mp;
8784 minipool_vector_tail = mp;
8787 /* Save the new entry. */
8795 /* Scan over the following entries and adjust their offsets. */
8796 while (mp->next != NULL)
8798 if (mp->next->min_address < mp->min_address + mp->fix_size)
8799 mp->next->min_address = mp->min_address + mp->fix_size;
8802 mp->next->offset = mp->offset + mp->fix_size;
8804 mp->next->offset = mp->offset;
8813 assign_minipool_offsets (Mfix *barrier)
8815 HOST_WIDE_INT offset = 0;
8818 minipool_barrier = barrier;
8820 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8822 mp->offset = offset;
8824 if (mp->refcount > 0)
8825 offset += mp->fix_size;
8829 /* Output the literal table */
8831 dump_minipool (rtx scan)
8837 if (ARM_DOUBLEWORD_ALIGN)
8838 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8839 if (mp->refcount > 0 && mp->fix_size >= 8)
8847 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8848 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8850 scan = emit_label_after (gen_label_rtx (), scan);
8851 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8852 scan = emit_label_after (minipool_vector_label, scan);
8854 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8856 if (mp->refcount > 0)
8861 ";; Offset %u, min %ld, max %ld ",
8862 (unsigned) mp->offset, (unsigned long) mp->min_address,
8863 (unsigned long) mp->max_address);
8864 arm_print_value (dump_file, mp->value);
8865 fputc ('\n', dump_file);
8868 switch (mp->fix_size)
8870 #ifdef HAVE_consttable_1
8872 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8876 #ifdef HAVE_consttable_2
8878 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8882 #ifdef HAVE_consttable_4
8884 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8888 #ifdef HAVE_consttable_8
8890 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8894 #ifdef HAVE_consttable_16
8896 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8909 minipool_vector_head = minipool_vector_tail = NULL;
8910 scan = emit_insn_after (gen_consttable_end (), scan);
8911 scan = emit_barrier_after (scan);
8914 /* Return the cost of forcibly inserting a barrier after INSN. */
8916 arm_barrier_cost (rtx insn)
8918 /* Basing the location of the pool on the loop depth is preferable,
8919 but at the moment, the basic block information seems to be
8920 corrupt by this stage of the compilation. */
8922 rtx next = next_nonnote_insn (insn);
8924 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8927 switch (GET_CODE (insn))
8930 /* It will always be better to place the table before the label, rather
8939 return base_cost - 10;
8942 return base_cost + 10;
8946 /* Find the best place in the insn stream in the range
8947 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8948 Create the barrier by inserting a jump and add a new fix entry for
8951 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8953 HOST_WIDE_INT count = 0;
8955 rtx from = fix->insn;
8956 /* The instruction after which we will insert the jump. */
8957 rtx selected = NULL;
8959 /* The address at which the jump instruction will be placed. */
8960 HOST_WIDE_INT selected_address;
8962 HOST_WIDE_INT max_count = max_address - fix->address;
8963 rtx label = gen_label_rtx ();
8965 selected_cost = arm_barrier_cost (from);
8966 selected_address = fix->address;
8968 while (from && count < max_count)
8973 /* This code shouldn't have been called if there was a natural barrier
8975 gcc_assert (GET_CODE (from) != BARRIER);
8977 /* Count the length of this insn. */
8978 count += get_attr_length (from);
8980 /* If there is a jump table, add its length. */
8981 tmp = is_jump_table (from);
8984 count += get_jump_table_size (tmp);
8986 /* Jump tables aren't in a basic block, so base the cost on
8987 the dispatch insn. If we select this location, we will
8988 still put the pool after the table. */
8989 new_cost = arm_barrier_cost (from);
8991 if (count < max_count
8992 && (!selected || new_cost <= selected_cost))
8995 selected_cost = new_cost;
8996 selected_address = fix->address + count;
8999 /* Continue after the dispatch table. */
9000 from = NEXT_INSN (tmp);
9004 new_cost = arm_barrier_cost (from);
9006 if (count < max_count
9007 && (!selected || new_cost <= selected_cost))
9010 selected_cost = new_cost;
9011 selected_address = fix->address + count;
9014 from = NEXT_INSN (from);
9017 /* Make sure that we found a place to insert the jump. */
9018 gcc_assert (selected);
9020 /* Create a new JUMP_INSN that branches around a barrier. */
9021 from = emit_jump_insn_after (gen_jump (label), selected);
9022 JUMP_LABEL (from) = label;
9023 barrier = emit_barrier_after (from);
9024 emit_label_after (label, barrier);
9026 /* Create a minipool barrier entry for the new barrier. */
9027 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9028 new_fix->insn = barrier;
9029 new_fix->address = selected_address;
9030 new_fix->next = fix->next;
9031 fix->next = new_fix;
9036 /* Record that there is a natural barrier in the insn stream at
9039 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9041 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9044 fix->address = address;
9047 if (minipool_fix_head != NULL)
9048 minipool_fix_tail->next = fix;
9050 minipool_fix_head = fix;
9052 minipool_fix_tail = fix;
9055 /* Record INSN, which will need fixing up to load a value from the
9056 minipool. ADDRESS is the offset of the insn since the start of the
9057 function; LOC is a pointer to the part of the insn which requires
9058 fixing; VALUE is the constant that must be loaded, which is of type
9061 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9062 enum machine_mode mode, rtx value)
9064 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9066 #ifdef AOF_ASSEMBLER
9067 /* PIC symbol references need to be converted into offsets into the
9069 /* XXX This shouldn't be done here. */
9070 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
9071 value = aof_pic_entry (value);
9072 #endif /* AOF_ASSEMBLER */
9075 fix->address = address;
9078 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9080 fix->forwards = get_attr_pool_range (insn);
9081 fix->backwards = get_attr_neg_pool_range (insn);
9082 fix->minipool = NULL;
9084 /* If an insn doesn't have a range defined for it, then it isn't
9085 expecting to be reworked by this code. Better to stop now than
9086 to generate duff assembly code. */
9087 gcc_assert (fix->forwards || fix->backwards);
9089 /* If an entry requires 8-byte alignment then assume all constant pools
9090 require 4 bytes of padding. Trying to do this later on a per-pool
9091 basis is awkward because existing pool entries have to be modified. */
9092 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9098 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9099 GET_MODE_NAME (mode),
9100 INSN_UID (insn), (unsigned long) address,
9101 -1 * (long)fix->backwards, (long)fix->forwards);
9102 arm_print_value (dump_file, fix->value);
9103 fprintf (dump_file, "\n");
9106 /* Add it to the chain of fixes. */
9109 if (minipool_fix_head != NULL)
9110 minipool_fix_tail->next = fix;
9112 minipool_fix_head = fix;
9114 minipool_fix_tail = fix;
9117 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9118 Returns the number of insns needed, or 99 if we don't know how to
9121 arm_const_double_inline_cost (rtx val)
9123 rtx lowpart, highpart;
9124 enum machine_mode mode;
9126 mode = GET_MODE (val);
9128 if (mode == VOIDmode)
9131 gcc_assert (GET_MODE_SIZE (mode) == 8);
9133 lowpart = gen_lowpart (SImode, val);
9134 highpart = gen_highpart_mode (SImode, mode, val);
9136 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9137 gcc_assert (GET_CODE (highpart) == CONST_INT);
9139 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9140 NULL_RTX, NULL_RTX, 0, 0)
9141 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9142 NULL_RTX, NULL_RTX, 0, 0));
9145 /* Return true if it is worthwhile to split a 64-bit constant into two
9146 32-bit operations. This is the case if optimizing for size, or
9147 if we have load delay slots, or if one 32-bit part can be done with
9148 a single data operation. */
9150 arm_const_double_by_parts (rtx val)
9152 enum machine_mode mode = GET_MODE (val);
9155 if (optimize_size || arm_ld_sched)
9158 if (mode == VOIDmode)
9161 part = gen_highpart_mode (SImode, mode, val);
9163 gcc_assert (GET_CODE (part) == CONST_INT);
9165 if (const_ok_for_arm (INTVAL (part))
9166 || const_ok_for_arm (~INTVAL (part)))
9169 part = gen_lowpart (SImode, val);
9171 gcc_assert (GET_CODE (part) == CONST_INT);
9173 if (const_ok_for_arm (INTVAL (part))
9174 || const_ok_for_arm (~INTVAL (part)))
9180 /* Scan INSN and note any of its operands that need fixing.
9181 If DO_PUSHES is false we do not actually push any of the fixups
9182 needed. The function returns TRUE if any fixups were needed/pushed.
9183 This is used by arm_memory_load_p() which needs to know about loads
9184 of constants that will be converted into minipool loads. */
9186 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9188 bool result = false;
9191 extract_insn (insn);
9193 if (!constrain_operands (1))
9194 fatal_insn_not_found (insn);
9196 if (recog_data.n_alternatives == 0)
9199 /* Fill in recog_op_alt with information about the constraints of
9201 preprocess_constraints ();
9203 for (opno = 0; opno < recog_data.n_operands; opno++)
9205 /* Things we need to fix can only occur in inputs. */
9206 if (recog_data.operand_type[opno] != OP_IN)
9209 /* If this alternative is a memory reference, then any mention
9210 of constants in this alternative is really to fool reload
9211 into allowing us to accept one there. We need to fix them up
9212 now so that we output the right code. */
9213 if (recog_op_alt[opno][which_alternative].memory_ok)
9215 rtx op = recog_data.operand[opno];
9217 if (CONSTANT_P (op))
9220 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9221 recog_data.operand_mode[opno], op);
9224 else if (GET_CODE (op) == MEM
9225 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9226 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9230 rtx cop = avoid_constant_pool_reference (op);
9232 /* Casting the address of something to a mode narrower
9233 than a word can cause avoid_constant_pool_reference()
9234 to return the pool reference itself. That's no good to
9235 us here. Lets just hope that we can use the
9236 constant pool value directly. */
9238 cop = get_pool_constant (XEXP (op, 0));
9240 push_minipool_fix (insn, address,
9241 recog_data.operand_loc[opno],
9242 recog_data.operand_mode[opno], cop);
9253 /* Gcc puts the pool in the wrong place for ARM, since we can only
9254 load addresses a limited distance around the pc. We do some
9255 special munging to move the constant pool values to the correct
9256 point in the code. */
9261 HOST_WIDE_INT address = 0;
9264 minipool_fix_head = minipool_fix_tail = NULL;
9266 /* The first insn must always be a note, or the code below won't
9267 scan it properly. */
9268 insn = get_insns ();
9269 gcc_assert (GET_CODE (insn) == NOTE);
9272 /* Scan all the insns and record the operands that will need fixing. */
9273 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9275 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9276 && (arm_cirrus_insn_p (insn)
9277 || GET_CODE (insn) == JUMP_INSN
9278 || arm_memory_load_p (insn)))
9279 cirrus_reorg (insn);
9281 if (GET_CODE (insn) == BARRIER)
9282 push_minipool_barrier (insn, address);
9283 else if (INSN_P (insn))
9287 note_invalid_constants (insn, address, true);
9288 address += get_attr_length (insn);
9290 /* If the insn is a vector jump, add the size of the table
9291 and skip the table. */
9292 if ((table = is_jump_table (insn)) != NULL)
9294 address += get_jump_table_size (table);
9300 fix = minipool_fix_head;
9302 /* Now scan the fixups and perform the required changes. */
9307 Mfix * last_added_fix;
9308 Mfix * last_barrier = NULL;
9311 /* Skip any further barriers before the next fix. */
9312 while (fix && GET_CODE (fix->insn) == BARRIER)
9315 /* No more fixes. */
9319 last_added_fix = NULL;
9321 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9323 if (GET_CODE (ftmp->insn) == BARRIER)
9325 if (ftmp->address >= minipool_vector_head->max_address)
9328 last_barrier = ftmp;
9330 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9333 last_added_fix = ftmp; /* Keep track of the last fix added. */
9336 /* If we found a barrier, drop back to that; any fixes that we
9337 could have reached but come after the barrier will now go in
9338 the next mini-pool. */
9339 if (last_barrier != NULL)
9341 /* Reduce the refcount for those fixes that won't go into this
9343 for (fdel = last_barrier->next;
9344 fdel && fdel != ftmp;
9347 fdel->minipool->refcount--;
9348 fdel->minipool = NULL;
9351 ftmp = last_barrier;
9355 /* ftmp is first fix that we can't fit into this pool and
9356 there no natural barriers that we could use. Insert a
9357 new barrier in the code somewhere between the previous
9358 fix and this one, and arrange to jump around it. */
9359 HOST_WIDE_INT max_address;
9361 /* The last item on the list of fixes must be a barrier, so
9362 we can never run off the end of the list of fixes without
9363 last_barrier being set. */
9366 max_address = minipool_vector_head->max_address;
9367 /* Check that there isn't another fix that is in range that
9368 we couldn't fit into this pool because the pool was
9369 already too large: we need to put the pool before such an
9370 instruction. The pool itself may come just after the
9371 fix because create_fix_barrier also allows space for a
9372 jump instruction. */
9373 if (ftmp->address < max_address)
9374 max_address = ftmp->address + 1;
9376 last_barrier = create_fix_barrier (last_added_fix, max_address);
9379 assign_minipool_offsets (last_barrier);
9383 if (GET_CODE (ftmp->insn) != BARRIER
9384 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9391 /* Scan over the fixes we have identified for this pool, fixing them
9392 up and adding the constants to the pool itself. */
9393 for (this_fix = fix; this_fix && ftmp != this_fix;
9394 this_fix = this_fix->next)
9395 if (GET_CODE (this_fix->insn) != BARRIER)
9398 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9399 minipool_vector_label),
9400 this_fix->minipool->offset);
9401 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9404 dump_minipool (last_barrier->insn);
9408 /* From now on we must synthesize any constants that we can't handle
9409 directly. This can happen if the RTL gets split during final
9410 instruction generation. */
9411 after_arm_reorg = 1;
9413 /* Free the minipool memory. */
9414 obstack_free (&minipool_obstack, minipool_startobj);
9417 /* Routines to output assembly language. */
9419 /* If the rtx is the correct value then return the string of the number.
9420 In this way we can ensure that valid double constants are generated even
9421 when cross compiling. */
9423 fp_immediate_constant (rtx x)
9428 if (!fp_consts_inited)
9431 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9432 for (i = 0; i < 8; i++)
9433 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9434 return strings_fp[i];
9439 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9441 fp_const_from_val (REAL_VALUE_TYPE *r)
9445 if (!fp_consts_inited)
9448 for (i = 0; i < 8; i++)
9449 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9450 return strings_fp[i];
9455 /* Output the operands of a LDM/STM instruction to STREAM.
9456 MASK is the ARM register set mask of which only bits 0-15 are important.
9457 REG is the base register, either the frame pointer or the stack pointer,
9458 INSTR is the possibly suffixed load or store instruction.
9459 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9462 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9463 unsigned long mask, int rfe)
9466 bool not_first = FALSE;
9468 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9469 fputc ('\t', stream);
9470 asm_fprintf (stream, instr, reg);
9471 fputc ('{', stream);
9473 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9474 if (mask & (1 << i))
9477 fprintf (stream, ", ");
9479 asm_fprintf (stream, "%r", i);
9484 fprintf (stream, "}^\n");
9486 fprintf (stream, "}\n");
9490 /* Output a FLDMD instruction to STREAM.
9491 BASE if the register containing the address.
9492 REG and COUNT specify the register range.
9493 Extra registers may be added to avoid hardware bugs.
9495 We output FLDMD even for ARMv5 VFP implementations. Although
9496 FLDMD is technically not supported until ARMv6, it is believed
9497 that all VFP implementations support its use in this context. */
9500 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9504 /* Workaround ARM10 VFPr1 bug. */
9505 if (count == 2 && !arm_arch6)
9512 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9513 load into multiple parts if we have to handle more than 16 registers. */
9516 vfp_output_fldmd (stream, base, reg, 16);
9517 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9521 fputc ('\t', stream);
9522 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9524 for (i = reg; i < reg + count; i++)
9527 fputs (", ", stream);
9528 asm_fprintf (stream, "d%d", i);
9530 fputs ("}\n", stream);
9535 /* Output the assembly for a store multiple. */
9538 vfp_output_fstmd (rtx * operands)
9545 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9546 p = strlen (pattern);
9548 gcc_assert (GET_CODE (operands[1]) == REG);
9550 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9551 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9553 p += sprintf (&pattern[p], ", d%d", base + i);
9555 strcpy (&pattern[p], "}");
9557 output_asm_insn (pattern, operands);
9562 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9563 number of bytes pushed. */
9566 vfp_emit_fstmd (int base_reg, int count)
9573 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9574 register pairs are stored by a store multiple insn. We avoid this
9575 by pushing an extra pair. */
9576 if (count == 2 && !arm_arch6)
9578 if (base_reg == LAST_VFP_REGNUM - 3)
9583 /* FSTMD may not store more than 16 doubleword registers at once. Split
9584 larger stores into multiple parts (up to a maximum of two, in
9589 /* NOTE: base_reg is an internal register number, so each D register
9591 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9592 saved += vfp_emit_fstmd (base_reg, 16);
9596 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9597 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9599 reg = gen_rtx_REG (DFmode, base_reg);
9603 = gen_rtx_SET (VOIDmode,
9604 gen_frame_mem (BLKmode,
9605 gen_rtx_PRE_DEC (BLKmode,
9606 stack_pointer_rtx)),
9607 gen_rtx_UNSPEC (BLKmode,
9611 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9612 plus_constant (stack_pointer_rtx, -(count * 8)));
9613 RTX_FRAME_RELATED_P (tmp) = 1;
9614 XVECEXP (dwarf, 0, 0) = tmp;
9616 tmp = gen_rtx_SET (VOIDmode,
9617 gen_frame_mem (DFmode, stack_pointer_rtx),
9619 RTX_FRAME_RELATED_P (tmp) = 1;
9620 XVECEXP (dwarf, 0, 1) = tmp;
9622 for (i = 1; i < count; i++)
9624 reg = gen_rtx_REG (DFmode, base_reg);
9626 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9628 tmp = gen_rtx_SET (VOIDmode,
9629 gen_frame_mem (DFmode,
9630 plus_constant (stack_pointer_rtx,
9633 RTX_FRAME_RELATED_P (tmp) = 1;
9634 XVECEXP (dwarf, 0, i + 1) = tmp;
9637 par = emit_insn (par);
9638 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9640 RTX_FRAME_RELATED_P (par) = 1;
9645 /* Emit a call instruction with pattern PAT. ADDR is the address of
9649 arm_emit_call_insn (rtx pat, rtx addr)
9653 insn = emit_call_insn (pat);
9655 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9656 If the call might use such an entry, add a use of the PIC register
9657 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9658 if (TARGET_VXWORKS_RTP
9660 && GET_CODE (addr) == SYMBOL_REF
9661 && (SYMBOL_REF_DECL (addr)
9662 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9663 : !SYMBOL_REF_LOCAL_P (addr)))
9665 require_pic_register ();
9666 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9670 /* Output a 'call' insn. */
9672 output_call (rtx *operands)
9674 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9676 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9677 if (REGNO (operands[0]) == LR_REGNUM)
9679 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9680 output_asm_insn ("mov%?\t%0, %|lr", operands);
9683 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9685 if (TARGET_INTERWORK || arm_arch4t)
9686 output_asm_insn ("bx%?\t%0", operands);
9688 output_asm_insn ("mov%?\t%|pc, %0", operands);
9693 /* Output a 'call' insn that is a reference in memory. */
9695 output_call_mem (rtx *operands)
9697 if (TARGET_INTERWORK && !arm_arch5)
9699 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9700 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9701 output_asm_insn ("bx%?\t%|ip", operands);
9703 else if (regno_use_in (LR_REGNUM, operands[0]))
9705 /* LR is used in the memory address. We load the address in the
9706 first instruction. It's safe to use IP as the target of the
9707 load since the call will kill it anyway. */
9708 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9710 output_asm_insn ("blx%?\t%|ip", operands);
9713 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9715 output_asm_insn ("bx%?\t%|ip", operands);
9717 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9722 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9723 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9730 /* Output a move from arm registers to an fpa registers.
9731 OPERANDS[0] is an fpa register.
9732 OPERANDS[1] is the first registers of an arm register pair. */
9734 output_mov_long_double_fpa_from_arm (rtx *operands)
9736 int arm_reg0 = REGNO (operands[1]);
9739 gcc_assert (arm_reg0 != IP_REGNUM);
9741 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9742 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9743 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9745 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9746 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9751 /* Output a move from an fpa register to arm registers.
9752 OPERANDS[0] is the first registers of an arm register pair.
9753 OPERANDS[1] is an fpa register. */
9755 output_mov_long_double_arm_from_fpa (rtx *operands)
9757 int arm_reg0 = REGNO (operands[0]);
9760 gcc_assert (arm_reg0 != IP_REGNUM);
9762 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9763 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9764 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9766 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9767 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9771 /* Output a move from arm registers to arm registers of a long double
9772 OPERANDS[0] is the destination.
9773 OPERANDS[1] is the source. */
9775 output_mov_long_double_arm_from_arm (rtx *operands)
9777 /* We have to be careful here because the two might overlap. */
9778 int dest_start = REGNO (operands[0]);
9779 int src_start = REGNO (operands[1]);
9783 if (dest_start < src_start)
9785 for (i = 0; i < 3; i++)
9787 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9788 ops[1] = gen_rtx_REG (SImode, src_start + i);
9789 output_asm_insn ("mov%?\t%0, %1", ops);
9794 for (i = 2; i >= 0; i--)
9796 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9797 ops[1] = gen_rtx_REG (SImode, src_start + i);
9798 output_asm_insn ("mov%?\t%0, %1", ops);
9806 /* Output a move from arm registers to an fpa registers.
9807 OPERANDS[0] is an fpa register.
9808 OPERANDS[1] is the first registers of an arm register pair. */
9810 output_mov_double_fpa_from_arm (rtx *operands)
9812 int arm_reg0 = REGNO (operands[1]);
9815 gcc_assert (arm_reg0 != IP_REGNUM);
9817 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9818 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9819 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9820 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9824 /* Output a move from an fpa register to arm registers.
9825 OPERANDS[0] is the first registers of an arm register pair.
9826 OPERANDS[1] is an fpa register. */
9828 output_mov_double_arm_from_fpa (rtx *operands)
9830 int arm_reg0 = REGNO (operands[0]);
9833 gcc_assert (arm_reg0 != IP_REGNUM);
9835 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9836 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9837 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9838 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9842 /* Output a move between double words.
9843 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9844 or MEM<-REG and all MEMs must be offsettable addresses. */
9846 output_move_double (rtx *operands)
9848 enum rtx_code code0 = GET_CODE (operands[0]);
9849 enum rtx_code code1 = GET_CODE (operands[1]);
9854 int reg0 = REGNO (operands[0]);
9856 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9858 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9860 switch (GET_CODE (XEXP (operands[1], 0)))
9863 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9867 gcc_assert (TARGET_LDRD);
9868 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9873 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9875 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9879 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9883 gcc_assert (TARGET_LDRD);
9884 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9889 otherops[0] = operands[0];
9890 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9891 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9893 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9895 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9897 /* Registers overlap so split out the increment. */
9898 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9899 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9903 /* IWMMXT allows offsets larger than ldrd can handle,
9904 fix these up with a pair of ldr. */
9905 if (GET_CODE (otherops[2]) == CONST_INT
9906 && (INTVAL(otherops[2]) <= -256
9907 || INTVAL(otherops[2]) >= 256))
9909 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9910 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9911 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9914 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9919 /* IWMMXT allows offsets larger than ldrd can handle,
9920 fix these up with a pair of ldr. */
9921 if (GET_CODE (otherops[2]) == CONST_INT
9922 && (INTVAL(otherops[2]) <= -256
9923 || INTVAL(otherops[2]) >= 256))
9925 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9926 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9927 otherops[0] = operands[0];
9928 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9931 /* We only allow constant increments, so this is safe. */
9932 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9938 output_asm_insn ("adr%?\t%0, %1", operands);
9939 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9942 /* ??? This needs checking for thumb2. */
9944 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9945 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9947 otherops[0] = operands[0];
9948 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9949 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9951 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9953 if (GET_CODE (otherops[2]) == CONST_INT)
9955 switch ((int) INTVAL (otherops[2]))
9958 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9963 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9968 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
9973 && (GET_CODE (otherops[2]) == REG
9974 || (GET_CODE (otherops[2]) == CONST_INT
9975 && INTVAL (otherops[2]) > -256
9976 && INTVAL (otherops[2]) < 256)))
9978 if (reg_overlap_mentioned_p (otherops[0],
9981 /* Swap base and index registers over to
9982 avoid a conflict. */
9983 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
9984 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
9986 /* If both registers conflict, it will usually
9987 have been fixed by a splitter. */
9988 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9990 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9991 output_asm_insn ("ldr%(d%)\t%0, [%1]",
9995 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
9999 if (GET_CODE (otherops[2]) == CONST_INT)
10001 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10002 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10004 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10007 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10010 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10012 return "ldm%(ia%)\t%0, %M0";
10016 otherops[1] = adjust_address (operands[1], SImode, 4);
10017 /* Take care of overlapping base/data reg. */
10018 if (reg_mentioned_p (operands[0], operands[1]))
10020 output_asm_insn ("ldr%?\t%0, %1", otherops);
10021 output_asm_insn ("ldr%?\t%0, %1", operands);
10025 output_asm_insn ("ldr%?\t%0, %1", operands);
10026 output_asm_insn ("ldr%?\t%0, %1", otherops);
10033 /* Constraints should ensure this. */
10034 gcc_assert (code0 == MEM && code1 == REG);
10035 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10037 switch (GET_CODE (XEXP (operands[0], 0)))
10040 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10044 gcc_assert (TARGET_LDRD);
10045 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10050 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10052 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10056 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10060 gcc_assert (TARGET_LDRD);
10061 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10066 otherops[0] = operands[1];
10067 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10068 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10070 /* IWMMXT allows offsets larger than ldrd can handle,
10071 fix these up with a pair of ldr. */
10072 if (GET_CODE (otherops[2]) == CONST_INT
10073 && (INTVAL(otherops[2]) <= -256
10074 || INTVAL(otherops[2]) >= 256))
10077 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10078 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10080 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10081 otherops[0] = reg1;
10082 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10086 otherops[0] = reg1;
10087 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10088 otherops[0] = operands[1];
10089 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10092 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10093 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10095 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10099 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10100 if (GET_CODE (otherops[2]) == CONST_INT)
10102 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10105 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10111 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10117 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10122 && (GET_CODE (otherops[2]) == REG
10123 || (GET_CODE (otherops[2]) == CONST_INT
10124 && INTVAL (otherops[2]) > -256
10125 && INTVAL (otherops[2]) < 256)))
10127 otherops[0] = operands[1];
10128 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10129 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10135 otherops[0] = adjust_address (operands[0], SImode, 4);
10136 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10137 output_asm_insn ("str%?\t%1, %0", operands);
10138 output_asm_insn ("str%?\t%1, %0", otherops);
10145 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10146 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10149 output_move_quad (rtx *operands)
10151 if (REG_P (operands[0]))
10153 /* Load, or reg->reg move. */
10155 if (MEM_P (operands[1]))
10157 switch (GET_CODE (XEXP (operands[1], 0)))
10160 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10165 output_asm_insn ("adr%?\t%0, %1", operands);
10166 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10170 gcc_unreachable ();
10178 gcc_assert (REG_P (operands[1]));
10180 dest = REGNO (operands[0]);
10181 src = REGNO (operands[1]);
10183 /* This seems pretty dumb, but hopefully GCC won't try to do it
10186 for (i = 0; i < 4; i++)
10188 ops[0] = gen_rtx_REG (SImode, dest + i);
10189 ops[1] = gen_rtx_REG (SImode, src + i);
10190 output_asm_insn ("mov%?\t%0, %1", ops);
10193 for (i = 3; i >= 0; i--)
10195 ops[0] = gen_rtx_REG (SImode, dest + i);
10196 ops[1] = gen_rtx_REG (SImode, src + i);
10197 output_asm_insn ("mov%?\t%0, %1", ops);
10203 gcc_assert (MEM_P (operands[0]));
10204 gcc_assert (REG_P (operands[1]));
10205 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10207 switch (GET_CODE (XEXP (operands[0], 0)))
10210 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10214 gcc_unreachable ();
10221 /* Output a VFP load or store instruction. */
10224 output_move_vfp (rtx *operands)
10226 rtx reg, mem, addr, ops[2];
10227 int load = REG_P (operands[0]);
10228 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10229 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10230 const char *template;
10232 enum machine_mode mode;
10234 reg = operands[!load];
10235 mem = operands[load];
10237 mode = GET_MODE (reg);
10239 gcc_assert (REG_P (reg));
10240 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10241 gcc_assert (mode == SFmode
10245 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10246 gcc_assert (MEM_P (mem));
10248 addr = XEXP (mem, 0);
10250 switch (GET_CODE (addr))
10253 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10254 ops[0] = XEXP (addr, 0);
10259 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10260 ops[0] = XEXP (addr, 0);
10265 template = "f%s%c%%?\t%%%s0, %%1%s";
10271 sprintf (buff, template,
10272 load ? "ld" : "st",
10275 integer_p ? "\t%@ int" : "");
10276 output_asm_insn (buff, ops);
10281 /* Output a Neon quad-word load or store, or a load or store for
10282 larger structure modes. We could also support post-modify forms using
10283 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10285 WARNING: The ordering of elements in memory is weird in big-endian mode,
10286 because we use VSTM instead of VST1, to make it easy to make vector stores
10287 via ARM registers write values in the same order as stores direct from Neon
10288 registers. For example, the byte ordering of a quadword vector with 16-byte
10289 elements like this:
10291 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10293 will be (with lowest address first, h = most-significant byte,
10294 l = least-significant byte of element):
10296 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10297 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10299 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10302 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10304 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10305 layout will result as if VSTM/VLDM were used. */
10308 output_move_neon (rtx *operands)
10310 rtx reg, mem, addr, ops[2];
10311 int regno, load = REG_P (operands[0]);
10312 const char *template;
10314 enum machine_mode mode;
10316 reg = operands[!load];
10317 mem = operands[load];
10319 mode = GET_MODE (reg);
10321 gcc_assert (REG_P (reg));
10322 regno = REGNO (reg);
10323 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10324 || NEON_REGNO_OK_FOR_QUAD (regno));
10325 gcc_assert (VALID_NEON_DREG_MODE (mode)
10326 || VALID_NEON_QREG_MODE (mode)
10327 || VALID_NEON_STRUCT_MODE (mode));
10328 gcc_assert (MEM_P (mem));
10330 addr = XEXP (mem, 0);
10332 /* Strip off const from addresses like (const (plus (...))). */
10333 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10334 addr = XEXP (addr, 0);
10336 switch (GET_CODE (addr))
10339 template = "v%smia%%?\t%%0!, %%h1";
10340 ops[0] = XEXP (addr, 0);
10345 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10346 gcc_unreachable ();
10351 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10354 for (i = 0; i < nregs; i++)
10356 /* We're only using DImode here because it's a convenient size. */
10357 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10358 ops[1] = adjust_address (mem, SImode, 8 * i);
10359 if (reg_overlap_mentioned_p (ops[0], mem))
10361 gcc_assert (overlap == -1);
10366 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10367 output_asm_insn (buff, ops);
10372 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10373 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10374 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10375 output_asm_insn (buff, ops);
10382 template = "v%smia%%?\t%%m0, %%h1";
10387 sprintf (buff, template, load ? "ld" : "st");
10388 output_asm_insn (buff, ops);
10393 /* Output an ADD r, s, #n where n may be too big for one instruction.
10394 If adding zero to one register, output nothing. */
10396 output_add_immediate (rtx *operands)
10398 HOST_WIDE_INT n = INTVAL (operands[2]);
10400 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10403 output_multi_immediate (operands,
10404 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10407 output_multi_immediate (operands,
10408 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10415 /* Output a multiple immediate operation.
10416 OPERANDS is the vector of operands referred to in the output patterns.
10417 INSTR1 is the output pattern to use for the first constant.
10418 INSTR2 is the output pattern to use for subsequent constants.
10419 IMMED_OP is the index of the constant slot in OPERANDS.
10420 N is the constant value. */
10421 static const char *
10422 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10423 int immed_op, HOST_WIDE_INT n)
10425 #if HOST_BITS_PER_WIDE_INT > 32
10431 /* Quick and easy output. */
10432 operands[immed_op] = const0_rtx;
10433 output_asm_insn (instr1, operands);
10438 const char * instr = instr1;
10440 /* Note that n is never zero here (which would give no output). */
10441 for (i = 0; i < 32; i += 2)
10445 operands[immed_op] = GEN_INT (n & (255 << i));
10446 output_asm_insn (instr, operands);
10456 /* Return the name of a shifter operation. */
10457 static const char *
10458 arm_shift_nmem(enum rtx_code code)
10463 return ARM_LSL_NAME;
10479 /* Return the appropriate ARM instruction for the operation code.
10480 The returned result should not be overwritten. OP is the rtx of the
10481 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10484 arithmetic_instr (rtx op, int shift_first_arg)
10486 switch (GET_CODE (op))
10492 return shift_first_arg ? "rsb" : "sub";
10507 return arm_shift_nmem(GET_CODE(op));
10510 gcc_unreachable ();
10514 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10515 for the operation code. The returned result should not be overwritten.
10516 OP is the rtx code of the shift.
10517 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
10519 static const char *
10520 shift_op (rtx op, HOST_WIDE_INT *amountp)
10523 enum rtx_code code = GET_CODE (op);
10525 switch (GET_CODE (XEXP (op, 1)))
10533 *amountp = INTVAL (XEXP (op, 1));
10537 gcc_unreachable ();
10543 gcc_assert (*amountp != -1);
10544 *amountp = 32 - *amountp;
10547 /* Fall through. */
10553 mnem = arm_shift_nmem(code);
10557 /* We never have to worry about the amount being other than a
10558 power of 2, since this case can never be reloaded from a reg. */
10559 gcc_assert (*amountp != -1);
10560 *amountp = int_log2 (*amountp);
10561 return ARM_LSL_NAME;
10564 gcc_unreachable ();
10567 if (*amountp != -1)
10569 /* This is not 100% correct, but follows from the desire to merge
10570 multiplication by a power of 2 with the recognizer for a
10571 shift. >=32 is not a valid shift for "lsl", so we must try and
10572 output a shift that produces the correct arithmetical result.
10573 Using lsr #32 is identical except for the fact that the carry bit
10574 is not set correctly if we set the flags; but we never use the
10575 carry bit from such an operation, so we can ignore that. */
10576 if (code == ROTATERT)
10577 /* Rotate is just modulo 32. */
10579 else if (*amountp != (*amountp & 31))
10581 if (code == ASHIFT)
10586 /* Shifts of 0 are no-ops. */
10594 /* Obtain the shift from the POWER of two. */
10596 static HOST_WIDE_INT
10597 int_log2 (HOST_WIDE_INT power)
10599 HOST_WIDE_INT shift = 0;
10601 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10603 gcc_assert (shift <= 31);
10610 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10611 because /bin/as is horribly restrictive. The judgement about
10612 whether or not each character is 'printable' (and can be output as
10613 is) or not (and must be printed with an octal escape) must be made
10614 with reference to the *host* character set -- the situation is
10615 similar to that discussed in the comments above pp_c_char in
10616 c-pretty-print.c. */
10618 #define MAX_ASCII_LEN 51
10621 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10624 int len_so_far = 0;
10626 fputs ("\t.ascii\t\"", stream);
10628 for (i = 0; i < len; i++)
10632 if (len_so_far >= MAX_ASCII_LEN)
10634 fputs ("\"\n\t.ascii\t\"", stream);
10640 if (c == '\\' || c == '\"')
10642 putc ('\\', stream);
10650 fprintf (stream, "\\%03o", c);
10655 fputs ("\"\n", stream);
10658 /* Compute the register save mask for registers 0 through 12
10659 inclusive. This code is used by arm_compute_save_reg_mask. */
10661 static unsigned long
10662 arm_compute_save_reg0_reg12_mask (void)
10664 unsigned long func_type = arm_current_func_type ();
10665 unsigned long save_reg_mask = 0;
10668 if (IS_INTERRUPT (func_type))
10670 unsigned int max_reg;
10671 /* Interrupt functions must not corrupt any registers,
10672 even call clobbered ones. If this is a leaf function
10673 we can just examine the registers used by the RTL, but
10674 otherwise we have to assume that whatever function is
10675 called might clobber anything, and so we have to save
10676 all the call-clobbered registers as well. */
10677 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10678 /* FIQ handlers have registers r8 - r12 banked, so
10679 we only need to check r0 - r7, Normal ISRs only
10680 bank r14 and r15, so we must check up to r12.
10681 r13 is the stack pointer which is always preserved,
10682 so we do not need to consider it here. */
10687 for (reg = 0; reg <= max_reg; reg++)
10688 if (df_regs_ever_live_p (reg)
10689 || (! current_function_is_leaf && call_used_regs[reg]))
10690 save_reg_mask |= (1 << reg);
10692 /* Also save the pic base register if necessary. */
10694 && !TARGET_SINGLE_PIC_BASE
10695 && arm_pic_register != INVALID_REGNUM
10696 && current_function_uses_pic_offset_table)
10697 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10701 /* In arm mode we handle r11 (FP) as a special case. */
10702 unsigned last_reg = TARGET_ARM ? 10 : 11;
10704 /* In the normal case we only need to save those registers
10705 which are call saved and which are used by this function. */
10706 for (reg = 0; reg <= last_reg; reg++)
10707 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10708 save_reg_mask |= (1 << reg);
10710 /* Handle the frame pointer as a special case. */
10711 if (! TARGET_APCS_FRAME
10712 && ! frame_pointer_needed
10713 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10714 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10715 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10716 else if (! TARGET_APCS_FRAME
10717 && ! frame_pointer_needed
10718 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10719 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10720 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10722 /* If we aren't loading the PIC register,
10723 don't stack it even though it may be live. */
10725 && !TARGET_SINGLE_PIC_BASE
10726 && arm_pic_register != INVALID_REGNUM
10727 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10728 || current_function_uses_pic_offset_table))
10729 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10731 /* The prologue will copy SP into R0, so save it. */
10732 if (IS_STACKALIGN (func_type))
10733 save_reg_mask |= 1;
10736 /* Save registers so the exception handler can modify them. */
10737 if (current_function_calls_eh_return)
10743 reg = EH_RETURN_DATA_REGNO (i);
10744 if (reg == INVALID_REGNUM)
10746 save_reg_mask |= 1 << reg;
10750 return save_reg_mask;
10754 /* Compute a bit mask of which registers need to be
10755 saved on the stack for the current function. */
10757 static unsigned long
10758 arm_compute_save_reg_mask (void)
10760 unsigned int save_reg_mask = 0;
10761 unsigned long func_type = arm_current_func_type ();
10764 if (IS_NAKED (func_type))
10765 /* This should never really happen. */
10768 /* If we are creating a stack frame, then we must save the frame pointer,
10769 IP (which will hold the old stack pointer), LR and the PC. */
10770 if (frame_pointer_needed && TARGET_ARM)
10772 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10775 | (1 << PC_REGNUM);
10777 /* Volatile functions do not return, so there
10778 is no need to save any other registers. */
10779 if (IS_VOLATILE (func_type))
10780 return save_reg_mask;
10782 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10784 /* Decide if we need to save the link register.
10785 Interrupt routines have their own banked link register,
10786 so they never need to save it.
10787 Otherwise if we do not use the link register we do not need to save
10788 it. If we are pushing other registers onto the stack however, we
10789 can save an instruction in the epilogue by pushing the link register
10790 now and then popping it back into the PC. This incurs extra memory
10791 accesses though, so we only do it when optimizing for size, and only
10792 if we know that we will not need a fancy return sequence. */
10793 if (df_regs_ever_live_p (LR_REGNUM)
10796 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10797 && !current_function_calls_eh_return))
10798 save_reg_mask |= 1 << LR_REGNUM;
10800 if (cfun->machine->lr_save_eliminated)
10801 save_reg_mask &= ~ (1 << LR_REGNUM);
10803 if (TARGET_REALLY_IWMMXT
10804 && ((bit_count (save_reg_mask)
10805 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10807 /* The total number of registers that are going to be pushed
10808 onto the stack is odd. We need to ensure that the stack
10809 is 64-bit aligned before we start to save iWMMXt registers,
10810 and also before we start to create locals. (A local variable
10811 might be a double or long long which we will load/store using
10812 an iWMMXt instruction). Therefore we need to push another
10813 ARM register, so that the stack will be 64-bit aligned. We
10814 try to avoid using the arg registers (r0 -r3) as they might be
10815 used to pass values in a tail call. */
10816 for (reg = 4; reg <= 12; reg++)
10817 if ((save_reg_mask & (1 << reg)) == 0)
10821 save_reg_mask |= (1 << reg);
10824 cfun->machine->sibcall_blocked = 1;
10825 save_reg_mask |= (1 << 3);
10829 /* We may need to push an additional register for use initializing the
10830 PIC base register. */
10831 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10832 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10834 reg = thumb_find_work_register (1 << 4);
10835 if (!call_used_regs[reg])
10836 save_reg_mask |= (1 << reg);
10839 return save_reg_mask;
10843 /* Compute a bit mask of which registers need to be
10844 saved on the stack for the current function. */
10845 static unsigned long
10846 thumb1_compute_save_reg_mask (void)
10848 unsigned long mask;
10852 for (reg = 0; reg < 12; reg ++)
10853 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10857 && !TARGET_SINGLE_PIC_BASE
10858 && arm_pic_register != INVALID_REGNUM
10859 && current_function_uses_pic_offset_table)
10860 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10862 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10863 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10864 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10866 /* LR will also be pushed if any lo regs are pushed. */
10867 if (mask & 0xff || thumb_force_lr_save ())
10868 mask |= (1 << LR_REGNUM);
10870 /* Make sure we have a low work register if we need one.
10871 We will need one if we are going to push a high register,
10872 but we are not currently intending to push a low register. */
10873 if ((mask & 0xff) == 0
10874 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10876 /* Use thumb_find_work_register to choose which register
10877 we will use. If the register is live then we will
10878 have to push it. Use LAST_LO_REGNUM as our fallback
10879 choice for the register to select. */
10880 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10881 /* Make sure the register returned by thumb_find_work_register is
10882 not part of the return value. */
10883 if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
10884 reg = LAST_LO_REGNUM;
10886 if (! call_used_regs[reg])
10894 /* Return the number of bytes required to save VFP registers. */
10896 arm_get_vfp_saved_size (void)
10898 unsigned int regno;
10903 /* Space for saved VFP registers. */
10904 if (TARGET_HARD_FLOAT && TARGET_VFP)
10907 for (regno = FIRST_VFP_REGNUM;
10908 regno < LAST_VFP_REGNUM;
10911 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10912 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10916 /* Workaround ARM10 VFPr1 bug. */
10917 if (count == 2 && !arm_arch6)
10919 saved += count * 8;
10928 if (count == 2 && !arm_arch6)
10930 saved += count * 8;
10937 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10938 everything bar the final return instruction. */
10940 output_return_instruction (rtx operand, int really_return, int reverse)
10942 char conditional[10];
10945 unsigned long live_regs_mask;
10946 unsigned long func_type;
10947 arm_stack_offsets *offsets;
10949 func_type = arm_current_func_type ();
10951 if (IS_NAKED (func_type))
10954 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10956 /* If this function was declared non-returning, and we have
10957 found a tail call, then we have to trust that the called
10958 function won't return. */
10963 /* Otherwise, trap an attempted return by aborting. */
10965 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10967 assemble_external_libcall (ops[1]);
10968 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
10974 gcc_assert (!current_function_calls_alloca || really_return);
10976 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
10978 return_used_this_function = 1;
10980 live_regs_mask = arm_compute_save_reg_mask ();
10982 if (live_regs_mask)
10984 const char * return_reg;
10986 /* If we do not have any special requirements for function exit
10987 (e.g. interworking) then we can load the return address
10988 directly into the PC. Otherwise we must load it into LR. */
10990 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
10991 return_reg = reg_names[PC_REGNUM];
10993 return_reg = reg_names[LR_REGNUM];
10995 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
10997 /* There are three possible reasons for the IP register
10998 being saved. 1) a stack frame was created, in which case
10999 IP contains the old stack pointer, or 2) an ISR routine
11000 corrupted it, or 3) it was saved to align the stack on
11001 iWMMXt. In case 1, restore IP into SP, otherwise just
11003 if (frame_pointer_needed)
11005 live_regs_mask &= ~ (1 << IP_REGNUM);
11006 live_regs_mask |= (1 << SP_REGNUM);
11009 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11012 /* On some ARM architectures it is faster to use LDR rather than
11013 LDM to load a single register. On other architectures, the
11014 cost is the same. In 26 bit mode, or for exception handlers,
11015 we have to use LDM to load the PC so that the CPSR is also
11017 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11018 if (live_regs_mask == (1U << reg))
11021 if (reg <= LAST_ARM_REGNUM
11022 && (reg != LR_REGNUM
11024 || ! IS_INTERRUPT (func_type)))
11026 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11027 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11034 /* Generate the load multiple instruction to restore the
11035 registers. Note we can get here, even if
11036 frame_pointer_needed is true, but only if sp already
11037 points to the base of the saved core registers. */
11038 if (live_regs_mask & (1 << SP_REGNUM))
11040 unsigned HOST_WIDE_INT stack_adjust;
11042 offsets = arm_get_frame_offsets ();
11043 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11044 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11046 if (stack_adjust && arm_arch5 && TARGET_ARM)
11047 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11050 /* If we can't use ldmib (SA110 bug),
11051 then try to pop r3 instead. */
11053 live_regs_mask |= 1 << 3;
11054 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11058 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11060 p = instr + strlen (instr);
11062 for (reg = 0; reg <= SP_REGNUM; reg++)
11063 if (live_regs_mask & (1 << reg))
11065 int l = strlen (reg_names[reg]);
11071 memcpy (p, ", ", 2);
11075 memcpy (p, "%|", 2);
11076 memcpy (p + 2, reg_names[reg], l);
11080 if (live_regs_mask & (1 << LR_REGNUM))
11082 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11083 /* If returning from an interrupt, restore the CPSR. */
11084 if (IS_INTERRUPT (func_type))
11091 output_asm_insn (instr, & operand);
11093 /* See if we need to generate an extra instruction to
11094 perform the actual function return. */
11096 && func_type != ARM_FT_INTERWORKED
11097 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11099 /* The return has already been handled
11100 by loading the LR into the PC. */
11107 switch ((int) ARM_FUNC_TYPE (func_type))
11111 /* ??? This is wrong for unified assembly syntax. */
11112 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11115 case ARM_FT_INTERWORKED:
11116 sprintf (instr, "bx%s\t%%|lr", conditional);
11119 case ARM_FT_EXCEPTION:
11120 /* ??? This is wrong for unified assembly syntax. */
11121 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11125 /* Use bx if it's available. */
11126 if (arm_arch5 || arm_arch4t)
11127 sprintf (instr, "bx%s\t%%|lr", conditional);
11129 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11133 output_asm_insn (instr, & operand);
11139 /* Write the function name into the code section, directly preceding
11140 the function prologue.
11142 Code will be output similar to this:
11144 .ascii "arm_poke_function_name", 0
11147 .word 0xff000000 + (t1 - t0)
11148 arm_poke_function_name
11150 stmfd sp!, {fp, ip, lr, pc}
11153 When performing a stack backtrace, code can inspect the value
11154 of 'pc' stored at 'fp' + 0. If the trace function then looks
11155 at location pc - 12 and the top 8 bits are set, then we know
11156 that there is a function name embedded immediately preceding this
11157 location and has length ((pc[-3]) & 0xff000000).
11159 We assume that pc is declared as a pointer to an unsigned long.
11161 It is of no benefit to output the function name if we are assembling
11162 a leaf function. These function types will not contain a stack
11163 backtrace structure, therefore it is not possible to determine the
11166 arm_poke_function_name (FILE *stream, const char *name)
11168 unsigned long alignlength;
11169 unsigned long length;
11172 length = strlen (name) + 1;
11173 alignlength = ROUND_UP_WORD (length);
11175 ASM_OUTPUT_ASCII (stream, name, length);
11176 ASM_OUTPUT_ALIGN (stream, 2);
11177 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11178 assemble_aligned_integer (UNITS_PER_WORD, x);
11181 /* Place some comments into the assembler stream
11182 describing the current function. */
11184 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11186 unsigned long func_type;
11190 thumb1_output_function_prologue (f, frame_size);
11194 /* Sanity check. */
11195 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11197 func_type = arm_current_func_type ();
11199 switch ((int) ARM_FUNC_TYPE (func_type))
11202 case ARM_FT_NORMAL:
11204 case ARM_FT_INTERWORKED:
11205 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11208 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11211 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11213 case ARM_FT_EXCEPTION:
11214 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11218 if (IS_NAKED (func_type))
11219 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11221 if (IS_VOLATILE (func_type))
11222 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11224 if (IS_NESTED (func_type))
11225 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11226 if (IS_STACKALIGN (func_type))
11227 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11229 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11230 current_function_args_size,
11231 current_function_pretend_args_size, frame_size);
11233 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11234 frame_pointer_needed,
11235 cfun->machine->uses_anonymous_args);
11237 if (cfun->machine->lr_save_eliminated)
11238 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11240 if (current_function_calls_eh_return)
11241 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11243 #ifdef AOF_ASSEMBLER
11245 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
11248 return_used_this_function = 0;
11252 arm_output_epilogue (rtx sibling)
11255 unsigned long saved_regs_mask;
11256 unsigned long func_type;
11257 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11258 frame that is $fp + 4 for a non-variadic function. */
11259 int floats_offset = 0;
11261 FILE * f = asm_out_file;
11262 unsigned int lrm_count = 0;
11263 int really_return = (sibling == NULL);
11265 arm_stack_offsets *offsets;
11267 /* If we have already generated the return instruction
11268 then it is futile to generate anything else. */
11269 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11272 func_type = arm_current_func_type ();
11274 if (IS_NAKED (func_type))
11275 /* Naked functions don't have epilogues. */
11278 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11282 /* A volatile function should never return. Call abort. */
11283 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11284 assemble_external_libcall (op);
11285 output_asm_insn ("bl\t%a0", &op);
11290 /* If we are throwing an exception, then we really must be doing a
11291 return, so we can't tail-call. */
11292 gcc_assert (!current_function_calls_eh_return || really_return);
11294 offsets = arm_get_frame_offsets ();
11295 saved_regs_mask = arm_compute_save_reg_mask ();
11298 lrm_count = bit_count (saved_regs_mask);
11300 floats_offset = offsets->saved_args;
11301 /* Compute how far away the floats will be. */
11302 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11303 if (saved_regs_mask & (1 << reg))
11304 floats_offset += 4;
11306 if (frame_pointer_needed && TARGET_ARM)
11308 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11309 int vfp_offset = offsets->frame;
11311 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11313 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11314 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11316 floats_offset += 12;
11317 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11318 reg, FP_REGNUM, floats_offset - vfp_offset);
11323 start_reg = LAST_FPA_REGNUM;
11325 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11327 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11329 floats_offset += 12;
11331 /* We can't unstack more than four registers at once. */
11332 if (start_reg - reg == 3)
11334 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11335 reg, FP_REGNUM, floats_offset - vfp_offset);
11336 start_reg = reg - 1;
11341 if (reg != start_reg)
11342 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11343 reg + 1, start_reg - reg,
11344 FP_REGNUM, floats_offset - vfp_offset);
11345 start_reg = reg - 1;
11349 /* Just in case the last register checked also needs unstacking. */
11350 if (reg != start_reg)
11351 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11352 reg + 1, start_reg - reg,
11353 FP_REGNUM, floats_offset - vfp_offset);
11356 if (TARGET_HARD_FLOAT && TARGET_VFP)
11360 /* The fldmd insns do not have base+offset addressing
11361 modes, so we use IP to hold the address. */
11362 saved_size = arm_get_vfp_saved_size ();
11364 if (saved_size > 0)
11366 floats_offset += saved_size;
11367 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11368 FP_REGNUM, floats_offset - vfp_offset);
11370 start_reg = FIRST_VFP_REGNUM;
11371 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11373 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11374 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11376 if (start_reg != reg)
11377 vfp_output_fldmd (f, IP_REGNUM,
11378 (start_reg - FIRST_VFP_REGNUM) / 2,
11379 (reg - start_reg) / 2);
11380 start_reg = reg + 2;
11383 if (start_reg != reg)
11384 vfp_output_fldmd (f, IP_REGNUM,
11385 (start_reg - FIRST_VFP_REGNUM) / 2,
11386 (reg - start_reg) / 2);
11391 /* The frame pointer is guaranteed to be non-double-word aligned.
11392 This is because it is set to (old_stack_pointer - 4) and the
11393 old_stack_pointer was double word aligned. Thus the offset to
11394 the iWMMXt registers to be loaded must also be non-double-word
11395 sized, so that the resultant address *is* double-word aligned.
11396 We can ignore floats_offset since that was already included in
11397 the live_regs_mask. */
11398 lrm_count += (lrm_count % 2 ? 2 : 1);
11400 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11401 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11403 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11404 reg, FP_REGNUM, lrm_count * 4);
11409 /* saved_regs_mask should contain the IP, which at the time of stack
11410 frame generation actually contains the old stack pointer. So a
11411 quick way to unwind the stack is just pop the IP register directly
11412 into the stack pointer. */
11413 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11414 saved_regs_mask &= ~ (1 << IP_REGNUM);
11415 saved_regs_mask |= (1 << SP_REGNUM);
11417 /* There are two registers left in saved_regs_mask - LR and PC. We
11418 only need to restore the LR register (the return address), but to
11419 save time we can load it directly into the PC, unless we need a
11420 special function exit sequence, or we are not really returning. */
11422 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11423 && !current_function_calls_eh_return)
11424 /* Delete the LR from the register mask, so that the LR on
11425 the stack is loaded into the PC in the register mask. */
11426 saved_regs_mask &= ~ (1 << LR_REGNUM);
11428 saved_regs_mask &= ~ (1 << PC_REGNUM);
11430 /* We must use SP as the base register, because SP is one of the
11431 registers being restored. If an interrupt or page fault
11432 happens in the ldm instruction, the SP might or might not
11433 have been restored. That would be bad, as then SP will no
11434 longer indicate the safe area of stack, and we can get stack
11435 corruption. Using SP as the base register means that it will
11436 be reset correctly to the original value, should an interrupt
11437 occur. If the stack pointer already points at the right
11438 place, then omit the subtraction. */
11439 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11440 || current_function_calls_alloca)
11441 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11442 4 * bit_count (saved_regs_mask));
11443 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11445 if (IS_INTERRUPT (func_type))
11446 /* Interrupt handlers will have pushed the
11447 IP onto the stack, so restore it now. */
11448 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11452 HOST_WIDE_INT amount;
11454 /* Restore stack pointer if necessary. */
11455 if (frame_pointer_needed)
11457 /* For Thumb-2 restore sp from the frame pointer.
11458 Operand restrictions mean we have to increment FP, then copy
11460 amount = offsets->locals_base - offsets->saved_regs;
11461 operands[0] = hard_frame_pointer_rtx;
11465 operands[0] = stack_pointer_rtx;
11466 amount = offsets->outgoing_args - offsets->saved_regs;
11471 operands[1] = operands[0];
11472 operands[2] = GEN_INT (amount);
11473 output_add_immediate (operands);
11475 if (frame_pointer_needed)
11476 asm_fprintf (f, "\tmov\t%r, %r\n",
11477 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11479 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11481 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11482 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11483 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11488 start_reg = FIRST_FPA_REGNUM;
11490 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11492 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11494 if (reg - start_reg == 3)
11496 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11497 start_reg, SP_REGNUM);
11498 start_reg = reg + 1;
11503 if (reg != start_reg)
11504 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11505 start_reg, reg - start_reg,
11508 start_reg = reg + 1;
11512 /* Just in case the last register checked also needs unstacking. */
11513 if (reg != start_reg)
11514 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11515 start_reg, reg - start_reg, SP_REGNUM);
11518 if (TARGET_HARD_FLOAT && TARGET_VFP)
11520 start_reg = FIRST_VFP_REGNUM;
11521 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11523 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11524 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11526 if (start_reg != reg)
11527 vfp_output_fldmd (f, SP_REGNUM,
11528 (start_reg - FIRST_VFP_REGNUM) / 2,
11529 (reg - start_reg) / 2);
11530 start_reg = reg + 2;
11533 if (start_reg != reg)
11534 vfp_output_fldmd (f, SP_REGNUM,
11535 (start_reg - FIRST_VFP_REGNUM) / 2,
11536 (reg - start_reg) / 2);
11539 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11540 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11541 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11543 /* If we can, restore the LR into the PC. */
11544 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11545 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11546 && !IS_STACKALIGN (func_type)
11548 && current_function_pretend_args_size == 0
11549 && saved_regs_mask & (1 << LR_REGNUM)
11550 && !current_function_calls_eh_return)
11552 saved_regs_mask &= ~ (1 << LR_REGNUM);
11553 saved_regs_mask |= (1 << PC_REGNUM);
11554 rfe = IS_INTERRUPT (func_type);
11559 /* Load the registers off the stack. If we only have one register
11560 to load use the LDR instruction - it is faster. For Thumb-2
11561 always use pop and the assembler will pick the best instruction.*/
11562 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11563 && !IS_INTERRUPT(func_type))
11565 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11567 else if (saved_regs_mask)
11569 if (saved_regs_mask & (1 << SP_REGNUM))
11570 /* Note - write back to the stack register is not enabled
11571 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11572 in the list of registers and if we add writeback the
11573 instruction becomes UNPREDICTABLE. */
11574 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11576 else if (TARGET_ARM)
11577 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11580 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11583 if (current_function_pretend_args_size)
11585 /* Unwind the pre-pushed regs. */
11586 operands[0] = operands[1] = stack_pointer_rtx;
11587 operands[2] = GEN_INT (current_function_pretend_args_size);
11588 output_add_immediate (operands);
11592 /* We may have already restored PC directly from the stack. */
11593 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11596 /* Stack adjustment for exception handler. */
11597 if (current_function_calls_eh_return)
11598 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11599 ARM_EH_STACKADJ_REGNUM);
11601 /* Generate the return instruction. */
11602 switch ((int) ARM_FUNC_TYPE (func_type))
11606 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11609 case ARM_FT_EXCEPTION:
11610 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11613 case ARM_FT_INTERWORKED:
11614 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11618 if (IS_STACKALIGN (func_type))
11620 /* See comment in arm_expand_prologue. */
11621 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11623 if (arm_arch5 || arm_arch4t)
11624 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11626 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11634 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11635 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11637 arm_stack_offsets *offsets;
11643 /* Emit any call-via-reg trampolines that are needed for v4t support
11644 of call_reg and call_value_reg type insns. */
11645 for (regno = 0; regno < LR_REGNUM; regno++)
11647 rtx label = cfun->machine->call_via[regno];
11651 switch_to_section (function_section (current_function_decl));
11652 targetm.asm_out.internal_label (asm_out_file, "L",
11653 CODE_LABEL_NUMBER (label));
11654 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11658 /* ??? Probably not safe to set this here, since it assumes that a
11659 function will be emitted as assembly immediately after we generate
11660 RTL for it. This does not happen for inline functions. */
11661 return_used_this_function = 0;
11663 else /* TARGET_32BIT */
11665 /* We need to take into account any stack-frame rounding. */
11666 offsets = arm_get_frame_offsets ();
11668 gcc_assert (!use_return_insn (FALSE, NULL)
11669 || !return_used_this_function
11670 || offsets->saved_regs == offsets->outgoing_args
11671 || frame_pointer_needed);
11673 /* Reset the ARM-specific per-function variables. */
11674 after_arm_reorg = 0;
11678 /* Generate and emit an insn that we will recognize as a push_multi.
11679 Unfortunately, since this insn does not reflect very well the actual
11680 semantics of the operation, we need to annotate the insn for the benefit
11681 of DWARF2 frame unwind information. */
11683 emit_multi_reg_push (unsigned long mask)
11686 int num_dwarf_regs;
11690 int dwarf_par_index;
11693 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11694 if (mask & (1 << i))
11697 gcc_assert (num_regs && num_regs <= 16);
11699 /* We don't record the PC in the dwarf frame information. */
11700 num_dwarf_regs = num_regs;
11701 if (mask & (1 << PC_REGNUM))
11704 /* For the body of the insn we are going to generate an UNSPEC in
11705 parallel with several USEs. This allows the insn to be recognized
11706 by the push_multi pattern in the arm.md file. The insn looks
11707 something like this:
11710 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11711 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11712 (use (reg:SI 11 fp))
11713 (use (reg:SI 12 ip))
11714 (use (reg:SI 14 lr))
11715 (use (reg:SI 15 pc))
11718 For the frame note however, we try to be more explicit and actually
11719 show each register being stored into the stack frame, plus a (single)
11720 decrement of the stack pointer. We do it this way in order to be
11721 friendly to the stack unwinding code, which only wants to see a single
11722 stack decrement per instruction. The RTL we generate for the note looks
11723 something like this:
11726 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11727 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11728 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11729 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11730 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11733 This sequence is used both by the code to support stack unwinding for
11734 exceptions handlers and the code to generate dwarf2 frame debugging. */
11736 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11737 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11738 dwarf_par_index = 1;
11740 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11742 if (mask & (1 << i))
11744 reg = gen_rtx_REG (SImode, i);
11746 XVECEXP (par, 0, 0)
11747 = gen_rtx_SET (VOIDmode,
11748 gen_frame_mem (BLKmode,
11749 gen_rtx_PRE_DEC (BLKmode,
11750 stack_pointer_rtx)),
11751 gen_rtx_UNSPEC (BLKmode,
11752 gen_rtvec (1, reg),
11753 UNSPEC_PUSH_MULT));
11755 if (i != PC_REGNUM)
11757 tmp = gen_rtx_SET (VOIDmode,
11758 gen_frame_mem (SImode, stack_pointer_rtx),
11760 RTX_FRAME_RELATED_P (tmp) = 1;
11761 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11769 for (j = 1, i++; j < num_regs; i++)
11771 if (mask & (1 << i))
11773 reg = gen_rtx_REG (SImode, i);
11775 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11777 if (i != PC_REGNUM)
11780 = gen_rtx_SET (VOIDmode,
11781 gen_frame_mem (SImode,
11782 plus_constant (stack_pointer_rtx,
11785 RTX_FRAME_RELATED_P (tmp) = 1;
11786 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11793 par = emit_insn (par);
11795 tmp = gen_rtx_SET (VOIDmode,
11797 plus_constant (stack_pointer_rtx, -4 * num_regs));
11798 RTX_FRAME_RELATED_P (tmp) = 1;
11799 XVECEXP (dwarf, 0, 0) = tmp;
11801 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11806 /* Calculate the size of the return value that is passed in registers. */
11808 arm_size_return_regs (void)
11810 enum machine_mode mode;
11812 if (current_function_return_rtx != 0)
11813 mode = GET_MODE (current_function_return_rtx);
11815 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11817 return GET_MODE_SIZE (mode);
11821 emit_sfm (int base_reg, int count)
11828 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11829 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11831 reg = gen_rtx_REG (XFmode, base_reg++);
11833 XVECEXP (par, 0, 0)
11834 = gen_rtx_SET (VOIDmode,
11835 gen_frame_mem (BLKmode,
11836 gen_rtx_PRE_DEC (BLKmode,
11837 stack_pointer_rtx)),
11838 gen_rtx_UNSPEC (BLKmode,
11839 gen_rtvec (1, reg),
11840 UNSPEC_PUSH_MULT));
11841 tmp = gen_rtx_SET (VOIDmode,
11842 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11843 RTX_FRAME_RELATED_P (tmp) = 1;
11844 XVECEXP (dwarf, 0, 1) = tmp;
11846 for (i = 1; i < count; i++)
11848 reg = gen_rtx_REG (XFmode, base_reg++);
11849 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11851 tmp = gen_rtx_SET (VOIDmode,
11852 gen_frame_mem (XFmode,
11853 plus_constant (stack_pointer_rtx,
11856 RTX_FRAME_RELATED_P (tmp) = 1;
11857 XVECEXP (dwarf, 0, i + 1) = tmp;
11860 tmp = gen_rtx_SET (VOIDmode,
11862 plus_constant (stack_pointer_rtx, -12 * count));
11864 RTX_FRAME_RELATED_P (tmp) = 1;
11865 XVECEXP (dwarf, 0, 0) = tmp;
11867 par = emit_insn (par);
11868 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11874 /* Return true if the current function needs to save/restore LR. */
11877 thumb_force_lr_save (void)
11879 return !cfun->machine->lr_save_eliminated
11880 && (!leaf_function_p ()
11881 || thumb_far_jump_used_p ()
11882 || df_regs_ever_live_p (LR_REGNUM));
11886 /* Compute the distance from register FROM to register TO.
11887 These can be the arg pointer (26), the soft frame pointer (25),
11888 the stack pointer (13) or the hard frame pointer (11).
11889 In thumb mode r7 is used as the soft frame pointer, if needed.
11890 Typical stack layout looks like this:
11892 old stack pointer -> | |
11895 | | saved arguments for
11896 | | vararg functions
11899 hard FP & arg pointer -> | | \
11907 soft frame pointer -> | | /
11912 locals base pointer -> | | /
11917 current stack pointer -> | | /
11920 For a given function some or all of these stack components
11921 may not be needed, giving rise to the possibility of
11922 eliminating some of the registers.
11924 The values returned by this function must reflect the behavior
11925 of arm_expand_prologue() and arm_compute_save_reg_mask().
11927 The sign of the number returned reflects the direction of stack
11928 growth, so the values are positive for all eliminations except
11929 from the soft frame pointer to the hard frame pointer.
11931 SFP may point just inside the local variables block to ensure correct
11935 /* Calculate stack offsets. These are used to calculate register elimination
11936 offsets and in prologue/epilogue code. */
11938 static arm_stack_offsets *
11939 arm_get_frame_offsets (void)
11941 struct arm_stack_offsets *offsets;
11942 unsigned long func_type;
11945 HOST_WIDE_INT frame_size;
11947 offsets = &cfun->machine->stack_offsets;
11949 /* We need to know if we are a leaf function. Unfortunately, it
11950 is possible to be called after start_sequence has been called,
11951 which causes get_insns to return the insns for the sequence,
11952 not the function, which will cause leaf_function_p to return
11953 the incorrect result.
11955 to know about leaf functions once reload has completed, and the
11956 frame size cannot be changed after that time, so we can safely
11957 use the cached value. */
11959 if (reload_completed)
11962 /* Initially this is the size of the local variables. It will translated
11963 into an offset once we have determined the size of preceding data. */
11964 frame_size = ROUND_UP_WORD (get_frame_size ());
11966 leaf = leaf_function_p ();
11968 /* Space for variadic functions. */
11969 offsets->saved_args = current_function_pretend_args_size;
11971 /* In Thumb mode this is incorrect, but never used. */
11972 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
11976 unsigned int regno;
11978 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
11980 /* We know that SP will be doubleword aligned on entry, and we must
11981 preserve that condition at any subroutine call. We also require the
11982 soft frame pointer to be doubleword aligned. */
11984 if (TARGET_REALLY_IWMMXT)
11986 /* Check for the call-saved iWMMXt registers. */
11987 for (regno = FIRST_IWMMXT_REGNUM;
11988 regno <= LAST_IWMMXT_REGNUM;
11990 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
11994 func_type = arm_current_func_type ();
11995 if (! IS_VOLATILE (func_type))
11997 /* Space for saved FPA registers. */
11998 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
11999 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12002 /* Space for saved VFP registers. */
12003 if (TARGET_HARD_FLOAT && TARGET_VFP)
12004 saved += arm_get_vfp_saved_size ();
12007 else /* TARGET_THUMB1 */
12009 saved = bit_count (thumb1_compute_save_reg_mask ()) * 4;
12010 if (TARGET_BACKTRACE)
12014 /* Saved registers include the stack frame. */
12015 offsets->saved_regs = offsets->saved_args + saved;
12016 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12017 /* A leaf function does not need any stack alignment if it has nothing
12019 if (leaf && frame_size == 0)
12021 offsets->outgoing_args = offsets->soft_frame;
12022 offsets->locals_base = offsets->soft_frame;
12026 /* Ensure SFP has the correct alignment. */
12027 if (ARM_DOUBLEWORD_ALIGN
12028 && (offsets->soft_frame & 7))
12029 offsets->soft_frame += 4;
12031 offsets->locals_base = offsets->soft_frame + frame_size;
12032 offsets->outgoing_args = (offsets->locals_base
12033 + current_function_outgoing_args_size);
12035 if (ARM_DOUBLEWORD_ALIGN)
12037 /* Ensure SP remains doubleword aligned. */
12038 if (offsets->outgoing_args & 7)
12039 offsets->outgoing_args += 4;
12040 gcc_assert (!(offsets->outgoing_args & 7));
12047 /* Calculate the relative offsets for the different stack pointers. Positive
12048 offsets are in the direction of stack growth. */
12051 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12053 arm_stack_offsets *offsets;
12055 offsets = arm_get_frame_offsets ();
12057 /* OK, now we have enough information to compute the distances.
12058 There must be an entry in these switch tables for each pair
12059 of registers in ELIMINABLE_REGS, even if some of the entries
12060 seem to be redundant or useless. */
12063 case ARG_POINTER_REGNUM:
12066 case THUMB_HARD_FRAME_POINTER_REGNUM:
12069 case FRAME_POINTER_REGNUM:
12070 /* This is the reverse of the soft frame pointer
12071 to hard frame pointer elimination below. */
12072 return offsets->soft_frame - offsets->saved_args;
12074 case ARM_HARD_FRAME_POINTER_REGNUM:
12075 /* If there is no stack frame then the hard
12076 frame pointer and the arg pointer coincide. */
12077 if (offsets->frame == offsets->saved_regs)
12079 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12080 return (frame_pointer_needed
12081 && cfun->static_chain_decl != NULL
12082 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12084 case STACK_POINTER_REGNUM:
12085 /* If nothing has been pushed on the stack at all
12086 then this will return -4. This *is* correct! */
12087 return offsets->outgoing_args - (offsets->saved_args + 4);
12090 gcc_unreachable ();
12092 gcc_unreachable ();
12094 case FRAME_POINTER_REGNUM:
12097 case THUMB_HARD_FRAME_POINTER_REGNUM:
12100 case ARM_HARD_FRAME_POINTER_REGNUM:
12101 /* The hard frame pointer points to the top entry in the
12102 stack frame. The soft frame pointer to the bottom entry
12103 in the stack frame. If there is no stack frame at all,
12104 then they are identical. */
12106 return offsets->frame - offsets->soft_frame;
12108 case STACK_POINTER_REGNUM:
12109 return offsets->outgoing_args - offsets->soft_frame;
12112 gcc_unreachable ();
12114 gcc_unreachable ();
12117 /* You cannot eliminate from the stack pointer.
12118 In theory you could eliminate from the hard frame
12119 pointer to the stack pointer, but this will never
12120 happen, since if a stack frame is not needed the
12121 hard frame pointer will never be used. */
12122 gcc_unreachable ();
12127 /* Emit RTL to save coprocessor registers on function entry. Returns the
12128 number of bytes pushed. */
12131 arm_save_coproc_regs(void)
12133 int saved_size = 0;
12135 unsigned start_reg;
12138 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12139 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12141 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12142 insn = gen_rtx_MEM (V2SImode, insn);
12143 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12144 RTX_FRAME_RELATED_P (insn) = 1;
12148 /* Save any floating point call-saved registers used by this
12150 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12152 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12153 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12155 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12156 insn = gen_rtx_MEM (XFmode, insn);
12157 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12158 RTX_FRAME_RELATED_P (insn) = 1;
12164 start_reg = LAST_FPA_REGNUM;
12166 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12168 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12170 if (start_reg - reg == 3)
12172 insn = emit_sfm (reg, 4);
12173 RTX_FRAME_RELATED_P (insn) = 1;
12175 start_reg = reg - 1;
12180 if (start_reg != reg)
12182 insn = emit_sfm (reg + 1, start_reg - reg);
12183 RTX_FRAME_RELATED_P (insn) = 1;
12184 saved_size += (start_reg - reg) * 12;
12186 start_reg = reg - 1;
12190 if (start_reg != reg)
12192 insn = emit_sfm (reg + 1, start_reg - reg);
12193 saved_size += (start_reg - reg) * 12;
12194 RTX_FRAME_RELATED_P (insn) = 1;
12197 if (TARGET_HARD_FLOAT && TARGET_VFP)
12199 start_reg = FIRST_VFP_REGNUM;
12201 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12203 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12204 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12206 if (start_reg != reg)
12207 saved_size += vfp_emit_fstmd (start_reg,
12208 (reg - start_reg) / 2);
12209 start_reg = reg + 2;
12212 if (start_reg != reg)
12213 saved_size += vfp_emit_fstmd (start_reg,
12214 (reg - start_reg) / 2);
12220 /* Set the Thumb frame pointer from the stack pointer. */
12223 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12225 HOST_WIDE_INT amount;
12228 amount = offsets->outgoing_args - offsets->locals_base;
12230 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12231 stack_pointer_rtx, GEN_INT (amount)));
12234 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12235 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12236 hard_frame_pointer_rtx,
12237 stack_pointer_rtx));
12238 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12239 plus_constant (stack_pointer_rtx, amount));
12240 RTX_FRAME_RELATED_P (dwarf) = 1;
12241 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12245 RTX_FRAME_RELATED_P (insn) = 1;
12248 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12251 arm_expand_prologue (void)
12256 unsigned long live_regs_mask;
12257 unsigned long func_type;
12259 int saved_pretend_args = 0;
12260 int saved_regs = 0;
12261 unsigned HOST_WIDE_INT args_to_push;
12262 arm_stack_offsets *offsets;
12264 func_type = arm_current_func_type ();
12266 /* Naked functions don't have prologues. */
12267 if (IS_NAKED (func_type))
12270 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12271 args_to_push = current_function_pretend_args_size;
12273 /* Compute which register we will have to save onto the stack. */
12274 live_regs_mask = arm_compute_save_reg_mask ();
12276 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12278 if (IS_STACKALIGN (func_type))
12283 /* Handle a word-aligned stack pointer. We generate the following:
12288 <save and restore r0 in normal prologue/epilogue>
12292 The unwinder doesn't need to know about the stack realignment.
12293 Just tell it we saved SP in r0. */
12294 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12296 r0 = gen_rtx_REG (SImode, 0);
12297 r1 = gen_rtx_REG (SImode, 1);
12298 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12299 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12300 insn = gen_movsi (r0, stack_pointer_rtx);
12301 RTX_FRAME_RELATED_P (insn) = 1;
12302 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12303 dwarf, REG_NOTES (insn));
12305 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12306 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12309 if (frame_pointer_needed && TARGET_ARM)
12311 if (IS_INTERRUPT (func_type))
12313 /* Interrupt functions must not corrupt any registers.
12314 Creating a frame pointer however, corrupts the IP
12315 register, so we must push it first. */
12316 insn = emit_multi_reg_push (1 << IP_REGNUM);
12318 /* Do not set RTX_FRAME_RELATED_P on this insn.
12319 The dwarf stack unwinding code only wants to see one
12320 stack decrement per function, and this is not it. If
12321 this instruction is labeled as being part of the frame
12322 creation sequence then dwarf2out_frame_debug_expr will
12323 die when it encounters the assignment of IP to FP
12324 later on, since the use of SP here establishes SP as
12325 the CFA register and not IP.
12327 Anyway this instruction is not really part of the stack
12328 frame creation although it is part of the prologue. */
12330 else if (IS_NESTED (func_type))
12332 /* The Static chain register is the same as the IP register
12333 used as a scratch register during stack frame creation.
12334 To get around this need to find somewhere to store IP
12335 whilst the frame is being created. We try the following
12338 1. The last argument register.
12339 2. A slot on the stack above the frame. (This only
12340 works if the function is not a varargs function).
12341 3. Register r3, after pushing the argument registers
12344 Note - we only need to tell the dwarf2 backend about the SP
12345 adjustment in the second variant; the static chain register
12346 doesn't need to be unwound, as it doesn't contain a value
12347 inherited from the caller. */
12349 if (df_regs_ever_live_p (3) == false)
12350 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12351 else if (args_to_push == 0)
12355 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12356 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12359 /* Just tell the dwarf backend that we adjusted SP. */
12360 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12361 plus_constant (stack_pointer_rtx,
12363 RTX_FRAME_RELATED_P (insn) = 1;
12364 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12365 dwarf, REG_NOTES (insn));
12369 /* Store the args on the stack. */
12370 if (cfun->machine->uses_anonymous_args)
12371 insn = emit_multi_reg_push
12372 ((0xf0 >> (args_to_push / 4)) & 0xf);
12375 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12376 GEN_INT (- args_to_push)));
12378 RTX_FRAME_RELATED_P (insn) = 1;
12380 saved_pretend_args = 1;
12381 fp_offset = args_to_push;
12384 /* Now reuse r3 to preserve IP. */
12385 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12389 insn = emit_set_insn (ip_rtx,
12390 plus_constant (stack_pointer_rtx, fp_offset));
12391 RTX_FRAME_RELATED_P (insn) = 1;
12396 /* Push the argument registers, or reserve space for them. */
12397 if (cfun->machine->uses_anonymous_args)
12398 insn = emit_multi_reg_push
12399 ((0xf0 >> (args_to_push / 4)) & 0xf);
12402 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12403 GEN_INT (- args_to_push)));
12404 RTX_FRAME_RELATED_P (insn) = 1;
12407 /* If this is an interrupt service routine, and the link register
12408 is going to be pushed, and we are not creating a stack frame,
12409 (which would involve an extra push of IP and a pop in the epilogue)
12410 subtracting four from LR now will mean that the function return
12411 can be done with a single instruction. */
12412 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12413 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12414 && ! frame_pointer_needed
12417 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12419 emit_set_insn (lr, plus_constant (lr, -4));
12422 if (live_regs_mask)
12424 insn = emit_multi_reg_push (live_regs_mask);
12425 saved_regs += bit_count (live_regs_mask) * 4;
12426 RTX_FRAME_RELATED_P (insn) = 1;
12429 if (! IS_VOLATILE (func_type))
12430 saved_regs += arm_save_coproc_regs ();
12432 if (frame_pointer_needed && TARGET_ARM)
12434 /* Create the new frame pointer. */
12436 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12437 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12438 RTX_FRAME_RELATED_P (insn) = 1;
12440 if (IS_NESTED (func_type))
12442 /* Recover the static chain register. */
12443 if (!df_regs_ever_live_p (3)
12444 || saved_pretend_args)
12445 insn = gen_rtx_REG (SImode, 3);
12446 else /* if (current_function_pretend_args_size == 0) */
12448 insn = plus_constant (hard_frame_pointer_rtx, 4);
12449 insn = gen_frame_mem (SImode, insn);
12451 emit_set_insn (ip_rtx, insn);
12452 /* Add a USE to stop propagate_one_insn() from barfing. */
12453 emit_insn (gen_prologue_use (ip_rtx));
12458 offsets = arm_get_frame_offsets ();
12459 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12461 /* This add can produce multiple insns for a large constant, so we
12462 need to get tricky. */
12463 rtx last = get_last_insn ();
12465 amount = GEN_INT (offsets->saved_args + saved_regs
12466 - offsets->outgoing_args);
12468 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12472 last = last ? NEXT_INSN (last) : get_insns ();
12473 RTX_FRAME_RELATED_P (last) = 1;
12475 while (last != insn);
12477 /* If the frame pointer is needed, emit a special barrier that
12478 will prevent the scheduler from moving stores to the frame
12479 before the stack adjustment. */
12480 if (frame_pointer_needed)
12481 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12482 hard_frame_pointer_rtx));
12486 if (frame_pointer_needed && TARGET_THUMB2)
12487 thumb_set_frame_pointer (offsets);
12489 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12491 unsigned long mask;
12493 mask = live_regs_mask;
12494 mask &= THUMB2_WORK_REGS;
12495 if (!IS_NESTED (func_type))
12496 mask |= (1 << IP_REGNUM);
12497 arm_load_pic_register (mask);
12500 /* If we are profiling, make sure no instructions are scheduled before
12501 the call to mcount. Similarly if the user has requested no
12502 scheduling in the prolog. Similarly if we want non-call exceptions
12503 using the EABI unwinder, to prevent faulting instructions from being
12504 swapped with a stack adjustment. */
12505 if (current_function_profile || !TARGET_SCHED_PROLOG
12506 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12507 emit_insn (gen_blockage ());
12509 /* If the link register is being kept alive, with the return address in it,
12510 then make sure that it does not get reused by the ce2 pass. */
12511 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12512 cfun->machine->lr_save_eliminated = 1;
12515 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12517 arm_print_condition (FILE *stream)
12519 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12521 /* Branch conversion is not implemented for Thumb-2. */
12524 output_operand_lossage ("predicated Thumb instruction");
12527 if (current_insn_predicate != NULL)
12529 output_operand_lossage
12530 ("predicated instruction in conditional sequence");
12534 fputs (arm_condition_codes[arm_current_cc], stream);
12536 else if (current_insn_predicate)
12538 enum arm_cond_code code;
12542 output_operand_lossage ("predicated Thumb instruction");
12546 code = get_arm_condition_code (current_insn_predicate);
12547 fputs (arm_condition_codes[code], stream);
12552 /* If CODE is 'd', then the X is a condition operand and the instruction
12553 should only be executed if the condition is true.
12554 if CODE is 'D', then the X is a condition operand and the instruction
12555 should only be executed if the condition is false: however, if the mode
12556 of the comparison is CCFPEmode, then always execute the instruction -- we
12557 do this because in these circumstances !GE does not necessarily imply LT;
12558 in these cases the instruction pattern will take care to make sure that
12559 an instruction containing %d will follow, thereby undoing the effects of
12560 doing this instruction unconditionally.
12561 If CODE is 'N' then X is a floating point operand that must be negated
12563 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12564 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12566 arm_print_operand (FILE *stream, rtx x, int code)
12571 fputs (ASM_COMMENT_START, stream);
12575 fputs (user_label_prefix, stream);
12579 fputs (REGISTER_PREFIX, stream);
12583 arm_print_condition (stream);
12587 /* Nothing in unified syntax, otherwise the current condition code. */
12588 if (!TARGET_UNIFIED_ASM)
12589 arm_print_condition (stream);
12593 /* The current condition code in unified syntax, otherwise nothing. */
12594 if (TARGET_UNIFIED_ASM)
12595 arm_print_condition (stream);
12599 /* The current condition code for a condition code setting instruction.
12600 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12601 if (TARGET_UNIFIED_ASM)
12603 fputc('s', stream);
12604 arm_print_condition (stream);
12608 arm_print_condition (stream);
12609 fputc('s', stream);
12614 /* If the instruction is conditionally executed then print
12615 the current condition code, otherwise print 's'. */
12616 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12617 if (current_insn_predicate)
12618 arm_print_condition (stream);
12620 fputc('s', stream);
12623 /* %# is a "break" sequence. It doesn't output anything, but is used to
12624 seperate e.g. operand numbers from following text, if that text consists
12625 of further digits which we don't want to be part of the operand
12633 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12634 r = REAL_VALUE_NEGATE (r);
12635 fprintf (stream, "%s", fp_const_from_val (&r));
12639 /* An integer without a preceding # sign. */
12641 gcc_assert (GET_CODE (x) == CONST_INT);
12642 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12646 if (GET_CODE (x) == CONST_INT)
12649 val = ARM_SIGN_EXTEND (~INTVAL (x));
12650 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12654 putc ('~', stream);
12655 output_addr_const (stream, x);
12660 /* The low 16 bits of an immediate constant. */
12661 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12665 fprintf (stream, "%s", arithmetic_instr (x, 1));
12668 /* Truncate Cirrus shift counts. */
12670 if (GET_CODE (x) == CONST_INT)
12672 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12675 arm_print_operand (stream, x, 0);
12679 fprintf (stream, "%s", arithmetic_instr (x, 0));
12687 if (!shift_operator (x, SImode))
12689 output_operand_lossage ("invalid shift operand");
12693 shift = shift_op (x, &val);
12697 fprintf (stream, ", %s ", shift);
12699 arm_print_operand (stream, XEXP (x, 1), 0);
12701 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12706 /* An explanation of the 'Q', 'R' and 'H' register operands:
12708 In a pair of registers containing a DI or DF value the 'Q'
12709 operand returns the register number of the register containing
12710 the least significant part of the value. The 'R' operand returns
12711 the register number of the register containing the most
12712 significant part of the value.
12714 The 'H' operand returns the higher of the two register numbers.
12715 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12716 same as the 'Q' operand, since the most significant part of the
12717 value is held in the lower number register. The reverse is true
12718 on systems where WORDS_BIG_ENDIAN is false.
12720 The purpose of these operands is to distinguish between cases
12721 where the endian-ness of the values is important (for example
12722 when they are added together), and cases where the endian-ness
12723 is irrelevant, but the order of register operations is important.
12724 For example when loading a value from memory into a register
12725 pair, the endian-ness does not matter. Provided that the value
12726 from the lower memory address is put into the lower numbered
12727 register, and the value from the higher address is put into the
12728 higher numbered register, the load will work regardless of whether
12729 the value being loaded is big-wordian or little-wordian. The
12730 order of the two register loads can matter however, if the address
12731 of the memory location is actually held in one of the registers
12732 being overwritten by the load. */
12734 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12736 output_operand_lossage ("invalid operand for code '%c'", code);
12740 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12744 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12746 output_operand_lossage ("invalid operand for code '%c'", code);
12750 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12754 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12756 output_operand_lossage ("invalid operand for code '%c'", code);
12760 asm_fprintf (stream, "%r", REGNO (x) + 1);
12764 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12766 output_operand_lossage ("invalid operand for code '%c'", code);
12770 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12774 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12776 output_operand_lossage ("invalid operand for code '%c'", code);
12780 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12784 asm_fprintf (stream, "%r",
12785 GET_CODE (XEXP (x, 0)) == REG
12786 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12790 asm_fprintf (stream, "{%r-%r}",
12792 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12795 /* Like 'M', but writing doubleword vector registers, for use by Neon
12799 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12800 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12802 asm_fprintf (stream, "{d%d}", regno);
12804 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12809 /* CONST_TRUE_RTX means always -- that's the default. */
12810 if (x == const_true_rtx)
12813 if (!COMPARISON_P (x))
12815 output_operand_lossage ("invalid operand for code '%c'", code);
12819 fputs (arm_condition_codes[get_arm_condition_code (x)],
12824 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12825 want to do that. */
12826 if (x == const_true_rtx)
12828 output_operand_lossage ("instruction never executed");
12831 if (!COMPARISON_P (x))
12833 output_operand_lossage ("invalid operand for code '%c'", code);
12837 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12838 (get_arm_condition_code (x))],
12842 /* Cirrus registers can be accessed in a variety of ways:
12843 single floating point (f)
12844 double floating point (d)
12846 64bit integer (dx). */
12847 case 'W': /* Cirrus register in F mode. */
12848 case 'X': /* Cirrus register in D mode. */
12849 case 'Y': /* Cirrus register in FX mode. */
12850 case 'Z': /* Cirrus register in DX mode. */
12851 gcc_assert (GET_CODE (x) == REG
12852 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12854 fprintf (stream, "mv%s%s",
12856 : code == 'X' ? "d"
12857 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12861 /* Print cirrus register in the mode specified by the register's mode. */
12864 int mode = GET_MODE (x);
12866 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12868 output_operand_lossage ("invalid operand for code '%c'", code);
12872 fprintf (stream, "mv%s%s",
12873 mode == DFmode ? "d"
12874 : mode == SImode ? "fx"
12875 : mode == DImode ? "dx"
12876 : "f", reg_names[REGNO (x)] + 2);
12882 if (GET_CODE (x) != REG
12883 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
12884 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
12885 /* Bad value for wCG register number. */
12887 output_operand_lossage ("invalid operand for code '%c'", code);
12892 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
12895 /* Print an iWMMXt control register name. */
12897 if (GET_CODE (x) != CONST_INT
12899 || INTVAL (x) >= 16)
12900 /* Bad value for wC register number. */
12902 output_operand_lossage ("invalid operand for code '%c'", code);
12908 static const char * wc_reg_names [16] =
12910 "wCID", "wCon", "wCSSF", "wCASF",
12911 "wC4", "wC5", "wC6", "wC7",
12912 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
12913 "wC12", "wC13", "wC14", "wC15"
12916 fprintf (stream, wc_reg_names [INTVAL (x)]);
12920 /* Print a VFP/Neon double precision or quad precision register name. */
12924 int mode = GET_MODE (x);
12925 int is_quad = (code == 'q');
12928 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
12930 output_operand_lossage ("invalid operand for code '%c'", code);
12934 if (GET_CODE (x) != REG
12935 || !IS_VFP_REGNUM (REGNO (x)))
12937 output_operand_lossage ("invalid operand for code '%c'", code);
12942 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
12943 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
12945 output_operand_lossage ("invalid operand for code '%c'", code);
12949 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
12950 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
12954 /* These two codes print the low/high doubleword register of a Neon quad
12955 register, respectively. For pair-structure types, can also print
12956 low/high quadword registers. */
12960 int mode = GET_MODE (x);
12963 if ((GET_MODE_SIZE (mode) != 16
12964 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
12966 output_operand_lossage ("invalid operand for code '%c'", code);
12971 if (!NEON_REGNO_OK_FOR_QUAD (regno))
12973 output_operand_lossage ("invalid operand for code '%c'", code);
12977 if (GET_MODE_SIZE (mode) == 16)
12978 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
12979 + (code == 'f' ? 1 : 0));
12981 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
12982 + (code == 'f' ? 1 : 0));
12986 /* Print a VFPv3 floating-point constant, represented as an integer
12990 int index = vfp3_const_double_index (x);
12991 gcc_assert (index != -1);
12992 fprintf (stream, "%d", index);
12996 /* Print bits representing opcode features for Neon.
12998 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
12999 and polynomials as unsigned.
13001 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13003 Bit 2 is 1 for rounding functions, 0 otherwise. */
13005 /* Identify the type as 's', 'u', 'p' or 'f'. */
13008 HOST_WIDE_INT bits = INTVAL (x);
13009 fputc ("uspf"[bits & 3], stream);
13013 /* Likewise, but signed and unsigned integers are both 'i'. */
13016 HOST_WIDE_INT bits = INTVAL (x);
13017 fputc ("iipf"[bits & 3], stream);
13021 /* As for 'T', but emit 'u' instead of 'p'. */
13024 HOST_WIDE_INT bits = INTVAL (x);
13025 fputc ("usuf"[bits & 3], stream);
13029 /* Bit 2: rounding (vs none). */
13032 HOST_WIDE_INT bits = INTVAL (x);
13033 fputs ((bits & 4) != 0 ? "r" : "", stream);
13040 output_operand_lossage ("missing operand");
13044 switch (GET_CODE (x))
13047 asm_fprintf (stream, "%r", REGNO (x));
13051 output_memory_reference_mode = GET_MODE (x);
13052 output_address (XEXP (x, 0));
13059 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13060 sizeof (fpstr), 0, 1);
13061 fprintf (stream, "#%s", fpstr);
13064 fprintf (stream, "#%s", fp_immediate_constant (x));
13068 gcc_assert (GET_CODE (x) != NEG);
13069 fputc ('#', stream);
13070 output_addr_const (stream, x);
13076 #ifndef AOF_ASSEMBLER
13077 /* Target hook for assembling integer objects. The ARM version needs to
13078 handle word-sized values specially. */
13080 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13082 enum machine_mode mode;
13084 if (size == UNITS_PER_WORD && aligned_p)
13086 fputs ("\t.word\t", asm_out_file);
13087 output_addr_const (asm_out_file, x);
13089 /* Mark symbols as position independent. We only do this in the
13090 .text segment, not in the .data segment. */
13091 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13092 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13094 /* See legitimize_pic_address for an explanation of the
13095 TARGET_VXWORKS_RTP check. */
13096 if (TARGET_VXWORKS_RTP
13097 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13098 fputs ("(GOT)", asm_out_file);
13100 fputs ("(GOTOFF)", asm_out_file);
13102 fputc ('\n', asm_out_file);
13106 mode = GET_MODE (x);
13108 if (arm_vector_mode_supported_p (mode))
13111 unsigned int invmask = 0, parts_per_word;
13113 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13115 units = CONST_VECTOR_NUNITS (x);
13116 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13118 /* For big-endian Neon vectors, we must permute the vector to the form
13119 which, when loaded by a VLDR or VLDM instruction, will give a vector
13120 with the elements in the right order. */
13121 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13123 parts_per_word = UNITS_PER_WORD / size;
13124 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13125 support those anywhere yet. */
13126 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13129 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13130 for (i = 0; i < units; i++)
13132 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13134 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13137 for (i = 0; i < units; i++)
13139 rtx elt = CONST_VECTOR_ELT (x, i);
13140 REAL_VALUE_TYPE rval;
13142 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13145 (rval, GET_MODE_INNER (mode),
13146 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13152 return default_assemble_integer (x, size, aligned_p);
13156 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13160 if (!TARGET_AAPCS_BASED)
13163 default_named_section_asm_out_constructor
13164 : default_named_section_asm_out_destructor) (symbol, priority);
13168 /* Put these in the .init_array section, using a special relocation. */
13169 if (priority != DEFAULT_INIT_PRIORITY)
13172 sprintf (buf, "%s.%.5u",
13173 is_ctor ? ".init_array" : ".fini_array",
13175 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13182 switch_to_section (s);
13183 assemble_align (POINTER_SIZE);
13184 fputs ("\t.word\t", asm_out_file);
13185 output_addr_const (asm_out_file, symbol);
13186 fputs ("(target1)\n", asm_out_file);
13189 /* Add a function to the list of static constructors. */
13192 arm_elf_asm_constructor (rtx symbol, int priority)
13194 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13197 /* Add a function to the list of static destructors. */
13200 arm_elf_asm_destructor (rtx symbol, int priority)
13202 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13206 /* A finite state machine takes care of noticing whether or not instructions
13207 can be conditionally executed, and thus decrease execution time and code
13208 size by deleting branch instructions. The fsm is controlled by
13209 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13211 /* The state of the fsm controlling condition codes are:
13212 0: normal, do nothing special
13213 1: make ASM_OUTPUT_OPCODE not output this instruction
13214 2: make ASM_OUTPUT_OPCODE not output this instruction
13215 3: make instructions conditional
13216 4: make instructions conditional
13218 State transitions (state->state by whom under condition):
13219 0 -> 1 final_prescan_insn if the `target' is a label
13220 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13221 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13222 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13223 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13224 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13225 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13226 (the target insn is arm_target_insn).
13228 If the jump clobbers the conditions then we use states 2 and 4.
13230 A similar thing can be done with conditional return insns.
13232 XXX In case the `target' is an unconditional branch, this conditionalising
13233 of the instructions always reduces code size, but not always execution
13234 time. But then, I want to reduce the code size to somewhere near what
13235 /bin/cc produces. */
13237 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13238 instructions. When a COND_EXEC instruction is seen the subsequent
13239 instructions are scanned so that multiple conditional instructions can be
13240 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13241 specify the length and true/false mask for the IT block. These will be
13242 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13244 /* Returns the index of the ARM condition code string in
13245 `arm_condition_codes'. COMPARISON should be an rtx like
13246 `(eq (...) (...))'. */
13247 static enum arm_cond_code
13248 get_arm_condition_code (rtx comparison)
13250 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13252 enum rtx_code comp_code = GET_CODE (comparison);
13254 if (GET_MODE_CLASS (mode) != MODE_CC)
13255 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13256 XEXP (comparison, 1));
13260 case CC_DNEmode: code = ARM_NE; goto dominance;
13261 case CC_DEQmode: code = ARM_EQ; goto dominance;
13262 case CC_DGEmode: code = ARM_GE; goto dominance;
13263 case CC_DGTmode: code = ARM_GT; goto dominance;
13264 case CC_DLEmode: code = ARM_LE; goto dominance;
13265 case CC_DLTmode: code = ARM_LT; goto dominance;
13266 case CC_DGEUmode: code = ARM_CS; goto dominance;
13267 case CC_DGTUmode: code = ARM_HI; goto dominance;
13268 case CC_DLEUmode: code = ARM_LS; goto dominance;
13269 case CC_DLTUmode: code = ARM_CC;
13272 gcc_assert (comp_code == EQ || comp_code == NE);
13274 if (comp_code == EQ)
13275 return ARM_INVERSE_CONDITION_CODE (code);
13281 case NE: return ARM_NE;
13282 case EQ: return ARM_EQ;
13283 case GE: return ARM_PL;
13284 case LT: return ARM_MI;
13285 default: gcc_unreachable ();
13291 case NE: return ARM_NE;
13292 case EQ: return ARM_EQ;
13293 default: gcc_unreachable ();
13299 case NE: return ARM_MI;
13300 case EQ: return ARM_PL;
13301 default: gcc_unreachable ();
13306 /* These encodings assume that AC=1 in the FPA system control
13307 byte. This allows us to handle all cases except UNEQ and
13311 case GE: return ARM_GE;
13312 case GT: return ARM_GT;
13313 case LE: return ARM_LS;
13314 case LT: return ARM_MI;
13315 case NE: return ARM_NE;
13316 case EQ: return ARM_EQ;
13317 case ORDERED: return ARM_VC;
13318 case UNORDERED: return ARM_VS;
13319 case UNLT: return ARM_LT;
13320 case UNLE: return ARM_LE;
13321 case UNGT: return ARM_HI;
13322 case UNGE: return ARM_PL;
13323 /* UNEQ and LTGT do not have a representation. */
13324 case UNEQ: /* Fall through. */
13325 case LTGT: /* Fall through. */
13326 default: gcc_unreachable ();
13332 case NE: return ARM_NE;
13333 case EQ: return ARM_EQ;
13334 case GE: return ARM_LE;
13335 case GT: return ARM_LT;
13336 case LE: return ARM_GE;
13337 case LT: return ARM_GT;
13338 case GEU: return ARM_LS;
13339 case GTU: return ARM_CC;
13340 case LEU: return ARM_CS;
13341 case LTU: return ARM_HI;
13342 default: gcc_unreachable ();
13348 case LTU: return ARM_CS;
13349 case GEU: return ARM_CC;
13350 default: gcc_unreachable ();
13356 case NE: return ARM_NE;
13357 case EQ: return ARM_EQ;
13358 case GE: return ARM_GE;
13359 case GT: return ARM_GT;
13360 case LE: return ARM_LE;
13361 case LT: return ARM_LT;
13362 case GEU: return ARM_CS;
13363 case GTU: return ARM_HI;
13364 case LEU: return ARM_LS;
13365 case LTU: return ARM_CC;
13366 default: gcc_unreachable ();
13369 default: gcc_unreachable ();
13373 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13376 thumb2_final_prescan_insn (rtx insn)
13378 rtx first_insn = insn;
13379 rtx body = PATTERN (insn);
13381 enum arm_cond_code code;
13385 /* Remove the previous insn from the count of insns to be output. */
13386 if (arm_condexec_count)
13387 arm_condexec_count--;
13389 /* Nothing to do if we are already inside a conditional block. */
13390 if (arm_condexec_count)
13393 if (GET_CODE (body) != COND_EXEC)
13396 /* Conditional jumps are implemented directly. */
13397 if (GET_CODE (insn) == JUMP_INSN)
13400 predicate = COND_EXEC_TEST (body);
13401 arm_current_cc = get_arm_condition_code (predicate);
13403 n = get_attr_ce_count (insn);
13404 arm_condexec_count = 1;
13405 arm_condexec_mask = (1 << n) - 1;
13406 arm_condexec_masklen = n;
13407 /* See if subsequent instructions can be combined into the same block. */
13410 insn = next_nonnote_insn (insn);
13412 /* Jumping into the middle of an IT block is illegal, so a label or
13413 barrier terminates the block. */
13414 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13417 body = PATTERN (insn);
13418 /* USE and CLOBBER aren't really insns, so just skip them. */
13419 if (GET_CODE (body) == USE
13420 || GET_CODE (body) == CLOBBER)
13423 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13424 if (GET_CODE (body) != COND_EXEC)
13426 /* Allow up to 4 conditionally executed instructions in a block. */
13427 n = get_attr_ce_count (insn);
13428 if (arm_condexec_masklen + n > 4)
13431 predicate = COND_EXEC_TEST (body);
13432 code = get_arm_condition_code (predicate);
13433 mask = (1 << n) - 1;
13434 if (arm_current_cc == code)
13435 arm_condexec_mask |= (mask << arm_condexec_masklen);
13436 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13439 arm_condexec_count++;
13440 arm_condexec_masklen += n;
13442 /* A jump must be the last instruction in a conditional block. */
13443 if (GET_CODE(insn) == JUMP_INSN)
13446 /* Restore recog_data (getting the attributes of other insns can
13447 destroy this array, but final.c assumes that it remains intact
13448 across this call). */
13449 extract_constrain_insn_cached (first_insn);
13453 arm_final_prescan_insn (rtx insn)
13455 /* BODY will hold the body of INSN. */
13456 rtx body = PATTERN (insn);
13458 /* This will be 1 if trying to repeat the trick, and things need to be
13459 reversed if it appears to fail. */
13462 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
13463 taken are clobbered, even if the rtl suggests otherwise. It also
13464 means that we have to grub around within the jump expression to find
13465 out what the conditions are when the jump isn't taken. */
13466 int jump_clobbers = 0;
13468 /* If we start with a return insn, we only succeed if we find another one. */
13469 int seeking_return = 0;
13471 /* START_INSN will hold the insn from where we start looking. This is the
13472 first insn after the following code_label if REVERSE is true. */
13473 rtx start_insn = insn;
13475 /* If in state 4, check if the target branch is reached, in order to
13476 change back to state 0. */
13477 if (arm_ccfsm_state == 4)
13479 if (insn == arm_target_insn)
13481 arm_target_insn = NULL;
13482 arm_ccfsm_state = 0;
13487 /* If in state 3, it is possible to repeat the trick, if this insn is an
13488 unconditional branch to a label, and immediately following this branch
13489 is the previous target label which is only used once, and the label this
13490 branch jumps to is not too far off. */
13491 if (arm_ccfsm_state == 3)
13493 if (simplejump_p (insn))
13495 start_insn = next_nonnote_insn (start_insn);
13496 if (GET_CODE (start_insn) == BARRIER)
13498 /* XXX Isn't this always a barrier? */
13499 start_insn = next_nonnote_insn (start_insn);
13501 if (GET_CODE (start_insn) == CODE_LABEL
13502 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13503 && LABEL_NUSES (start_insn) == 1)
13508 else if (GET_CODE (body) == RETURN)
13510 start_insn = next_nonnote_insn (start_insn);
13511 if (GET_CODE (start_insn) == BARRIER)
13512 start_insn = next_nonnote_insn (start_insn);
13513 if (GET_CODE (start_insn) == CODE_LABEL
13514 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13515 && LABEL_NUSES (start_insn) == 1)
13518 seeking_return = 1;
13527 gcc_assert (!arm_ccfsm_state || reverse);
13528 if (GET_CODE (insn) != JUMP_INSN)
13531 /* This jump might be paralleled with a clobber of the condition codes
13532 the jump should always come first */
13533 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13534 body = XVECEXP (body, 0, 0);
13537 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13538 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13541 int fail = FALSE, succeed = FALSE;
13542 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13543 int then_not_else = TRUE;
13544 rtx this_insn = start_insn, label = 0;
13546 /* If the jump cannot be done with one instruction, we cannot
13547 conditionally execute the instruction in the inverse case. */
13548 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13554 /* Register the insn jumped to. */
13557 if (!seeking_return)
13558 label = XEXP (SET_SRC (body), 0);
13560 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13561 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13562 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13564 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13565 then_not_else = FALSE;
13567 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13568 seeking_return = 1;
13569 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13571 seeking_return = 1;
13572 then_not_else = FALSE;
13575 gcc_unreachable ();
13577 /* See how many insns this branch skips, and what kind of insns. If all
13578 insns are okay, and the label or unconditional branch to the same
13579 label is not too far away, succeed. */
13580 for (insns_skipped = 0;
13581 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13585 this_insn = next_nonnote_insn (this_insn);
13589 switch (GET_CODE (this_insn))
13592 /* Succeed if it is the target label, otherwise fail since
13593 control falls in from somewhere else. */
13594 if (this_insn == label)
13598 arm_ccfsm_state = 2;
13599 this_insn = next_nonnote_insn (this_insn);
13602 arm_ccfsm_state = 1;
13610 /* Succeed if the following insn is the target label.
13612 If return insns are used then the last insn in a function
13613 will be a barrier. */
13614 this_insn = next_nonnote_insn (this_insn);
13615 if (this_insn && this_insn == label)
13619 arm_ccfsm_state = 2;
13620 this_insn = next_nonnote_insn (this_insn);
13623 arm_ccfsm_state = 1;
13631 /* The AAPCS says that conditional calls should not be
13632 used since they make interworking inefficient (the
13633 linker can't transform BL<cond> into BLX). That's
13634 only a problem if the machine has BLX. */
13641 /* Succeed if the following insn is the target label, or
13642 if the following two insns are a barrier and the
13644 this_insn = next_nonnote_insn (this_insn);
13645 if (this_insn && GET_CODE (this_insn) == BARRIER)
13646 this_insn = next_nonnote_insn (this_insn);
13648 if (this_insn && this_insn == label
13649 && insns_skipped < max_insns_skipped)
13653 arm_ccfsm_state = 2;
13654 this_insn = next_nonnote_insn (this_insn);
13657 arm_ccfsm_state = 1;
13665 /* If this is an unconditional branch to the same label, succeed.
13666 If it is to another label, do nothing. If it is conditional,
13668 /* XXX Probably, the tests for SET and the PC are
13671 scanbody = PATTERN (this_insn);
13672 if (GET_CODE (scanbody) == SET
13673 && GET_CODE (SET_DEST (scanbody)) == PC)
13675 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13676 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13678 arm_ccfsm_state = 2;
13681 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13684 /* Fail if a conditional return is undesirable (e.g. on a
13685 StrongARM), but still allow this if optimizing for size. */
13686 else if (GET_CODE (scanbody) == RETURN
13687 && !use_return_insn (TRUE, NULL)
13690 else if (GET_CODE (scanbody) == RETURN
13693 arm_ccfsm_state = 2;
13696 else if (GET_CODE (scanbody) == PARALLEL)
13698 switch (get_attr_conds (this_insn))
13708 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13713 /* Instructions using or affecting the condition codes make it
13715 scanbody = PATTERN (this_insn);
13716 if (!(GET_CODE (scanbody) == SET
13717 || GET_CODE (scanbody) == PARALLEL)
13718 || get_attr_conds (this_insn) != CONDS_NOCOND)
13721 /* A conditional cirrus instruction must be followed by
13722 a non Cirrus instruction. However, since we
13723 conditionalize instructions in this function and by
13724 the time we get here we can't add instructions
13725 (nops), because shorten_branches() has already been
13726 called, we will disable conditionalizing Cirrus
13727 instructions to be safe. */
13728 if (GET_CODE (scanbody) != USE
13729 && GET_CODE (scanbody) != CLOBBER
13730 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13740 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13741 arm_target_label = CODE_LABEL_NUMBER (label);
13744 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13746 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13748 this_insn = next_nonnote_insn (this_insn);
13749 gcc_assert (!this_insn
13750 || (GET_CODE (this_insn) != BARRIER
13751 && GET_CODE (this_insn) != CODE_LABEL));
13755 /* Oh, dear! we ran off the end.. give up. */
13756 extract_constrain_insn_cached (insn);
13757 arm_ccfsm_state = 0;
13758 arm_target_insn = NULL;
13761 arm_target_insn = this_insn;
13765 gcc_assert (!reverse);
13767 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13769 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13770 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13771 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13772 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13776 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13779 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13783 if (reverse || then_not_else)
13784 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13787 /* Restore recog_data (getting the attributes of other insns can
13788 destroy this array, but final.c assumes that it remains intact
13789 across this call. */
13790 extract_constrain_insn_cached (insn);
13794 /* Output IT instructions. */
13796 thumb2_asm_output_opcode (FILE * stream)
13801 if (arm_condexec_mask)
13803 for (n = 0; n < arm_condexec_masklen; n++)
13804 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13806 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13807 arm_condition_codes[arm_current_cc]);
13808 arm_condexec_mask = 0;
13812 /* Returns true if REGNO is a valid register
13813 for holding a quantity of type MODE. */
13815 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13817 if (GET_MODE_CLASS (mode) == MODE_CC)
13818 return (regno == CC_REGNUM
13819 || (TARGET_HARD_FLOAT && TARGET_VFP
13820 && regno == VFPCC_REGNUM));
13823 /* For the Thumb we only allow values bigger than SImode in
13824 registers 0 - 6, so that there is always a second low
13825 register available to hold the upper part of the value.
13826 We probably we ought to ensure that the register is the
13827 start of an even numbered register pair. */
13828 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13830 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13831 && IS_CIRRUS_REGNUM (regno))
13832 /* We have outlawed SI values in Cirrus registers because they
13833 reside in the lower 32 bits, but SF values reside in the
13834 upper 32 bits. This causes gcc all sorts of grief. We can't
13835 even split the registers into pairs because Cirrus SI values
13836 get sign extended to 64bits-- aldyh. */
13837 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13839 if (TARGET_HARD_FLOAT && TARGET_VFP
13840 && IS_VFP_REGNUM (regno))
13842 if (mode == SFmode || mode == SImode)
13843 return VFP_REGNO_OK_FOR_SINGLE (regno);
13845 if (mode == DFmode)
13846 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13849 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13850 || (VALID_NEON_QREG_MODE (mode)
13851 && NEON_REGNO_OK_FOR_QUAD (regno))
13852 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13853 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13854 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13855 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13856 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13861 if (TARGET_REALLY_IWMMXT)
13863 if (IS_IWMMXT_GR_REGNUM (regno))
13864 return mode == SImode;
13866 if (IS_IWMMXT_REGNUM (regno))
13867 return VALID_IWMMXT_REG_MODE (mode);
13870 /* We allow any value to be stored in the general registers.
13871 Restrict doubleword quantities to even register pairs so that we can
13872 use ldrd. Do not allow Neon structure opaque modes in general registers;
13873 they would use too many. */
13874 if (regno <= LAST_ARM_REGNUM)
13875 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
13876 && !VALID_NEON_STRUCT_MODE (mode);
13878 if (regno == FRAME_POINTER_REGNUM
13879 || regno == ARG_POINTER_REGNUM)
13880 /* We only allow integers in the fake hard registers. */
13881 return GET_MODE_CLASS (mode) == MODE_INT;
13883 /* The only registers left are the FPA registers
13884 which we only allow to hold FP values. */
13885 return (TARGET_HARD_FLOAT && TARGET_FPA
13886 && GET_MODE_CLASS (mode) == MODE_FLOAT
13887 && regno >= FIRST_FPA_REGNUM
13888 && regno <= LAST_FPA_REGNUM);
13891 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
13892 not used in arm mode. */
13894 arm_regno_class (int regno)
13898 if (regno == STACK_POINTER_REGNUM)
13900 if (regno == CC_REGNUM)
13907 if (TARGET_THUMB2 && regno < 8)
13910 if ( regno <= LAST_ARM_REGNUM
13911 || regno == FRAME_POINTER_REGNUM
13912 || regno == ARG_POINTER_REGNUM)
13913 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
13915 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
13916 return TARGET_THUMB2 ? CC_REG : NO_REGS;
13918 if (IS_CIRRUS_REGNUM (regno))
13919 return CIRRUS_REGS;
13921 if (IS_VFP_REGNUM (regno))
13923 if (regno <= D7_VFP_REGNUM)
13924 return VFP_D0_D7_REGS;
13925 else if (regno <= LAST_LO_VFP_REGNUM)
13926 return VFP_LO_REGS;
13928 return VFP_HI_REGS;
13931 if (IS_IWMMXT_REGNUM (regno))
13932 return IWMMXT_REGS;
13934 if (IS_IWMMXT_GR_REGNUM (regno))
13935 return IWMMXT_GR_REGS;
13940 /* Handle a special case when computing the offset
13941 of an argument from the frame pointer. */
13943 arm_debugger_arg_offset (int value, rtx addr)
13947 /* We are only interested if dbxout_parms() failed to compute the offset. */
13951 /* We can only cope with the case where the address is held in a register. */
13952 if (GET_CODE (addr) != REG)
13955 /* If we are using the frame pointer to point at the argument, then
13956 an offset of 0 is correct. */
13957 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
13960 /* If we are using the stack pointer to point at the
13961 argument, then an offset of 0 is correct. */
13962 /* ??? Check this is consistent with thumb2 frame layout. */
13963 if ((TARGET_THUMB || !frame_pointer_needed)
13964 && REGNO (addr) == SP_REGNUM)
13967 /* Oh dear. The argument is pointed to by a register rather
13968 than being held in a register, or being stored at a known
13969 offset from the frame pointer. Since GDB only understands
13970 those two kinds of argument we must translate the address
13971 held in the register into an offset from the frame pointer.
13972 We do this by searching through the insns for the function
13973 looking to see where this register gets its value. If the
13974 register is initialized from the frame pointer plus an offset
13975 then we are in luck and we can continue, otherwise we give up.
13977 This code is exercised by producing debugging information
13978 for a function with arguments like this:
13980 double func (double a, double b, int c, double d) {return d;}
13982 Without this code the stab for parameter 'd' will be set to
13983 an offset of 0 from the frame pointer, rather than 8. */
13985 /* The if() statement says:
13987 If the insn is a normal instruction
13988 and if the insn is setting the value in a register
13989 and if the register being set is the register holding the address of the argument
13990 and if the address is computing by an addition
13991 that involves adding to a register
13992 which is the frame pointer
13997 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13999 if ( GET_CODE (insn) == INSN
14000 && GET_CODE (PATTERN (insn)) == SET
14001 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14002 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14003 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14004 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14005 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14008 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14017 warning (0, "unable to compute real location of stacked parameter");
14018 value = 8; /* XXX magic hack */
14024 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14027 if ((MASK) & insn_flags) \
14028 add_builtin_function ((NAME), (TYPE), (CODE), \
14029 BUILT_IN_MD, NULL, NULL_TREE); \
14033 struct builtin_description
14035 const unsigned int mask;
14036 const enum insn_code icode;
14037 const char * const name;
14038 const enum arm_builtins code;
14039 const enum rtx_code comparison;
14040 const unsigned int flag;
14043 static const struct builtin_description bdesc_2arg[] =
14045 #define IWMMXT_BUILTIN(code, string, builtin) \
14046 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14047 ARM_BUILTIN_##builtin, 0, 0 },
14049 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14050 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14051 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14052 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14053 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14054 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14055 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14056 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14057 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14058 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14059 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14060 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14061 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14062 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14063 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14064 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14065 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14066 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14067 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14068 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14069 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14070 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14071 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14072 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14073 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14074 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14075 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14076 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14077 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14078 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14079 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14080 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14081 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14082 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14083 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14084 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14085 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14086 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14087 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14088 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14089 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14090 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14091 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14092 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14093 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14094 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14095 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14096 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14097 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14098 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14099 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14100 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14101 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14102 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14103 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14104 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14105 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14106 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14108 #define IWMMXT_BUILTIN2(code, builtin) \
14109 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14111 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14112 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14113 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14114 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14115 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14116 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14117 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14118 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14119 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14120 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14121 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14122 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14123 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14124 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14125 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14126 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14127 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14128 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14129 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14130 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14131 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14132 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14133 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14134 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14135 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14136 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14137 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14138 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14139 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14140 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14141 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14142 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14145 static const struct builtin_description bdesc_1arg[] =
14147 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14148 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14149 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14150 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14151 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14152 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14153 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14154 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14155 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14156 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14157 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14158 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14159 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14160 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14161 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14162 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14163 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14164 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14167 /* Set up all the iWMMXt builtins. This is
14168 not called if TARGET_IWMMXT is zero. */
14171 arm_init_iwmmxt_builtins (void)
14173 const struct builtin_description * d;
14175 tree endlink = void_list_node;
14177 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14178 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14179 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14182 = build_function_type (integer_type_node,
14183 tree_cons (NULL_TREE, integer_type_node, endlink));
14184 tree v8qi_ftype_v8qi_v8qi_int
14185 = build_function_type (V8QI_type_node,
14186 tree_cons (NULL_TREE, V8QI_type_node,
14187 tree_cons (NULL_TREE, V8QI_type_node,
14188 tree_cons (NULL_TREE,
14191 tree v4hi_ftype_v4hi_int
14192 = build_function_type (V4HI_type_node,
14193 tree_cons (NULL_TREE, V4HI_type_node,
14194 tree_cons (NULL_TREE, integer_type_node,
14196 tree v2si_ftype_v2si_int
14197 = build_function_type (V2SI_type_node,
14198 tree_cons (NULL_TREE, V2SI_type_node,
14199 tree_cons (NULL_TREE, integer_type_node,
14201 tree v2si_ftype_di_di
14202 = build_function_type (V2SI_type_node,
14203 tree_cons (NULL_TREE, long_long_integer_type_node,
14204 tree_cons (NULL_TREE, long_long_integer_type_node,
14206 tree di_ftype_di_int
14207 = build_function_type (long_long_integer_type_node,
14208 tree_cons (NULL_TREE, long_long_integer_type_node,
14209 tree_cons (NULL_TREE, integer_type_node,
14211 tree di_ftype_di_int_int
14212 = build_function_type (long_long_integer_type_node,
14213 tree_cons (NULL_TREE, long_long_integer_type_node,
14214 tree_cons (NULL_TREE, integer_type_node,
14215 tree_cons (NULL_TREE,
14218 tree int_ftype_v8qi
14219 = build_function_type (integer_type_node,
14220 tree_cons (NULL_TREE, V8QI_type_node,
14222 tree int_ftype_v4hi
14223 = build_function_type (integer_type_node,
14224 tree_cons (NULL_TREE, V4HI_type_node,
14226 tree int_ftype_v2si
14227 = build_function_type (integer_type_node,
14228 tree_cons (NULL_TREE, V2SI_type_node,
14230 tree int_ftype_v8qi_int
14231 = build_function_type (integer_type_node,
14232 tree_cons (NULL_TREE, V8QI_type_node,
14233 tree_cons (NULL_TREE, integer_type_node,
14235 tree int_ftype_v4hi_int
14236 = build_function_type (integer_type_node,
14237 tree_cons (NULL_TREE, V4HI_type_node,
14238 tree_cons (NULL_TREE, integer_type_node,
14240 tree int_ftype_v2si_int
14241 = build_function_type (integer_type_node,
14242 tree_cons (NULL_TREE, V2SI_type_node,
14243 tree_cons (NULL_TREE, integer_type_node,
14245 tree v8qi_ftype_v8qi_int_int
14246 = build_function_type (V8QI_type_node,
14247 tree_cons (NULL_TREE, V8QI_type_node,
14248 tree_cons (NULL_TREE, integer_type_node,
14249 tree_cons (NULL_TREE,
14252 tree v4hi_ftype_v4hi_int_int
14253 = build_function_type (V4HI_type_node,
14254 tree_cons (NULL_TREE, V4HI_type_node,
14255 tree_cons (NULL_TREE, integer_type_node,
14256 tree_cons (NULL_TREE,
14259 tree v2si_ftype_v2si_int_int
14260 = build_function_type (V2SI_type_node,
14261 tree_cons (NULL_TREE, V2SI_type_node,
14262 tree_cons (NULL_TREE, integer_type_node,
14263 tree_cons (NULL_TREE,
14266 /* Miscellaneous. */
14267 tree v8qi_ftype_v4hi_v4hi
14268 = build_function_type (V8QI_type_node,
14269 tree_cons (NULL_TREE, V4HI_type_node,
14270 tree_cons (NULL_TREE, V4HI_type_node,
14272 tree v4hi_ftype_v2si_v2si
14273 = build_function_type (V4HI_type_node,
14274 tree_cons (NULL_TREE, V2SI_type_node,
14275 tree_cons (NULL_TREE, V2SI_type_node,
14277 tree v2si_ftype_v4hi_v4hi
14278 = build_function_type (V2SI_type_node,
14279 tree_cons (NULL_TREE, V4HI_type_node,
14280 tree_cons (NULL_TREE, V4HI_type_node,
14282 tree v2si_ftype_v8qi_v8qi
14283 = build_function_type (V2SI_type_node,
14284 tree_cons (NULL_TREE, V8QI_type_node,
14285 tree_cons (NULL_TREE, V8QI_type_node,
14287 tree v4hi_ftype_v4hi_di
14288 = build_function_type (V4HI_type_node,
14289 tree_cons (NULL_TREE, V4HI_type_node,
14290 tree_cons (NULL_TREE,
14291 long_long_integer_type_node,
14293 tree v2si_ftype_v2si_di
14294 = build_function_type (V2SI_type_node,
14295 tree_cons (NULL_TREE, V2SI_type_node,
14296 tree_cons (NULL_TREE,
14297 long_long_integer_type_node,
14299 tree void_ftype_int_int
14300 = build_function_type (void_type_node,
14301 tree_cons (NULL_TREE, integer_type_node,
14302 tree_cons (NULL_TREE, integer_type_node,
14305 = build_function_type (long_long_unsigned_type_node, endlink);
14307 = build_function_type (long_long_integer_type_node,
14308 tree_cons (NULL_TREE, V8QI_type_node,
14311 = build_function_type (long_long_integer_type_node,
14312 tree_cons (NULL_TREE, V4HI_type_node,
14315 = build_function_type (long_long_integer_type_node,
14316 tree_cons (NULL_TREE, V2SI_type_node,
14318 tree v2si_ftype_v4hi
14319 = build_function_type (V2SI_type_node,
14320 tree_cons (NULL_TREE, V4HI_type_node,
14322 tree v4hi_ftype_v8qi
14323 = build_function_type (V4HI_type_node,
14324 tree_cons (NULL_TREE, V8QI_type_node,
14327 tree di_ftype_di_v4hi_v4hi
14328 = build_function_type (long_long_unsigned_type_node,
14329 tree_cons (NULL_TREE,
14330 long_long_unsigned_type_node,
14331 tree_cons (NULL_TREE, V4HI_type_node,
14332 tree_cons (NULL_TREE,
14336 tree di_ftype_v4hi_v4hi
14337 = build_function_type (long_long_unsigned_type_node,
14338 tree_cons (NULL_TREE, V4HI_type_node,
14339 tree_cons (NULL_TREE, V4HI_type_node,
14342 /* Normal vector binops. */
14343 tree v8qi_ftype_v8qi_v8qi
14344 = build_function_type (V8QI_type_node,
14345 tree_cons (NULL_TREE, V8QI_type_node,
14346 tree_cons (NULL_TREE, V8QI_type_node,
14348 tree v4hi_ftype_v4hi_v4hi
14349 = build_function_type (V4HI_type_node,
14350 tree_cons (NULL_TREE, V4HI_type_node,
14351 tree_cons (NULL_TREE, V4HI_type_node,
14353 tree v2si_ftype_v2si_v2si
14354 = build_function_type (V2SI_type_node,
14355 tree_cons (NULL_TREE, V2SI_type_node,
14356 tree_cons (NULL_TREE, V2SI_type_node,
14358 tree di_ftype_di_di
14359 = build_function_type (long_long_unsigned_type_node,
14360 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14361 tree_cons (NULL_TREE,
14362 long_long_unsigned_type_node,
14365 /* Add all builtins that are more or less simple operations on two
14367 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14369 /* Use one of the operands; the target can have a different mode for
14370 mask-generating compares. */
14371 enum machine_mode mode;
14377 mode = insn_data[d->icode].operand[1].mode;
14382 type = v8qi_ftype_v8qi_v8qi;
14385 type = v4hi_ftype_v4hi_v4hi;
14388 type = v2si_ftype_v2si_v2si;
14391 type = di_ftype_di_di;
14395 gcc_unreachable ();
14398 def_mbuiltin (d->mask, d->name, type, d->code);
14401 /* Add the remaining MMX insns with somewhat more complicated types. */
14402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14407 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14494 arm_init_tls_builtins (void)
14497 tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
14498 tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);
14500 ftype = build_function_type (ptr_type_node, void_list_node);
14501 add_builtin_function ("__builtin_thread_pointer", ftype,
14502 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14503 NULL, const_nothrow);
14520 } neon_builtin_type_bits;
14522 #define v8qi_UP T_V8QI
14523 #define v4hi_UP T_V4HI
14524 #define v2si_UP T_V2SI
14525 #define v2sf_UP T_V2SF
14527 #define v16qi_UP T_V16QI
14528 #define v8hi_UP T_V8HI
14529 #define v4si_UP T_V4SI
14530 #define v4sf_UP T_V4SF
14531 #define v2di_UP T_V2DI
14536 #define UP(X) X##_UP
14571 NEON_LOADSTRUCTLANE,
14573 NEON_STORESTRUCTLANE,
14582 const neon_itype itype;
14583 const neon_builtin_type_bits bits;
14584 const enum insn_code codes[T_MAX];
14585 const unsigned int num_vars;
14586 unsigned int base_fcode;
14587 } neon_builtin_datum;
14589 #define CF(N,X) CODE_FOR_neon_##N##X
14591 #define VAR1(T, N, A) \
14592 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14593 #define VAR2(T, N, A, B) \
14594 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14595 #define VAR3(T, N, A, B, C) \
14596 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14597 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14598 #define VAR4(T, N, A, B, C, D) \
14599 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14600 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14601 #define VAR5(T, N, A, B, C, D, E) \
14602 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14603 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14604 #define VAR6(T, N, A, B, C, D, E, F) \
14605 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14606 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14607 #define VAR7(T, N, A, B, C, D, E, F, G) \
14608 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14609 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14611 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14612 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14614 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14615 CF (N, G), CF (N, H) }, 8, 0
14616 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14617 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14618 | UP (H) | UP (I), \
14619 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14620 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14621 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14622 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14623 | UP (H) | UP (I) | UP (J), \
14624 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14625 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
14627 /* The mode entries in the following table correspond to the "key" type of the
14628 instruction variant, i.e. equivalent to that which would be specified after
14629 the assembler mnemonic, which usually refers to the last vector operand.
14630 (Signed/unsigned/polynomial types are not differentiated between though, and
14631 are all mapped onto the same mode for a given element size.) The modes
14632 listed per instruction should be the same as those defined for that
14633 instruction's pattern in neon.md.
14634 WARNING: Variants should be listed in the same increasing order as
14635 neon_builtin_type_bits. */
14637 static neon_builtin_datum neon_builtin_data[] =
14639 { VAR10 (BINOP, vadd,
14640 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14641 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14642 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14643 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14644 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14645 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14646 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14647 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14648 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14649 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14650 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14651 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14652 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14653 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14654 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14655 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14656 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14657 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14658 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14659 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14660 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14661 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14662 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14663 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14664 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14665 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14666 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14667 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14668 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14669 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14670 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14671 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14672 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14673 { VAR10 (BINOP, vsub,
14674 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14675 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14676 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14677 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14678 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14679 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14680 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14681 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14682 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14683 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14684 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14685 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14686 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14687 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14688 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14689 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14690 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14691 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14692 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14693 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14694 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14695 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14696 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14697 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14698 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14699 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14700 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14701 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14702 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14703 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14704 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14705 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14706 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14707 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14708 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14709 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14710 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14711 /* FIXME: vget_lane supports more variants than this! */
14712 { VAR10 (GETLANE, vget_lane,
14713 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14714 { VAR10 (SETLANE, vset_lane,
14715 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14716 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14717 { VAR10 (DUP, vdup_n,
14718 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14719 { VAR10 (DUPLANE, vdup_lane,
14720 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14721 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14722 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14723 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14724 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14725 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14726 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14727 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14728 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14729 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14730 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14731 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14732 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14733 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14734 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14735 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14736 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14737 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14738 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14739 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14740 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14741 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14742 { VAR10 (BINOP, vext,
14743 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14744 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14745 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14746 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14747 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14748 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14749 { VAR10 (SELECT, vbsl,
14750 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14751 { VAR1 (VTBL, vtbl1, v8qi) },
14752 { VAR1 (VTBL, vtbl2, v8qi) },
14753 { VAR1 (VTBL, vtbl3, v8qi) },
14754 { VAR1 (VTBL, vtbl4, v8qi) },
14755 { VAR1 (VTBX, vtbx1, v8qi) },
14756 { VAR1 (VTBX, vtbx2, v8qi) },
14757 { VAR1 (VTBX, vtbx3, v8qi) },
14758 { VAR1 (VTBX, vtbx4, v8qi) },
14759 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14760 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14761 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14762 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14763 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14764 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14765 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14766 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14767 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14768 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14769 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14770 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14771 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14772 { VAR10 (LOAD1, vld1,
14773 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14774 { VAR10 (LOAD1LANE, vld1_lane,
14775 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14776 { VAR10 (LOAD1, vld1_dup,
14777 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14778 { VAR10 (STORE1, vst1,
14779 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14780 { VAR10 (STORE1LANE, vst1_lane,
14781 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14782 { VAR9 (LOADSTRUCT,
14783 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14784 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14785 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14786 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14787 { VAR9 (STORESTRUCT, vst2,
14788 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14789 { VAR7 (STORESTRUCTLANE, vst2_lane,
14790 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14791 { VAR9 (LOADSTRUCT,
14792 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14793 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14794 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14795 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14796 { VAR9 (STORESTRUCT, vst3,
14797 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14798 { VAR7 (STORESTRUCTLANE, vst3_lane,
14799 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14800 { VAR9 (LOADSTRUCT, vld4,
14801 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14802 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14803 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14804 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14805 { VAR9 (STORESTRUCT, vst4,
14806 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14807 { VAR7 (STORESTRUCTLANE, vst4_lane,
14808 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14809 { VAR10 (LOGICBINOP, vand,
14810 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14811 { VAR10 (LOGICBINOP, vorr,
14812 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14813 { VAR10 (BINOP, veor,
14814 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14815 { VAR10 (LOGICBINOP, vbic,
14816 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14817 { VAR10 (LOGICBINOP, vorn,
14818 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14834 arm_init_neon_builtins (void)
14836 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14838 /* Create distinguished type nodes for NEON vector element types,
14839 and pointers to values of such types, so we can detect them later. */
14840 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14841 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14842 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14843 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14844 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14845 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14846 tree neon_float_type_node = make_node (REAL_TYPE);
14847 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
14848 layout_type (neon_float_type_node);
14850 /* Define typedefs which exactly correspond to the modes we are basing vector
14851 types on. If you change these names you'll need to change
14852 the table used by arm_mangle_type too. */
14853 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
14854 "__builtin_neon_qi");
14855 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
14856 "__builtin_neon_hi");
14857 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
14858 "__builtin_neon_si");
14859 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
14860 "__builtin_neon_sf");
14861 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
14862 "__builtin_neon_di");
14864 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
14865 "__builtin_neon_poly8");
14866 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
14867 "__builtin_neon_poly16");
14869 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14870 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14871 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14872 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14873 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14875 /* Next create constant-qualified versions of the above types. */
14876 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14878 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14880 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14882 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14884 tree const_float_node = build_qualified_type (neon_float_type_node,
14887 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14888 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14889 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14890 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14891 tree const_float_pointer_node = build_pointer_type (const_float_node);
14893 /* Now create vector types based on our NEON element types. */
14894 /* 64-bit vectors. */
14895 tree V8QI_type_node =
14896 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
14897 tree V4HI_type_node =
14898 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
14899 tree V2SI_type_node =
14900 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
14901 tree V2SF_type_node =
14902 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
14903 /* 128-bit vectors. */
14904 tree V16QI_type_node =
14905 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
14906 tree V8HI_type_node =
14907 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
14908 tree V4SI_type_node =
14909 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
14910 tree V4SF_type_node =
14911 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
14912 tree V2DI_type_node =
14913 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
14915 /* Unsigned integer types for various mode sizes. */
14916 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
14917 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
14918 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
14919 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
14921 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
14922 "__builtin_neon_uqi");
14923 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
14924 "__builtin_neon_uhi");
14925 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
14926 "__builtin_neon_usi");
14927 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
14928 "__builtin_neon_udi");
14930 /* Opaque integer types for structures of vectors. */
14931 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
14932 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
14933 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
14934 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
14936 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
14937 "__builtin_neon_ti");
14938 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
14939 "__builtin_neon_ei");
14940 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
14941 "__builtin_neon_oi");
14942 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
14943 "__builtin_neon_ci");
14944 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
14945 "__builtin_neon_xi");
14947 /* Pointers to vector types. */
14948 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
14949 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
14950 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
14951 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
14952 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
14953 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
14954 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
14955 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
14956 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
14958 /* Operations which return results as pairs. */
14959 tree void_ftype_pv8qi_v8qi_v8qi =
14960 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
14961 V8QI_type_node, NULL);
14962 tree void_ftype_pv4hi_v4hi_v4hi =
14963 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
14964 V4HI_type_node, NULL);
14965 tree void_ftype_pv2si_v2si_v2si =
14966 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
14967 V2SI_type_node, NULL);
14968 tree void_ftype_pv2sf_v2sf_v2sf =
14969 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
14970 V2SF_type_node, NULL);
14971 tree void_ftype_pdi_di_di =
14972 build_function_type_list (void_type_node, intDI_pointer_node,
14973 neon_intDI_type_node, neon_intDI_type_node, NULL);
14974 tree void_ftype_pv16qi_v16qi_v16qi =
14975 build_function_type_list (void_type_node, V16QI_pointer_node,
14976 V16QI_type_node, V16QI_type_node, NULL);
14977 tree void_ftype_pv8hi_v8hi_v8hi =
14978 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
14979 V8HI_type_node, NULL);
14980 tree void_ftype_pv4si_v4si_v4si =
14981 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
14982 V4SI_type_node, NULL);
14983 tree void_ftype_pv4sf_v4sf_v4sf =
14984 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
14985 V4SF_type_node, NULL);
14986 tree void_ftype_pv2di_v2di_v2di =
14987 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
14988 V2DI_type_node, NULL);
14990 tree reinterp_ftype_dreg[5][5];
14991 tree reinterp_ftype_qreg[5][5];
14992 tree dreg_types[5], qreg_types[5];
14994 dreg_types[0] = V8QI_type_node;
14995 dreg_types[1] = V4HI_type_node;
14996 dreg_types[2] = V2SI_type_node;
14997 dreg_types[3] = V2SF_type_node;
14998 dreg_types[4] = neon_intDI_type_node;
15000 qreg_types[0] = V16QI_type_node;
15001 qreg_types[1] = V8HI_type_node;
15002 qreg_types[2] = V4SI_type_node;
15003 qreg_types[3] = V4SF_type_node;
15004 qreg_types[4] = V2DI_type_node;
15006 for (i = 0; i < 5; i++)
15009 for (j = 0; j < 5; j++)
15011 reinterp_ftype_dreg[i][j]
15012 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15013 reinterp_ftype_qreg[i][j]
15014 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15018 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15020 neon_builtin_datum *d = &neon_builtin_data[i];
15021 unsigned int j, codeidx = 0;
15023 d->base_fcode = fcode;
15025 for (j = 0; j < T_MAX; j++)
15027 const char* const modenames[] = {
15028 "v8qi", "v4hi", "v2si", "v2sf", "di",
15029 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15033 enum insn_code icode;
15034 int is_load = 0, is_store = 0;
15036 if ((d->bits & (1 << j)) == 0)
15039 icode = d->codes[codeidx++];
15044 case NEON_LOAD1LANE:
15045 case NEON_LOADSTRUCT:
15046 case NEON_LOADSTRUCTLANE:
15048 /* Fall through. */
15050 case NEON_STORE1LANE:
15051 case NEON_STORESTRUCT:
15052 case NEON_STORESTRUCTLANE:
15055 /* Fall through. */
15058 case NEON_LOGICBINOP:
15059 case NEON_SHIFTINSERT:
15066 case NEON_SHIFTIMM:
15067 case NEON_SHIFTACC:
15073 case NEON_LANEMULL:
15074 case NEON_LANEMULH:
15076 case NEON_SCALARMUL:
15077 case NEON_SCALARMULL:
15078 case NEON_SCALARMULH:
15079 case NEON_SCALARMAC:
15085 tree return_type = void_type_node, args = void_list_node;
15087 /* Build a function type directly from the insn_data for this
15088 builtin. The build_function_type() function takes care of
15089 removing duplicates for us. */
15090 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15094 if (is_load && k == 1)
15096 /* Neon load patterns always have the memory operand
15097 (a SImode pointer) in the operand 1 position. We
15098 want a const pointer to the element type in that
15100 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15106 eltype = const_intQI_pointer_node;
15111 eltype = const_intHI_pointer_node;
15116 eltype = const_intSI_pointer_node;
15121 eltype = const_float_pointer_node;
15126 eltype = const_intDI_pointer_node;
15129 default: gcc_unreachable ();
15132 else if (is_store && k == 0)
15134 /* Similarly, Neon store patterns use operand 0 as
15135 the memory location to store to (a SImode pointer).
15136 Use a pointer to the element type of the store in
15138 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15144 eltype = intQI_pointer_node;
15149 eltype = intHI_pointer_node;
15154 eltype = intSI_pointer_node;
15159 eltype = float_pointer_node;
15164 eltype = intDI_pointer_node;
15167 default: gcc_unreachable ();
15172 switch (insn_data[icode].operand[k].mode)
15174 case VOIDmode: eltype = void_type_node; break;
15176 case QImode: eltype = neon_intQI_type_node; break;
15177 case HImode: eltype = neon_intHI_type_node; break;
15178 case SImode: eltype = neon_intSI_type_node; break;
15179 case SFmode: eltype = neon_float_type_node; break;
15180 case DImode: eltype = neon_intDI_type_node; break;
15181 case TImode: eltype = intTI_type_node; break;
15182 case EImode: eltype = intEI_type_node; break;
15183 case OImode: eltype = intOI_type_node; break;
15184 case CImode: eltype = intCI_type_node; break;
15185 case XImode: eltype = intXI_type_node; break;
15186 /* 64-bit vectors. */
15187 case V8QImode: eltype = V8QI_type_node; break;
15188 case V4HImode: eltype = V4HI_type_node; break;
15189 case V2SImode: eltype = V2SI_type_node; break;
15190 case V2SFmode: eltype = V2SF_type_node; break;
15191 /* 128-bit vectors. */
15192 case V16QImode: eltype = V16QI_type_node; break;
15193 case V8HImode: eltype = V8HI_type_node; break;
15194 case V4SImode: eltype = V4SI_type_node; break;
15195 case V4SFmode: eltype = V4SF_type_node; break;
15196 case V2DImode: eltype = V2DI_type_node; break;
15197 default: gcc_unreachable ();
15201 if (k == 0 && !is_store)
15202 return_type = eltype;
15204 args = tree_cons (NULL_TREE, eltype, args);
15207 ftype = build_function_type (return_type, args);
15211 case NEON_RESULTPAIR:
15213 switch (insn_data[icode].operand[1].mode)
15215 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15216 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15217 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15218 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15219 case DImode: ftype = void_ftype_pdi_di_di; break;
15220 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15221 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15222 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15223 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15224 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15225 default: gcc_unreachable ();
15230 case NEON_REINTERP:
15232 /* We iterate over 5 doubleword types, then 5 quadword
15235 switch (insn_data[icode].operand[0].mode)
15237 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15238 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15239 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15240 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15241 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15242 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15243 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15244 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15245 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15246 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15247 default: gcc_unreachable ();
15253 gcc_unreachable ();
15256 gcc_assert (ftype != NULL);
15258 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15260 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15267 arm_init_builtins (void)
15269 arm_init_tls_builtins ();
15271 if (TARGET_REALLY_IWMMXT)
15272 arm_init_iwmmxt_builtins ();
15275 arm_init_neon_builtins ();
15278 /* Errors in the source file can cause expand_expr to return const0_rtx
15279 where we expect a vector. To avoid crashing, use one of the vector
15280 clear instructions. */
15283 safe_vector_operand (rtx x, enum machine_mode mode)
15285 if (x != const0_rtx)
15287 x = gen_reg_rtx (mode);
15289 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15290 : gen_rtx_SUBREG (DImode, x, 0)));
15294 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15297 arm_expand_binop_builtin (enum insn_code icode,
15298 tree exp, rtx target)
15301 tree arg0 = CALL_EXPR_ARG (exp, 0);
15302 tree arg1 = CALL_EXPR_ARG (exp, 1);
15303 rtx op0 = expand_normal (arg0);
15304 rtx op1 = expand_normal (arg1);
15305 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15306 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15307 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15309 if (VECTOR_MODE_P (mode0))
15310 op0 = safe_vector_operand (op0, mode0);
15311 if (VECTOR_MODE_P (mode1))
15312 op1 = safe_vector_operand (op1, mode1);
15315 || GET_MODE (target) != tmode
15316 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15317 target = gen_reg_rtx (tmode);
15319 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15321 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15322 op0 = copy_to_mode_reg (mode0, op0);
15323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15324 op1 = copy_to_mode_reg (mode1, op1);
15326 pat = GEN_FCN (icode) (target, op0, op1);
15333 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15336 arm_expand_unop_builtin (enum insn_code icode,
15337 tree exp, rtx target, int do_load)
15340 tree arg0 = CALL_EXPR_ARG (exp, 0);
15341 rtx op0 = expand_normal (arg0);
15342 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15343 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15346 || GET_MODE (target) != tmode
15347 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15348 target = gen_reg_rtx (tmode);
15350 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15353 if (VECTOR_MODE_P (mode0))
15354 op0 = safe_vector_operand (op0, mode0);
15356 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15357 op0 = copy_to_mode_reg (mode0, op0);
15360 pat = GEN_FCN (icode) (target, op0);
15368 neon_builtin_compare (const void *a, const void *b)
15370 const neon_builtin_datum *key = a;
15371 const neon_builtin_datum *memb = b;
15372 unsigned int soughtcode = key->base_fcode;
15374 if (soughtcode >= memb->base_fcode
15375 && soughtcode < memb->base_fcode + memb->num_vars)
15377 else if (soughtcode < memb->base_fcode)
15383 static enum insn_code
15384 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15386 neon_builtin_datum key, *found;
15389 key.base_fcode = fcode;
15390 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15391 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15392 gcc_assert (found);
15393 idx = fcode - (int) found->base_fcode;
15394 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15397 *itype = found->itype;
15399 return found->codes[idx];
15403 NEON_ARG_COPY_TO_REG,
15408 #define NEON_MAX_BUILTIN_ARGS 5
15410 /* Expand a Neon builtin. */
15412 arm_expand_neon_args (rtx target, int icode, int have_retval,
15417 tree arg[NEON_MAX_BUILTIN_ARGS];
15418 rtx op[NEON_MAX_BUILTIN_ARGS];
15419 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15420 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15425 || GET_MODE (target) != tmode
15426 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15427 target = gen_reg_rtx (tmode);
15429 va_start (ap, exp);
15433 builtin_arg thisarg = va_arg (ap, int);
15435 if (thisarg == NEON_ARG_STOP)
15439 arg[argc] = CALL_EXPR_ARG (exp, argc);
15440 op[argc] = expand_normal (arg[argc]);
15441 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15445 case NEON_ARG_COPY_TO_REG:
15446 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15447 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15448 (op[argc], mode[argc]))
15449 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15452 case NEON_ARG_CONSTANT:
15453 /* FIXME: This error message is somewhat unhelpful. */
15454 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15455 (op[argc], mode[argc]))
15456 error ("argument must be a constant");
15459 case NEON_ARG_STOP:
15460 gcc_unreachable ();
15473 pat = GEN_FCN (icode) (target, op[0]);
15477 pat = GEN_FCN (icode) (target, op[0], op[1]);
15481 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15485 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15489 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15493 gcc_unreachable ();
15499 pat = GEN_FCN (icode) (op[0]);
15503 pat = GEN_FCN (icode) (op[0], op[1]);
15507 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15511 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15515 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15519 gcc_unreachable ();
15530 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15531 constants defined per-instruction or per instruction-variant. Instead, the
15532 required info is looked up in the table neon_builtin_data. */
15534 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15537 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15544 return arm_expand_neon_args (target, icode, 1, exp,
15545 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15549 case NEON_SCALARMUL:
15550 case NEON_SCALARMULL:
15551 case NEON_SCALARMULH:
15552 case NEON_SHIFTINSERT:
15553 case NEON_LOGICBINOP:
15554 return arm_expand_neon_args (target, icode, 1, exp,
15555 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15559 return arm_expand_neon_args (target, icode, 1, exp,
15560 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15561 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15565 case NEON_SHIFTIMM:
15566 return arm_expand_neon_args (target, icode, 1, exp,
15567 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15571 return arm_expand_neon_args (target, icode, 1, exp,
15572 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15576 case NEON_REINTERP:
15577 return arm_expand_neon_args (target, icode, 1, exp,
15578 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15582 return arm_expand_neon_args (target, icode, 1, exp,
15583 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15585 case NEON_RESULTPAIR:
15586 return arm_expand_neon_args (target, icode, 0, exp,
15587 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15591 case NEON_LANEMULL:
15592 case NEON_LANEMULH:
15593 return arm_expand_neon_args (target, icode, 1, exp,
15594 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15595 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15598 return arm_expand_neon_args (target, icode, 1, exp,
15599 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15600 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15602 case NEON_SHIFTACC:
15603 return arm_expand_neon_args (target, icode, 1, exp,
15604 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15605 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15607 case NEON_SCALARMAC:
15608 return arm_expand_neon_args (target, icode, 1, exp,
15609 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15610 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15614 return arm_expand_neon_args (target, icode, 1, exp,
15615 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15619 case NEON_LOADSTRUCT:
15620 return arm_expand_neon_args (target, icode, 1, exp,
15621 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15623 case NEON_LOAD1LANE:
15624 case NEON_LOADSTRUCTLANE:
15625 return arm_expand_neon_args (target, icode, 1, exp,
15626 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15630 case NEON_STORESTRUCT:
15631 return arm_expand_neon_args (target, icode, 0, exp,
15632 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15634 case NEON_STORE1LANE:
15635 case NEON_STORESTRUCTLANE:
15636 return arm_expand_neon_args (target, icode, 0, exp,
15637 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15641 gcc_unreachable ();
15644 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15646 neon_reinterpret (rtx dest, rtx src)
15648 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15651 /* Emit code to place a Neon pair result in memory locations (with equal
15654 neon_emit_pair_result_insn (enum machine_mode mode,
15655 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15658 rtx mem = gen_rtx_MEM (mode, destaddr);
15659 rtx tmp1 = gen_reg_rtx (mode);
15660 rtx tmp2 = gen_reg_rtx (mode);
15662 emit_insn (intfn (tmp1, op1, tmp2, op2));
15664 emit_move_insn (mem, tmp1);
15665 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15666 emit_move_insn (mem, tmp2);
15669 /* Set up operands for a register copy from src to dest, taking care not to
15670 clobber registers in the process.
15671 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15672 be called with a large N, so that should be OK. */
15675 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15677 unsigned int copied = 0, opctr = 0;
15678 unsigned int done = (1 << count) - 1;
15681 while (copied != done)
15683 for (i = 0; i < count; i++)
15687 for (j = 0; good && j < count; j++)
15688 if (i != j && (copied & (1 << j)) == 0
15689 && reg_overlap_mentioned_p (src[j], dest[i]))
15694 operands[opctr++] = dest[i];
15695 operands[opctr++] = src[i];
15701 gcc_assert (opctr == count * 2);
15704 /* Expand an expression EXP that calls a built-in function,
15705 with result going to TARGET if that's convenient
15706 (and in mode MODE if that's convenient).
15707 SUBTARGET may be used as the target for computing one of EXP's operands.
15708 IGNORE is nonzero if the value is to be ignored. */
15711 arm_expand_builtin (tree exp,
15713 rtx subtarget ATTRIBUTE_UNUSED,
15714 enum machine_mode mode ATTRIBUTE_UNUSED,
15715 int ignore ATTRIBUTE_UNUSED)
15717 const struct builtin_description * d;
15718 enum insn_code icode;
15719 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15727 int fcode = DECL_FUNCTION_CODE (fndecl);
15729 enum machine_mode tmode;
15730 enum machine_mode mode0;
15731 enum machine_mode mode1;
15732 enum machine_mode mode2;
15734 if (fcode >= ARM_BUILTIN_NEON_BASE)
15735 return arm_expand_neon_builtin (fcode, exp, target);
15739 case ARM_BUILTIN_TEXTRMSB:
15740 case ARM_BUILTIN_TEXTRMUB:
15741 case ARM_BUILTIN_TEXTRMSH:
15742 case ARM_BUILTIN_TEXTRMUH:
15743 case ARM_BUILTIN_TEXTRMSW:
15744 case ARM_BUILTIN_TEXTRMUW:
15745 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15746 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15747 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15748 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15749 : CODE_FOR_iwmmxt_textrmw);
15751 arg0 = CALL_EXPR_ARG (exp, 0);
15752 arg1 = CALL_EXPR_ARG (exp, 1);
15753 op0 = expand_normal (arg0);
15754 op1 = expand_normal (arg1);
15755 tmode = insn_data[icode].operand[0].mode;
15756 mode0 = insn_data[icode].operand[1].mode;
15757 mode1 = insn_data[icode].operand[2].mode;
15759 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15760 op0 = copy_to_mode_reg (mode0, op0);
15761 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15763 /* @@@ better error message */
15764 error ("selector must be an immediate");
15765 return gen_reg_rtx (tmode);
15768 || GET_MODE (target) != tmode
15769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15770 target = gen_reg_rtx (tmode);
15771 pat = GEN_FCN (icode) (target, op0, op1);
15777 case ARM_BUILTIN_TINSRB:
15778 case ARM_BUILTIN_TINSRH:
15779 case ARM_BUILTIN_TINSRW:
15780 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15781 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15782 : CODE_FOR_iwmmxt_tinsrw);
15783 arg0 = CALL_EXPR_ARG (exp, 0);
15784 arg1 = CALL_EXPR_ARG (exp, 1);
15785 arg2 = CALL_EXPR_ARG (exp, 2);
15786 op0 = expand_normal (arg0);
15787 op1 = expand_normal (arg1);
15788 op2 = expand_normal (arg2);
15789 tmode = insn_data[icode].operand[0].mode;
15790 mode0 = insn_data[icode].operand[1].mode;
15791 mode1 = insn_data[icode].operand[2].mode;
15792 mode2 = insn_data[icode].operand[3].mode;
15794 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15795 op0 = copy_to_mode_reg (mode0, op0);
15796 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15797 op1 = copy_to_mode_reg (mode1, op1);
15798 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15800 /* @@@ better error message */
15801 error ("selector must be an immediate");
15805 || GET_MODE (target) != tmode
15806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15807 target = gen_reg_rtx (tmode);
15808 pat = GEN_FCN (icode) (target, op0, op1, op2);
15814 case ARM_BUILTIN_SETWCX:
15815 arg0 = CALL_EXPR_ARG (exp, 0);
15816 arg1 = CALL_EXPR_ARG (exp, 1);
15817 op0 = force_reg (SImode, expand_normal (arg0));
15818 op1 = expand_normal (arg1);
15819 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15822 case ARM_BUILTIN_GETWCX:
15823 arg0 = CALL_EXPR_ARG (exp, 0);
15824 op0 = expand_normal (arg0);
15825 target = gen_reg_rtx (SImode);
15826 emit_insn (gen_iwmmxt_tmrc (target, op0));
15829 case ARM_BUILTIN_WSHUFH:
15830 icode = CODE_FOR_iwmmxt_wshufh;
15831 arg0 = CALL_EXPR_ARG (exp, 0);
15832 arg1 = CALL_EXPR_ARG (exp, 1);
15833 op0 = expand_normal (arg0);
15834 op1 = expand_normal (arg1);
15835 tmode = insn_data[icode].operand[0].mode;
15836 mode1 = insn_data[icode].operand[1].mode;
15837 mode2 = insn_data[icode].operand[2].mode;
15839 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15840 op0 = copy_to_mode_reg (mode1, op0);
15841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15843 /* @@@ better error message */
15844 error ("mask must be an immediate");
15848 || GET_MODE (target) != tmode
15849 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15850 target = gen_reg_rtx (tmode);
15851 pat = GEN_FCN (icode) (target, op0, op1);
15857 case ARM_BUILTIN_WSADB:
15858 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15859 case ARM_BUILTIN_WSADH:
15860 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15861 case ARM_BUILTIN_WSADBZ:
15862 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15863 case ARM_BUILTIN_WSADHZ:
15864 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15866 /* Several three-argument builtins. */
15867 case ARM_BUILTIN_WMACS:
15868 case ARM_BUILTIN_WMACU:
15869 case ARM_BUILTIN_WALIGN:
15870 case ARM_BUILTIN_TMIA:
15871 case ARM_BUILTIN_TMIAPH:
15872 case ARM_BUILTIN_TMIATT:
15873 case ARM_BUILTIN_TMIATB:
15874 case ARM_BUILTIN_TMIABT:
15875 case ARM_BUILTIN_TMIABB:
15876 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
15877 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
15878 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
15879 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
15880 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
15881 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
15882 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
15883 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
15884 : CODE_FOR_iwmmxt_walign);
15885 arg0 = CALL_EXPR_ARG (exp, 0);
15886 arg1 = CALL_EXPR_ARG (exp, 1);
15887 arg2 = CALL_EXPR_ARG (exp, 2);
15888 op0 = expand_normal (arg0);
15889 op1 = expand_normal (arg1);
15890 op2 = expand_normal (arg2);
15891 tmode = insn_data[icode].operand[0].mode;
15892 mode0 = insn_data[icode].operand[1].mode;
15893 mode1 = insn_data[icode].operand[2].mode;
15894 mode2 = insn_data[icode].operand[3].mode;
15896 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15897 op0 = copy_to_mode_reg (mode0, op0);
15898 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15899 op1 = copy_to_mode_reg (mode1, op1);
15900 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15901 op2 = copy_to_mode_reg (mode2, op2);
15903 || GET_MODE (target) != tmode
15904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15905 target = gen_reg_rtx (tmode);
15906 pat = GEN_FCN (icode) (target, op0, op1, op2);
15912 case ARM_BUILTIN_WZERO:
15913 target = gen_reg_rtx (DImode);
15914 emit_insn (gen_iwmmxt_clrdi (target));
15917 case ARM_BUILTIN_THREAD_POINTER:
15918 return arm_load_tp (target);
15924 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15925 if (d->code == (const enum arm_builtins) fcode)
15926 return arm_expand_binop_builtin (d->icode, exp, target);
15928 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15929 if (d->code == (const enum arm_builtins) fcode)
15930 return arm_expand_unop_builtin (d->icode, exp, target, 0);
15932 /* @@@ Should really do something sensible here. */
15936 /* Return the number (counting from 0) of
15937 the least significant set bit in MASK. */
15940 number_of_first_bit_set (unsigned mask)
15945 (mask & (1 << bit)) == 0;
15952 /* Emit code to push or pop registers to or from the stack. F is the
15953 assembly file. MASK is the registers to push or pop. PUSH is
15954 nonzero if we should push, and zero if we should pop. For debugging
15955 output, if pushing, adjust CFA_OFFSET by the amount of space added
15956 to the stack. REAL_REGS should have the same number of bits set as
15957 MASK, and will be used instead (in the same order) to describe which
15958 registers were saved - this is used to mark the save slots when we
15959 push high registers after moving them to low registers. */
15961 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
15962 unsigned long real_regs)
15965 int lo_mask = mask & 0xFF;
15966 int pushed_words = 0;
15970 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
15972 /* Special case. Do not generate a POP PC statement here, do it in
15974 thumb_exit (f, -1);
15978 if (ARM_EABI_UNWIND_TABLES && push)
15980 fprintf (f, "\t.save\t{");
15981 for (regno = 0; regno < 15; regno++)
15983 if (real_regs & (1 << regno))
15985 if (real_regs & ((1 << regno) -1))
15987 asm_fprintf (f, "%r", regno);
15990 fprintf (f, "}\n");
15993 fprintf (f, "\t%s\t{", push ? "push" : "pop");
15995 /* Look at the low registers first. */
15996 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16000 asm_fprintf (f, "%r", regno);
16002 if ((lo_mask & ~1) != 0)
16009 if (push && (mask & (1 << LR_REGNUM)))
16011 /* Catch pushing the LR. */
16015 asm_fprintf (f, "%r", LR_REGNUM);
16019 else if (!push && (mask & (1 << PC_REGNUM)))
16021 /* Catch popping the PC. */
16022 if (TARGET_INTERWORK || TARGET_BACKTRACE
16023 || current_function_calls_eh_return)
16025 /* The PC is never poped directly, instead
16026 it is popped into r3 and then BX is used. */
16027 fprintf (f, "}\n");
16029 thumb_exit (f, -1);
16038 asm_fprintf (f, "%r", PC_REGNUM);
16042 fprintf (f, "}\n");
16044 if (push && pushed_words && dwarf2out_do_frame ())
16046 char *l = dwarf2out_cfi_label ();
16047 int pushed_mask = real_regs;
16049 *cfa_offset += pushed_words * 4;
16050 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16053 pushed_mask = real_regs;
16054 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16056 if (pushed_mask & 1)
16057 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16062 /* Generate code to return from a thumb function.
16063 If 'reg_containing_return_addr' is -1, then the return address is
16064 actually on the stack, at the stack pointer. */
16066 thumb_exit (FILE *f, int reg_containing_return_addr)
16068 unsigned regs_available_for_popping;
16069 unsigned regs_to_pop;
16071 unsigned available;
16075 int restore_a4 = FALSE;
16077 /* Compute the registers we need to pop. */
16081 if (reg_containing_return_addr == -1)
16083 regs_to_pop |= 1 << LR_REGNUM;
16087 if (TARGET_BACKTRACE)
16089 /* Restore the (ARM) frame pointer and stack pointer. */
16090 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16094 /* If there is nothing to pop then just emit the BX instruction and
16096 if (pops_needed == 0)
16098 if (current_function_calls_eh_return)
16099 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16101 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16104 /* Otherwise if we are not supporting interworking and we have not created
16105 a backtrace structure and the function was not entered in ARM mode then
16106 just pop the return address straight into the PC. */
16107 else if (!TARGET_INTERWORK
16108 && !TARGET_BACKTRACE
16109 && !is_called_in_ARM_mode (current_function_decl)
16110 && !current_function_calls_eh_return)
16112 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16116 /* Find out how many of the (return) argument registers we can corrupt. */
16117 regs_available_for_popping = 0;
16119 /* If returning via __builtin_eh_return, the bottom three registers
16120 all contain information needed for the return. */
16121 if (current_function_calls_eh_return)
16125 /* If we can deduce the registers used from the function's
16126 return value. This is more reliable that examining
16127 df_regs_ever_live_p () because that will be set if the register is
16128 ever used in the function, not just if the register is used
16129 to hold a return value. */
16131 if (current_function_return_rtx != 0)
16132 mode = GET_MODE (current_function_return_rtx);
16134 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16136 size = GET_MODE_SIZE (mode);
16140 /* In a void function we can use any argument register.
16141 In a function that returns a structure on the stack
16142 we can use the second and third argument registers. */
16143 if (mode == VOIDmode)
16144 regs_available_for_popping =
16145 (1 << ARG_REGISTER (1))
16146 | (1 << ARG_REGISTER (2))
16147 | (1 << ARG_REGISTER (3));
16149 regs_available_for_popping =
16150 (1 << ARG_REGISTER (2))
16151 | (1 << ARG_REGISTER (3));
16153 else if (size <= 4)
16154 regs_available_for_popping =
16155 (1 << ARG_REGISTER (2))
16156 | (1 << ARG_REGISTER (3));
16157 else if (size <= 8)
16158 regs_available_for_popping =
16159 (1 << ARG_REGISTER (3));
16162 /* Match registers to be popped with registers into which we pop them. */
16163 for (available = regs_available_for_popping,
16164 required = regs_to_pop;
16165 required != 0 && available != 0;
16166 available &= ~(available & - available),
16167 required &= ~(required & - required))
16170 /* If we have any popping registers left over, remove them. */
16172 regs_available_for_popping &= ~available;
16174 /* Otherwise if we need another popping register we can use
16175 the fourth argument register. */
16176 else if (pops_needed)
16178 /* If we have not found any free argument registers and
16179 reg a4 contains the return address, we must move it. */
16180 if (regs_available_for_popping == 0
16181 && reg_containing_return_addr == LAST_ARG_REGNUM)
16183 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16184 reg_containing_return_addr = LR_REGNUM;
16186 else if (size > 12)
16188 /* Register a4 is being used to hold part of the return value,
16189 but we have dire need of a free, low register. */
16192 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16195 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16197 /* The fourth argument register is available. */
16198 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16204 /* Pop as many registers as we can. */
16205 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16206 regs_available_for_popping);
16208 /* Process the registers we popped. */
16209 if (reg_containing_return_addr == -1)
16211 /* The return address was popped into the lowest numbered register. */
16212 regs_to_pop &= ~(1 << LR_REGNUM);
16214 reg_containing_return_addr =
16215 number_of_first_bit_set (regs_available_for_popping);
16217 /* Remove this register for the mask of available registers, so that
16218 the return address will not be corrupted by further pops. */
16219 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16222 /* If we popped other registers then handle them here. */
16223 if (regs_available_for_popping)
16227 /* Work out which register currently contains the frame pointer. */
16228 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16230 /* Move it into the correct place. */
16231 asm_fprintf (f, "\tmov\t%r, %r\n",
16232 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16234 /* (Temporarily) remove it from the mask of popped registers. */
16235 regs_available_for_popping &= ~(1 << frame_pointer);
16236 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16238 if (regs_available_for_popping)
16242 /* We popped the stack pointer as well,
16243 find the register that contains it. */
16244 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16246 /* Move it into the stack register. */
16247 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16249 /* At this point we have popped all necessary registers, so
16250 do not worry about restoring regs_available_for_popping
16251 to its correct value:
16253 assert (pops_needed == 0)
16254 assert (regs_available_for_popping == (1 << frame_pointer))
16255 assert (regs_to_pop == (1 << STACK_POINTER)) */
16259 /* Since we have just move the popped value into the frame
16260 pointer, the popping register is available for reuse, and
16261 we know that we still have the stack pointer left to pop. */
16262 regs_available_for_popping |= (1 << frame_pointer);
16266 /* If we still have registers left on the stack, but we no longer have
16267 any registers into which we can pop them, then we must move the return
16268 address into the link register and make available the register that
16270 if (regs_available_for_popping == 0 && pops_needed > 0)
16272 regs_available_for_popping |= 1 << reg_containing_return_addr;
16274 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16275 reg_containing_return_addr);
16277 reg_containing_return_addr = LR_REGNUM;
16280 /* If we have registers left on the stack then pop some more.
16281 We know that at most we will want to pop FP and SP. */
16282 if (pops_needed > 0)
16287 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16288 regs_available_for_popping);
16290 /* We have popped either FP or SP.
16291 Move whichever one it is into the correct register. */
16292 popped_into = number_of_first_bit_set (regs_available_for_popping);
16293 move_to = number_of_first_bit_set (regs_to_pop);
16295 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16297 regs_to_pop &= ~(1 << move_to);
16302 /* If we still have not popped everything then we must have only
16303 had one register available to us and we are now popping the SP. */
16304 if (pops_needed > 0)
16308 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16309 regs_available_for_popping);
16311 popped_into = number_of_first_bit_set (regs_available_for_popping);
16313 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16315 assert (regs_to_pop == (1 << STACK_POINTER))
16316 assert (pops_needed == 1)
16320 /* If necessary restore the a4 register. */
16323 if (reg_containing_return_addr != LR_REGNUM)
16325 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16326 reg_containing_return_addr = LR_REGNUM;
16329 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16332 if (current_function_calls_eh_return)
16333 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16335 /* Return to caller. */
16336 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16341 thumb1_final_prescan_insn (rtx insn)
16343 if (flag_print_asm_name)
16344 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16345 INSN_ADDRESSES (INSN_UID (insn)));
16349 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16351 unsigned HOST_WIDE_INT mask = 0xff;
16354 if (val == 0) /* XXX */
16357 for (i = 0; i < 25; i++)
16358 if ((val & (mask << i)) == val)
16364 /* Returns nonzero if the current function contains,
16365 or might contain a far jump. */
16367 thumb_far_jump_used_p (void)
16371 /* This test is only important for leaf functions. */
16372 /* assert (!leaf_function_p ()); */
16374 /* If we have already decided that far jumps may be used,
16375 do not bother checking again, and always return true even if
16376 it turns out that they are not being used. Once we have made
16377 the decision that far jumps are present (and that hence the link
16378 register will be pushed onto the stack) we cannot go back on it. */
16379 if (cfun->machine->far_jump_used)
16382 /* If this function is not being called from the prologue/epilogue
16383 generation code then it must be being called from the
16384 INITIAL_ELIMINATION_OFFSET macro. */
16385 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16387 /* In this case we know that we are being asked about the elimination
16388 of the arg pointer register. If that register is not being used,
16389 then there are no arguments on the stack, and we do not have to
16390 worry that a far jump might force the prologue to push the link
16391 register, changing the stack offsets. In this case we can just
16392 return false, since the presence of far jumps in the function will
16393 not affect stack offsets.
16395 If the arg pointer is live (or if it was live, but has now been
16396 eliminated and so set to dead) then we do have to test to see if
16397 the function might contain a far jump. This test can lead to some
16398 false negatives, since before reload is completed, then length of
16399 branch instructions is not known, so gcc defaults to returning their
16400 longest length, which in turn sets the far jump attribute to true.
16402 A false negative will not result in bad code being generated, but it
16403 will result in a needless push and pop of the link register. We
16404 hope that this does not occur too often.
16406 If we need doubleword stack alignment this could affect the other
16407 elimination offsets so we can't risk getting it wrong. */
16408 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16409 cfun->machine->arg_pointer_live = 1;
16410 else if (!cfun->machine->arg_pointer_live)
16414 /* Check to see if the function contains a branch
16415 insn with the far jump attribute set. */
16416 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16418 if (GET_CODE (insn) == JUMP_INSN
16419 /* Ignore tablejump patterns. */
16420 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16421 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16422 && get_attr_far_jump (insn) == FAR_JUMP_YES
16425 /* Record the fact that we have decided that
16426 the function does use far jumps. */
16427 cfun->machine->far_jump_used = 1;
16435 /* Return nonzero if FUNC must be entered in ARM mode. */
16437 is_called_in_ARM_mode (tree func)
16439 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16441 /* Ignore the problem about functions whose address is taken. */
16442 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16446 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16452 /* The bits which aren't usefully expanded as rtl. */
16454 thumb_unexpanded_epilogue (void)
16457 unsigned long live_regs_mask = 0;
16458 int high_regs_pushed = 0;
16459 int had_to_push_lr;
16462 if (return_used_this_function)
16465 if (IS_NAKED (arm_current_func_type ()))
16468 live_regs_mask = thumb1_compute_save_reg_mask ();
16469 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16471 /* If we can deduce the registers used from the function's return value.
16472 This is more reliable that examining df_regs_ever_live_p () because that
16473 will be set if the register is ever used in the function, not just if
16474 the register is used to hold a return value. */
16475 size = arm_size_return_regs ();
16477 /* The prolog may have pushed some high registers to use as
16478 work registers. e.g. the testsuite file:
16479 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16480 compiles to produce:
16481 push {r4, r5, r6, r7, lr}
16485 as part of the prolog. We have to undo that pushing here. */
16487 if (high_regs_pushed)
16489 unsigned long mask = live_regs_mask & 0xff;
16492 /* The available low registers depend on the size of the value we are
16500 /* Oh dear! We have no low registers into which we can pop
16503 ("no low registers available for popping high registers");
16505 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16506 if (live_regs_mask & (1 << next_hi_reg))
16509 while (high_regs_pushed)
16511 /* Find lo register(s) into which the high register(s) can
16513 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16515 if (mask & (1 << regno))
16516 high_regs_pushed--;
16517 if (high_regs_pushed == 0)
16521 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16523 /* Pop the values into the low register(s). */
16524 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16526 /* Move the value(s) into the high registers. */
16527 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16529 if (mask & (1 << regno))
16531 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16534 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16535 if (live_regs_mask & (1 << next_hi_reg))
16540 live_regs_mask &= ~0x0f00;
16543 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16544 live_regs_mask &= 0xff;
16546 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16548 /* Pop the return address into the PC. */
16549 if (had_to_push_lr)
16550 live_regs_mask |= 1 << PC_REGNUM;
16552 /* Either no argument registers were pushed or a backtrace
16553 structure was created which includes an adjusted stack
16554 pointer, so just pop everything. */
16555 if (live_regs_mask)
16556 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16559 /* We have either just popped the return address into the
16560 PC or it is was kept in LR for the entire function. */
16561 if (!had_to_push_lr)
16562 thumb_exit (asm_out_file, LR_REGNUM);
16566 /* Pop everything but the return address. */
16567 if (live_regs_mask)
16568 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16571 if (had_to_push_lr)
16575 /* We have no free low regs, so save one. */
16576 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16580 /* Get the return address into a temporary register. */
16581 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16582 1 << LAST_ARG_REGNUM);
16586 /* Move the return address to lr. */
16587 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16589 /* Restore the low register. */
16590 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16595 regno = LAST_ARG_REGNUM;
16600 /* Remove the argument registers that were pushed onto the stack. */
16601 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16602 SP_REGNUM, SP_REGNUM,
16603 current_function_pretend_args_size);
16605 thumb_exit (asm_out_file, regno);
16611 /* Functions to save and restore machine-specific function data. */
16612 static struct machine_function *
16613 arm_init_machine_status (void)
16615 struct machine_function *machine;
16616 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16618 #if ARM_FT_UNKNOWN != 0
16619 machine->func_type = ARM_FT_UNKNOWN;
16624 /* Return an RTX indicating where the return address to the
16625 calling function can be found. */
16627 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16632 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16635 /* Do anything needed before RTL is emitted for each function. */
16637 arm_init_expanders (void)
16639 /* Arrange to initialize and mark the machine per-function status. */
16640 init_machine_status = arm_init_machine_status;
16642 /* This is to stop the combine pass optimizing away the alignment
16643 adjustment of va_arg. */
16644 /* ??? It is claimed that this should not be necessary. */
16646 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16650 /* Like arm_compute_initial_elimination offset. Simpler because there
16651 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16652 to point at the base of the local variables after static stack
16653 space for a function has been allocated. */
16656 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16658 arm_stack_offsets *offsets;
16660 offsets = arm_get_frame_offsets ();
16664 case ARG_POINTER_REGNUM:
16667 case STACK_POINTER_REGNUM:
16668 return offsets->outgoing_args - offsets->saved_args;
16670 case FRAME_POINTER_REGNUM:
16671 return offsets->soft_frame - offsets->saved_args;
16673 case ARM_HARD_FRAME_POINTER_REGNUM:
16674 return offsets->saved_regs - offsets->saved_args;
16676 case THUMB_HARD_FRAME_POINTER_REGNUM:
16677 return offsets->locals_base - offsets->saved_args;
16680 gcc_unreachable ();
16684 case FRAME_POINTER_REGNUM:
16687 case STACK_POINTER_REGNUM:
16688 return offsets->outgoing_args - offsets->soft_frame;
16690 case ARM_HARD_FRAME_POINTER_REGNUM:
16691 return offsets->saved_regs - offsets->soft_frame;
16693 case THUMB_HARD_FRAME_POINTER_REGNUM:
16694 return offsets->locals_base - offsets->soft_frame;
16697 gcc_unreachable ();
16702 gcc_unreachable ();
16706 /* Generate the rest of a function's prologue. */
16708 thumb1_expand_prologue (void)
16712 HOST_WIDE_INT amount;
16713 arm_stack_offsets *offsets;
16714 unsigned long func_type;
16716 unsigned long live_regs_mask;
16718 func_type = arm_current_func_type ();
16720 /* Naked functions don't have prologues. */
16721 if (IS_NAKED (func_type))
16724 if (IS_INTERRUPT (func_type))
16726 error ("interrupt Service Routines cannot be coded in Thumb mode");
16730 live_regs_mask = thumb1_compute_save_reg_mask ();
16731 /* Load the pic register before setting the frame pointer,
16732 so we can use r7 as a temporary work register. */
16733 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16734 arm_load_pic_register (live_regs_mask);
16736 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16737 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16738 stack_pointer_rtx);
16740 offsets = arm_get_frame_offsets ();
16741 amount = offsets->outgoing_args - offsets->saved_regs;
16746 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16747 GEN_INT (- amount)));
16748 RTX_FRAME_RELATED_P (insn) = 1;
16754 /* The stack decrement is too big for an immediate value in a single
16755 insn. In theory we could issue multiple subtracts, but after
16756 three of them it becomes more space efficient to place the full
16757 value in the constant pool and load into a register. (Also the
16758 ARM debugger really likes to see only one stack decrement per
16759 function). So instead we look for a scratch register into which
16760 we can load the decrement, and then we subtract this from the
16761 stack pointer. Unfortunately on the thumb the only available
16762 scratch registers are the argument registers, and we cannot use
16763 these as they may hold arguments to the function. Instead we
16764 attempt to locate a call preserved register which is used by this
16765 function. If we can find one, then we know that it will have
16766 been pushed at the start of the prologue and so we can corrupt
16768 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16769 if (live_regs_mask & (1 << regno)
16770 && !(frame_pointer_needed
16771 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16774 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16776 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16778 /* Choose an arbitrary, non-argument low register. */
16779 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16781 /* Save it by copying it into a high, scratch register. */
16782 emit_insn (gen_movsi (spare, reg));
16783 /* Add a USE to stop propagate_one_insn() from barfing. */
16784 emit_insn (gen_prologue_use (spare));
16786 /* Decrement the stack. */
16787 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16788 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16789 stack_pointer_rtx, reg));
16790 RTX_FRAME_RELATED_P (insn) = 1;
16791 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16792 plus_constant (stack_pointer_rtx,
16794 RTX_FRAME_RELATED_P (dwarf) = 1;
16796 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16799 /* Restore the low register's original value. */
16800 emit_insn (gen_movsi (reg, spare));
16802 /* Emit a USE of the restored scratch register, so that flow
16803 analysis will not consider the restore redundant. The
16804 register won't be used again in this function and isn't
16805 restored by the epilogue. */
16806 emit_insn (gen_prologue_use (reg));
16810 reg = gen_rtx_REG (SImode, regno);
16812 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16814 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16815 stack_pointer_rtx, reg));
16816 RTX_FRAME_RELATED_P (insn) = 1;
16817 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16818 plus_constant (stack_pointer_rtx,
16820 RTX_FRAME_RELATED_P (dwarf) = 1;
16822 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16828 if (frame_pointer_needed)
16829 thumb_set_frame_pointer (offsets);
16831 /* If we are profiling, make sure no instructions are scheduled before
16832 the call to mcount. Similarly if the user has requested no
16833 scheduling in the prolog. Similarly if we want non-call exceptions
16834 using the EABI unwinder, to prevent faulting instructions from being
16835 swapped with a stack adjustment. */
16836 if (current_function_profile || !TARGET_SCHED_PROLOG
16837 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16838 emit_insn (gen_blockage ());
16840 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16841 if (live_regs_mask & 0xff)
16842 cfun->machine->lr_save_eliminated = 0;
16847 thumb1_expand_epilogue (void)
16849 HOST_WIDE_INT amount;
16850 arm_stack_offsets *offsets;
16853 /* Naked functions don't have prologues. */
16854 if (IS_NAKED (arm_current_func_type ()))
16857 offsets = arm_get_frame_offsets ();
16858 amount = offsets->outgoing_args - offsets->saved_regs;
16860 if (frame_pointer_needed)
16862 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16863 amount = offsets->locals_base - offsets->saved_regs;
16866 gcc_assert (amount >= 0);
16870 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16871 GEN_INT (amount)));
16874 /* r3 is always free in the epilogue. */
16875 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
16877 emit_insn (gen_movsi (reg, GEN_INT (amount)));
16878 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
16882 /* Emit a USE (stack_pointer_rtx), so that
16883 the stack adjustment will not be deleted. */
16884 emit_insn (gen_prologue_use (stack_pointer_rtx));
16886 if (current_function_profile || !TARGET_SCHED_PROLOG)
16887 emit_insn (gen_blockage ());
16889 /* Emit a clobber for each insn that will be restored in the epilogue,
16890 so that flow2 will get register lifetimes correct. */
16891 for (regno = 0; regno < 13; regno++)
16892 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
16893 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
16895 if (! df_regs_ever_live_p (LR_REGNUM))
16896 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
16900 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
16902 unsigned long live_regs_mask = 0;
16903 unsigned long l_mask;
16904 unsigned high_regs_pushed = 0;
16905 int cfa_offset = 0;
16908 if (IS_NAKED (arm_current_func_type ()))
16911 if (is_called_in_ARM_mode (current_function_decl))
16915 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
16916 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
16918 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
16920 /* Generate code sequence to switch us into Thumb mode. */
16921 /* The .code 32 directive has already been emitted by
16922 ASM_DECLARE_FUNCTION_NAME. */
16923 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
16924 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
16926 /* Generate a label, so that the debugger will notice the
16927 change in instruction sets. This label is also used by
16928 the assembler to bypass the ARM code when this function
16929 is called from a Thumb encoded function elsewhere in the
16930 same file. Hence the definition of STUB_NAME here must
16931 agree with the definition in gas/config/tc-arm.c. */
16933 #define STUB_NAME ".real_start_of"
16935 fprintf (f, "\t.code\t16\n");
16937 if (arm_dllexport_name_p (name))
16938 name = arm_strip_name_encoding (name);
16940 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
16941 fprintf (f, "\t.thumb_func\n");
16942 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
16945 if (current_function_pretend_args_size)
16947 /* Output unwind directive for the stack adjustment. */
16948 if (ARM_EABI_UNWIND_TABLES)
16949 fprintf (f, "\t.pad #%d\n",
16950 current_function_pretend_args_size);
16952 if (cfun->machine->uses_anonymous_args)
16956 fprintf (f, "\tpush\t{");
16958 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
16960 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
16961 regno <= LAST_ARG_REGNUM;
16963 asm_fprintf (f, "%r%s", regno,
16964 regno == LAST_ARG_REGNUM ? "" : ", ");
16966 fprintf (f, "}\n");
16969 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
16970 SP_REGNUM, SP_REGNUM,
16971 current_function_pretend_args_size);
16973 /* We don't need to record the stores for unwinding (would it
16974 help the debugger any if we did?), but record the change in
16975 the stack pointer. */
16976 if (dwarf2out_do_frame ())
16978 char *l = dwarf2out_cfi_label ();
16980 cfa_offset = cfa_offset + current_function_pretend_args_size;
16981 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
16985 /* Get the registers we are going to push. */
16986 live_regs_mask = thumb1_compute_save_reg_mask ();
16987 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
16988 l_mask = live_regs_mask & 0x40ff;
16989 /* Then count how many other high registers will need to be pushed. */
16990 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16992 if (TARGET_BACKTRACE)
16995 unsigned work_register;
16997 /* We have been asked to create a stack backtrace structure.
16998 The code looks like this:
17002 0 sub SP, #16 Reserve space for 4 registers.
17003 2 push {R7} Push low registers.
17004 4 add R7, SP, #20 Get the stack pointer before the push.
17005 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17006 8 mov R7, PC Get hold of the start of this code plus 12.
17007 10 str R7, [SP, #16] Store it.
17008 12 mov R7, FP Get hold of the current frame pointer.
17009 14 str R7, [SP, #4] Store it.
17010 16 mov R7, LR Get hold of the current return address.
17011 18 str R7, [SP, #12] Store it.
17012 20 add R7, SP, #16 Point at the start of the backtrace structure.
17013 22 mov FP, R7 Put this value into the frame pointer. */
17015 work_register = thumb_find_work_register (live_regs_mask);
17017 if (ARM_EABI_UNWIND_TABLES)
17018 asm_fprintf (f, "\t.pad #16\n");
17021 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17022 SP_REGNUM, SP_REGNUM);
17024 if (dwarf2out_do_frame ())
17026 char *l = dwarf2out_cfi_label ();
17028 cfa_offset = cfa_offset + 16;
17029 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17034 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17035 offset = bit_count (l_mask) * UNITS_PER_WORD;
17040 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17041 offset + 16 + current_function_pretend_args_size);
17043 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17046 /* Make sure that the instruction fetching the PC is in the right place
17047 to calculate "start of backtrace creation code + 12". */
17050 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17051 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17053 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17054 ARM_HARD_FRAME_POINTER_REGNUM);
17055 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17060 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17061 ARM_HARD_FRAME_POINTER_REGNUM);
17062 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17064 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17065 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17069 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17070 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17072 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17074 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17075 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17077 /* Optimization: If we are not pushing any low registers but we are going
17078 to push some high registers then delay our first push. This will just
17079 be a push of LR and we can combine it with the push of the first high
17081 else if ((l_mask & 0xff) != 0
17082 || (high_regs_pushed == 0 && l_mask))
17083 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17085 if (high_regs_pushed)
17087 unsigned pushable_regs;
17088 unsigned next_hi_reg;
17090 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17091 if (live_regs_mask & (1 << next_hi_reg))
17094 pushable_regs = l_mask & 0xff;
17096 if (pushable_regs == 0)
17097 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17099 while (high_regs_pushed > 0)
17101 unsigned long real_regs_mask = 0;
17103 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17105 if (pushable_regs & (1 << regno))
17107 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17109 high_regs_pushed --;
17110 real_regs_mask |= (1 << next_hi_reg);
17112 if (high_regs_pushed)
17114 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17116 if (live_regs_mask & (1 << next_hi_reg))
17121 pushable_regs &= ~((1 << regno) - 1);
17127 /* If we had to find a work register and we have not yet
17128 saved the LR then add it to the list of regs to push. */
17129 if (l_mask == (1 << LR_REGNUM))
17131 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17133 real_regs_mask | (1 << LR_REGNUM));
17137 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17142 /* Handle the case of a double word load into a low register from
17143 a computed memory address. The computed address may involve a
17144 register which is overwritten by the load. */
17146 thumb_load_double_from_address (rtx *operands)
17154 gcc_assert (GET_CODE (operands[0]) == REG);
17155 gcc_assert (GET_CODE (operands[1]) == MEM);
17157 /* Get the memory address. */
17158 addr = XEXP (operands[1], 0);
17160 /* Work out how the memory address is computed. */
17161 switch (GET_CODE (addr))
17164 operands[2] = adjust_address (operands[1], SImode, 4);
17166 if (REGNO (operands[0]) == REGNO (addr))
17168 output_asm_insn ("ldr\t%H0, %2", operands);
17169 output_asm_insn ("ldr\t%0, %1", operands);
17173 output_asm_insn ("ldr\t%0, %1", operands);
17174 output_asm_insn ("ldr\t%H0, %2", operands);
17179 /* Compute <address> + 4 for the high order load. */
17180 operands[2] = adjust_address (operands[1], SImode, 4);
17182 output_asm_insn ("ldr\t%0, %1", operands);
17183 output_asm_insn ("ldr\t%H0, %2", operands);
17187 arg1 = XEXP (addr, 0);
17188 arg2 = XEXP (addr, 1);
17190 if (CONSTANT_P (arg1))
17191 base = arg2, offset = arg1;
17193 base = arg1, offset = arg2;
17195 gcc_assert (GET_CODE (base) == REG);
17197 /* Catch the case of <address> = <reg> + <reg> */
17198 if (GET_CODE (offset) == REG)
17200 int reg_offset = REGNO (offset);
17201 int reg_base = REGNO (base);
17202 int reg_dest = REGNO (operands[0]);
17204 /* Add the base and offset registers together into the
17205 higher destination register. */
17206 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17207 reg_dest + 1, reg_base, reg_offset);
17209 /* Load the lower destination register from the address in
17210 the higher destination register. */
17211 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17212 reg_dest, reg_dest + 1);
17214 /* Load the higher destination register from its own address
17216 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17217 reg_dest + 1, reg_dest + 1);
17221 /* Compute <address> + 4 for the high order load. */
17222 operands[2] = adjust_address (operands[1], SImode, 4);
17224 /* If the computed address is held in the low order register
17225 then load the high order register first, otherwise always
17226 load the low order register first. */
17227 if (REGNO (operands[0]) == REGNO (base))
17229 output_asm_insn ("ldr\t%H0, %2", operands);
17230 output_asm_insn ("ldr\t%0, %1", operands);
17234 output_asm_insn ("ldr\t%0, %1", operands);
17235 output_asm_insn ("ldr\t%H0, %2", operands);
17241 /* With no registers to worry about we can just load the value
17243 operands[2] = adjust_address (operands[1], SImode, 4);
17245 output_asm_insn ("ldr\t%H0, %2", operands);
17246 output_asm_insn ("ldr\t%0, %1", operands);
17250 gcc_unreachable ();
17257 thumb_output_move_mem_multiple (int n, rtx *operands)
17264 if (REGNO (operands[4]) > REGNO (operands[5]))
17267 operands[4] = operands[5];
17270 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17271 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17275 if (REGNO (operands[4]) > REGNO (operands[5]))
17278 operands[4] = operands[5];
17281 if (REGNO (operands[5]) > REGNO (operands[6]))
17284 operands[5] = operands[6];
17287 if (REGNO (operands[4]) > REGNO (operands[5]))
17290 operands[4] = operands[5];
17294 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17295 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17299 gcc_unreachable ();
17305 /* Output a call-via instruction for thumb state. */
17307 thumb_call_via_reg (rtx reg)
17309 int regno = REGNO (reg);
17312 gcc_assert (regno < LR_REGNUM);
17314 /* If we are in the normal text section we can use a single instance
17315 per compilation unit. If we are doing function sections, then we need
17316 an entry per section, since we can't rely on reachability. */
17317 if (in_section == text_section)
17319 thumb_call_reg_needed = 1;
17321 if (thumb_call_via_label[regno] == NULL)
17322 thumb_call_via_label[regno] = gen_label_rtx ();
17323 labelp = thumb_call_via_label + regno;
17327 if (cfun->machine->call_via[regno] == NULL)
17328 cfun->machine->call_via[regno] = gen_label_rtx ();
17329 labelp = cfun->machine->call_via + regno;
17332 output_asm_insn ("bl\t%a0", labelp);
17336 /* Routines for generating rtl. */
17338 thumb_expand_movmemqi (rtx *operands)
17340 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17341 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17342 HOST_WIDE_INT len = INTVAL (operands[2]);
17343 HOST_WIDE_INT offset = 0;
17347 emit_insn (gen_movmem12b (out, in, out, in));
17353 emit_insn (gen_movmem8b (out, in, out, in));
17359 rtx reg = gen_reg_rtx (SImode);
17360 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17361 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17368 rtx reg = gen_reg_rtx (HImode);
17369 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17370 plus_constant (in, offset))));
17371 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17379 rtx reg = gen_reg_rtx (QImode);
17380 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17381 plus_constant (in, offset))));
17382 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17388 thumb_reload_out_hi (rtx *operands)
17390 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17393 /* Handle reading a half-word from memory during reload. */
17395 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17397 gcc_unreachable ();
17400 /* Return the length of a function name prefix
17401 that starts with the character 'c'. */
17403 arm_get_strip_length (int c)
17407 ARM_NAME_ENCODING_LENGTHS
17412 /* Return a pointer to a function's name with any
17413 and all prefix encodings stripped from it. */
17415 arm_strip_name_encoding (const char *name)
17419 while ((skip = arm_get_strip_length (* name)))
17425 /* If there is a '*' anywhere in the name's prefix, then
17426 emit the stripped name verbatim, otherwise prepend an
17427 underscore if leading underscores are being used. */
17429 arm_asm_output_labelref (FILE *stream, const char *name)
17434 while ((skip = arm_get_strip_length (* name)))
17436 verbatim |= (*name == '*');
17441 fputs (name, stream);
17443 asm_fprintf (stream, "%U%s", name);
17447 arm_file_start (void)
17451 if (TARGET_UNIFIED_ASM)
17452 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17456 const char *fpu_name;
17457 if (arm_select[0].string)
17458 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17459 else if (arm_select[1].string)
17460 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17462 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17463 all_cores[arm_default_cpu].name);
17465 if (TARGET_SOFT_FLOAT)
17468 fpu_name = "softvfp";
17470 fpu_name = "softfpa";
17474 int set_float_abi_attributes = 0;
17475 switch (arm_fpu_arch)
17480 case FPUTYPE_FPA_EMU2:
17483 case FPUTYPE_FPA_EMU3:
17486 case FPUTYPE_MAVERICK:
17487 fpu_name = "maverick";
17491 set_float_abi_attributes = 1;
17495 set_float_abi_attributes = 1;
17499 set_float_abi_attributes = 1;
17504 if (set_float_abi_attributes)
17506 if (TARGET_HARD_FLOAT)
17507 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17508 if (TARGET_HARD_FLOAT_ABI)
17509 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17512 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17514 /* Some of these attributes only apply when the corresponding features
17515 are used. However we don't have any easy way of figuring this out.
17516 Conservatively record the setting that would have been used. */
17518 /* Tag_ABI_PCS_wchar_t. */
17519 asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
17520 (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
17522 /* Tag_ABI_FP_rounding. */
17523 if (flag_rounding_math)
17524 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17525 if (!flag_unsafe_math_optimizations)
17527 /* Tag_ABI_FP_denomal. */
17528 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17529 /* Tag_ABI_FP_exceptions. */
17530 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17532 /* Tag_ABI_FP_user_exceptions. */
17533 if (flag_signaling_nans)
17534 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17535 /* Tag_ABI_FP_number_model. */
17536 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17537 flag_finite_math_only ? 1 : 3);
17539 /* Tag_ABI_align8_needed. */
17540 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17541 /* Tag_ABI_align8_preserved. */
17542 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17543 /* Tag_ABI_enum_size. */
17544 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17545 flag_short_enums ? 1 : 2);
17547 /* Tag_ABI_optimization_goals. */
17550 else if (optimize >= 2)
17556 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17558 default_file_start();
17562 arm_file_end (void)
17566 if (NEED_INDICATE_EXEC_STACK)
17567 /* Add .note.GNU-stack. */
17568 file_end_indicate_exec_stack ();
17570 if (! thumb_call_reg_needed)
17573 switch_to_section (text_section);
17574 asm_fprintf (asm_out_file, "\t.code 16\n");
17575 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17577 for (regno = 0; regno < LR_REGNUM; regno++)
17579 rtx label = thumb_call_via_label[regno];
17583 targetm.asm_out.internal_label (asm_out_file, "L",
17584 CODE_LABEL_NUMBER (label));
17585 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17592 #ifdef AOF_ASSEMBLER
17593 /* Special functions only needed when producing AOF syntax assembler. */
17597 struct pic_chain * next;
17598 const char * symname;
17601 static struct pic_chain * aof_pic_chain = NULL;
17604 aof_pic_entry (rtx x)
17606 struct pic_chain ** chainp;
17609 if (aof_pic_label == NULL_RTX)
17611 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
17614 for (offset = 0, chainp = &aof_pic_chain; *chainp;
17615 offset += 4, chainp = &(*chainp)->next)
17616 if ((*chainp)->symname == XSTR (x, 0))
17617 return plus_constant (aof_pic_label, offset);
17619 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
17620 (*chainp)->next = NULL;
17621 (*chainp)->symname = XSTR (x, 0);
17622 return plus_constant (aof_pic_label, offset);
17626 aof_dump_pic_table (FILE *f)
17628 struct pic_chain * chain;
17630 if (aof_pic_chain == NULL)
17633 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
17634 PIC_OFFSET_TABLE_REGNUM,
17635 PIC_OFFSET_TABLE_REGNUM);
17636 fputs ("|x$adcons|\n", f);
17638 for (chain = aof_pic_chain; chain; chain = chain->next)
17640 fputs ("\tDCD\t", f);
17641 assemble_name (f, chain->symname);
17646 int arm_text_section_count = 1;
17648 /* A get_unnamed_section callback for switching to the text section. */
17651 aof_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17653 fprintf (asm_out_file, "\tAREA |C$$code%d|, CODE, READONLY",
17654 arm_text_section_count++);
17656 fprintf (asm_out_file, ", PIC, REENTRANT");
17657 fprintf (asm_out_file, "\n");
17660 static int arm_data_section_count = 1;
17662 /* A get_unnamed_section callback for switching to the data section. */
17665 aof_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
17667 fprintf (asm_out_file, "\tAREA |C$$data%d|, DATA\n",
17668 arm_data_section_count++);
17671 /* Implement TARGET_ASM_INIT_SECTIONS.
17673 AOF Assembler syntax is a nightmare when it comes to areas, since once
17674 we change from one area to another, we can't go back again. Instead,
17675 we must create a new area with the same attributes and add the new output
17676 to that. Unfortunately, there is nothing we can do here to guarantee that
17677 two areas with the same attributes will be linked adjacently in the
17678 resulting executable, so we have to be careful not to do pc-relative
17679 addressing across such boundaries. */
17682 aof_asm_init_sections (void)
17684 text_section = get_unnamed_section (SECTION_CODE,
17685 aof_output_text_section_asm_op, NULL);
17686 data_section = get_unnamed_section (SECTION_WRITE,
17687 aof_output_data_section_asm_op, NULL);
17688 readonly_data_section = text_section;
17692 zero_init_section (void)
17694 static int zero_init_count = 1;
17696 fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n", zero_init_count++);
17700 /* The AOF assembler is religiously strict about declarations of
17701 imported and exported symbols, so that it is impossible to declare
17702 a function as imported near the beginning of the file, and then to
17703 export it later on. It is, however, possible to delay the decision
17704 until all the functions in the file have been compiled. To get
17705 around this, we maintain a list of the imports and exports, and
17706 delete from it any that are subsequently defined. At the end of
17707 compilation we spit the remainder of the list out before the END
17712 struct import * next;
17716 static struct import * imports_list = NULL;
17719 aof_add_import (const char *name)
17721 struct import * new;
17723 for (new = imports_list; new; new = new->next)
17724 if (new->name == name)
17727 new = (struct import *) xmalloc (sizeof (struct import));
17728 new->next = imports_list;
17729 imports_list = new;
17734 aof_delete_import (const char *name)
17736 struct import ** old;
17738 for (old = &imports_list; *old; old = & (*old)->next)
17740 if ((*old)->name == name)
17742 *old = (*old)->next;
17748 int arm_main_function = 0;
17751 aof_dump_imports (FILE *f)
17753 /* The AOF assembler needs this to cause the startup code to be extracted
17754 from the library. Brining in __main causes the whole thing to work
17756 if (arm_main_function)
17758 switch_to_section (text_section);
17759 fputs ("\tIMPORT __main\n", f);
17760 fputs ("\tDCD __main\n", f);
17763 /* Now dump the remaining imports. */
17764 while (imports_list)
17766 fprintf (f, "\tIMPORT\t");
17767 assemble_name (f, imports_list->name);
17769 imports_list = imports_list->next;
17774 aof_globalize_label (FILE *stream, const char *name)
17776 default_globalize_label (stream, name);
17777 if (! strcmp (name, "main"))
17778 arm_main_function = 1;
17782 aof_file_start (void)
17784 fputs ("__r0\tRN\t0\n", asm_out_file);
17785 fputs ("__a1\tRN\t0\n", asm_out_file);
17786 fputs ("__a2\tRN\t1\n", asm_out_file);
17787 fputs ("__a3\tRN\t2\n", asm_out_file);
17788 fputs ("__a4\tRN\t3\n", asm_out_file);
17789 fputs ("__v1\tRN\t4\n", asm_out_file);
17790 fputs ("__v2\tRN\t5\n", asm_out_file);
17791 fputs ("__v3\tRN\t6\n", asm_out_file);
17792 fputs ("__v4\tRN\t7\n", asm_out_file);
17793 fputs ("__v5\tRN\t8\n", asm_out_file);
17794 fputs ("__v6\tRN\t9\n", asm_out_file);
17795 fputs ("__sl\tRN\t10\n", asm_out_file);
17796 fputs ("__fp\tRN\t11\n", asm_out_file);
17797 fputs ("__ip\tRN\t12\n", asm_out_file);
17798 fputs ("__sp\tRN\t13\n", asm_out_file);
17799 fputs ("__lr\tRN\t14\n", asm_out_file);
17800 fputs ("__pc\tRN\t15\n", asm_out_file);
17801 fputs ("__f0\tFN\t0\n", asm_out_file);
17802 fputs ("__f1\tFN\t1\n", asm_out_file);
17803 fputs ("__f2\tFN\t2\n", asm_out_file);
17804 fputs ("__f3\tFN\t3\n", asm_out_file);
17805 fputs ("__f4\tFN\t4\n", asm_out_file);
17806 fputs ("__f5\tFN\t5\n", asm_out_file);
17807 fputs ("__f6\tFN\t6\n", asm_out_file);
17808 fputs ("__f7\tFN\t7\n", asm_out_file);
17809 switch_to_section (text_section);
17813 aof_file_end (void)
17816 aof_dump_pic_table (asm_out_file);
17818 aof_dump_imports (asm_out_file);
17819 fputs ("\tEND\n", asm_out_file);
17821 #endif /* AOF_ASSEMBLER */
17824 /* Symbols in the text segment can be accessed without indirecting via the
17825 constant pool; it may take an extra binary operation, but this is still
17826 faster than indirecting via memory. Don't do this when not optimizing,
17827 since we won't be calculating al of the offsets necessary to do this
17831 arm_encode_section_info (tree decl, rtx rtl, int first)
17833 /* This doesn't work with AOF syntax, since the string table may be in
17834 a different AREA. */
17835 #ifndef AOF_ASSEMBLER
17836 if (optimize > 0 && TREE_CONSTANT (decl))
17837 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17840 default_encode_section_info (decl, rtl, first);
17842 #endif /* !ARM_PE */
17845 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17847 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17848 && !strcmp (prefix, "L"))
17850 arm_ccfsm_state = 0;
17851 arm_target_insn = NULL;
17853 default_internal_label (stream, prefix, labelno);
17856 /* Output code to add DELTA to the first argument, and then jump
17857 to FUNCTION. Used for C++ multiple inheritance. */
17859 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17860 HOST_WIDE_INT delta,
17861 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17864 static int thunk_label = 0;
17867 int mi_delta = delta;
17868 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17870 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17873 mi_delta = - mi_delta;
17874 /* When generating 16-bit thumb code, thunks are entered in arm mode. */
17877 int labelno = thunk_label++;
17878 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17879 fputs ("\tldr\tr12, ", file);
17880 assemble_name (file, label);
17881 fputc ('\n', file);
17884 /* If we are generating PIC, the ldr instruction below loads
17885 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17886 the address of the add + 8, so we have:
17888 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17891 Note that we have "+ 1" because some versions of GNU ld
17892 don't set the low bit of the result for R_ARM_REL32
17893 relocations against thumb function symbols. */
17894 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17895 assemble_name (file, labelpc);
17896 fputs (":\n", file);
17897 fputs ("\tadd\tr12, pc, r12\n", file);
17900 /* TODO: Use movw/movt for large constants when available. */
17901 while (mi_delta != 0)
17903 if ((mi_delta & (3 << shift)) == 0)
17907 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17908 mi_op, this_regno, this_regno,
17909 mi_delta & (0xff << shift));
17910 mi_delta &= ~(0xff << shift);
17916 fprintf (file, "\tbx\tr12\n");
17917 ASM_OUTPUT_ALIGN (file, 2);
17918 assemble_name (file, label);
17919 fputs (":\n", file);
17922 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17923 rtx tem = XEXP (DECL_RTL (function), 0);
17924 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17925 tem = gen_rtx_MINUS (GET_MODE (tem),
17927 gen_rtx_SYMBOL_REF (Pmode,
17928 ggc_strdup (labelpc)));
17929 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17932 /* Output ".word .LTHUNKn". */
17933 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17937 fputs ("\tb\t", file);
17938 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17939 if (NEED_PLT_RELOC)
17940 fputs ("(PLT)", file);
17941 fputc ('\n', file);
17946 arm_emit_vector_const (FILE *file, rtx x)
17949 const char * pattern;
17951 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17953 switch (GET_MODE (x))
17955 case V2SImode: pattern = "%08x"; break;
17956 case V4HImode: pattern = "%04x"; break;
17957 case V8QImode: pattern = "%02x"; break;
17958 default: gcc_unreachable ();
17961 fprintf (file, "0x");
17962 for (i = CONST_VECTOR_NUNITS (x); i--;)
17966 element = CONST_VECTOR_ELT (x, i);
17967 fprintf (file, pattern, INTVAL (element));
17974 arm_output_load_gr (rtx *operands)
17981 if (GET_CODE (operands [1]) != MEM
17982 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17983 || GET_CODE (reg = XEXP (sum, 0)) != REG
17984 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17985 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17986 return "wldrw%?\t%0, %1";
17988 /* Fix up an out-of-range load of a GR register. */
17989 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17990 wcgr = operands[0];
17992 output_asm_insn ("ldr%?\t%0, %1", operands);
17994 operands[0] = wcgr;
17996 output_asm_insn ("tmcr%?\t%0, %1", operands);
17997 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18002 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18004 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18005 named arg and all anonymous args onto the stack.
18006 XXX I know the prologue shouldn't be pushing registers, but it is faster
18010 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18011 enum machine_mode mode ATTRIBUTE_UNUSED,
18012 tree type ATTRIBUTE_UNUSED,
18014 int second_time ATTRIBUTE_UNUSED)
18016 cfun->machine->uses_anonymous_args = 1;
18017 if (cum->nregs < NUM_ARG_REGS)
18018 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
18021 /* Return nonzero if the CONSUMER instruction (a store) does not need
18022 PRODUCER's value to calculate the address. */
18025 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18027 rtx value = PATTERN (producer);
18028 rtx addr = PATTERN (consumer);
18030 if (GET_CODE (value) == COND_EXEC)
18031 value = COND_EXEC_CODE (value);
18032 if (GET_CODE (value) == PARALLEL)
18033 value = XVECEXP (value, 0, 0);
18034 value = XEXP (value, 0);
18035 if (GET_CODE (addr) == COND_EXEC)
18036 addr = COND_EXEC_CODE (addr);
18037 if (GET_CODE (addr) == PARALLEL)
18038 addr = XVECEXP (addr, 0, 0);
18039 addr = XEXP (addr, 0);
18041 return !reg_overlap_mentioned_p (value, addr);
18044 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18045 have an early register shift value or amount dependency on the
18046 result of PRODUCER. */
18049 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18051 rtx value = PATTERN (producer);
18052 rtx op = PATTERN (consumer);
18055 if (GET_CODE (value) == COND_EXEC)
18056 value = COND_EXEC_CODE (value);
18057 if (GET_CODE (value) == PARALLEL)
18058 value = XVECEXP (value, 0, 0);
18059 value = XEXP (value, 0);
18060 if (GET_CODE (op) == COND_EXEC)
18061 op = COND_EXEC_CODE (op);
18062 if (GET_CODE (op) == PARALLEL)
18063 op = XVECEXP (op, 0, 0);
18066 early_op = XEXP (op, 0);
18067 /* This is either an actual independent shift, or a shift applied to
18068 the first operand of another operation. We want the whole shift
18070 if (GET_CODE (early_op) == REG)
18073 return !reg_overlap_mentioned_p (value, early_op);
18076 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18077 have an early register shift value dependency on the result of
18081 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18083 rtx value = PATTERN (producer);
18084 rtx op = PATTERN (consumer);
18087 if (GET_CODE (value) == COND_EXEC)
18088 value = COND_EXEC_CODE (value);
18089 if (GET_CODE (value) == PARALLEL)
18090 value = XVECEXP (value, 0, 0);
18091 value = XEXP (value, 0);
18092 if (GET_CODE (op) == COND_EXEC)
18093 op = COND_EXEC_CODE (op);
18094 if (GET_CODE (op) == PARALLEL)
18095 op = XVECEXP (op, 0, 0);
18098 early_op = XEXP (op, 0);
18100 /* This is either an actual independent shift, or a shift applied to
18101 the first operand of another operation. We want the value being
18102 shifted, in either case. */
18103 if (GET_CODE (early_op) != REG)
18104 early_op = XEXP (early_op, 0);
18106 return !reg_overlap_mentioned_p (value, early_op);
18109 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18110 have an early register mult dependency on the result of
18114 arm_no_early_mul_dep (rtx producer, rtx consumer)
18116 rtx value = PATTERN (producer);
18117 rtx op = PATTERN (consumer);
18119 if (GET_CODE (value) == COND_EXEC)
18120 value = COND_EXEC_CODE (value);
18121 if (GET_CODE (value) == PARALLEL)
18122 value = XVECEXP (value, 0, 0);
18123 value = XEXP (value, 0);
18124 if (GET_CODE (op) == COND_EXEC)
18125 op = COND_EXEC_CODE (op);
18126 if (GET_CODE (op) == PARALLEL)
18127 op = XVECEXP (op, 0, 0);
18130 return (GET_CODE (op) == PLUS
18131 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18134 /* We can't rely on the caller doing the proper promotion when
18135 using APCS or ATPCS. */
18138 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
18140 return !TARGET_AAPCS_BASED;
18144 /* AAPCS based ABIs use short enums by default. */
18147 arm_default_short_enums (void)
18149 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18153 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18156 arm_align_anon_bitfield (void)
18158 return TARGET_AAPCS_BASED;
18162 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18165 arm_cxx_guard_type (void)
18167 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18171 /* The EABI says test the least significant bit of a guard variable. */
18174 arm_cxx_guard_mask_bit (void)
18176 return TARGET_AAPCS_BASED;
18180 /* The EABI specifies that all array cookies are 8 bytes long. */
18183 arm_get_cookie_size (tree type)
18187 if (!TARGET_AAPCS_BASED)
18188 return default_cxx_get_cookie_size (type);
18190 size = build_int_cst (sizetype, 8);
18195 /* The EABI says that array cookies should also contain the element size. */
18198 arm_cookie_has_size (void)
18200 return TARGET_AAPCS_BASED;
18204 /* The EABI says constructors and destructors should return a pointer to
18205 the object constructed/destroyed. */
18208 arm_cxx_cdtor_returns_this (void)
18210 return TARGET_AAPCS_BASED;
18213 /* The EABI says that an inline function may never be the key
18217 arm_cxx_key_method_may_be_inline (void)
18219 return !TARGET_AAPCS_BASED;
18223 arm_cxx_determine_class_data_visibility (tree decl)
18225 if (!TARGET_AAPCS_BASED)
18228 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18229 is exported. However, on systems without dynamic vague linkage,
18230 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18231 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18232 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18234 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18235 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18239 arm_cxx_class_data_always_comdat (void)
18241 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18242 vague linkage if the class has no key function. */
18243 return !TARGET_AAPCS_BASED;
18247 /* The EABI says __aeabi_atexit should be used to register static
18251 arm_cxx_use_aeabi_atexit (void)
18253 return TARGET_AAPCS_BASED;
18258 arm_set_return_address (rtx source, rtx scratch)
18260 arm_stack_offsets *offsets;
18261 HOST_WIDE_INT delta;
18263 unsigned long saved_regs;
18265 saved_regs = arm_compute_save_reg_mask ();
18267 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18268 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18271 if (frame_pointer_needed)
18272 addr = plus_constant(hard_frame_pointer_rtx, -4);
18275 /* LR will be the first saved register. */
18276 offsets = arm_get_frame_offsets ();
18277 delta = offsets->outgoing_args - (offsets->frame + 4);
18282 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18283 GEN_INT (delta & ~4095)));
18288 addr = stack_pointer_rtx;
18290 addr = plus_constant (addr, delta);
18292 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18298 thumb_set_return_address (rtx source, rtx scratch)
18300 arm_stack_offsets *offsets;
18301 HOST_WIDE_INT delta;
18302 HOST_WIDE_INT limit;
18305 unsigned long mask;
18307 emit_insn (gen_rtx_USE (VOIDmode, source));
18309 mask = thumb1_compute_save_reg_mask ();
18310 if (mask & (1 << LR_REGNUM))
18312 offsets = arm_get_frame_offsets ();
18315 /* Find the saved regs. */
18316 if (frame_pointer_needed)
18318 delta = offsets->soft_frame - offsets->saved_args;
18319 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18325 delta = offsets->outgoing_args - offsets->saved_args;
18328 /* Allow for the stack frame. */
18329 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18331 /* The link register is always the first saved register. */
18334 /* Construct the address. */
18335 addr = gen_rtx_REG (SImode, reg);
18338 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18339 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18343 addr = plus_constant (addr, delta);
18345 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18348 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18351 /* Implements target hook vector_mode_supported_p. */
18353 arm_vector_mode_supported_p (enum machine_mode mode)
18355 /* Neon also supports V2SImode, etc. listed in the clause below. */
18356 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18357 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18360 if ((mode == V2SImode)
18361 || (mode == V4HImode)
18362 || (mode == V8QImode))
18368 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18369 ARM insns and therefore guarantee that the shift count is modulo 256.
18370 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18371 guarantee no particular behavior for out-of-range counts. */
18373 static unsigned HOST_WIDE_INT
18374 arm_shift_truncation_mask (enum machine_mode mode)
18376 return mode == SImode ? 255 : 0;
18380 /* Map internal gcc register numbers to DWARF2 register numbers. */
18383 arm_dbx_register_number (unsigned int regno)
18388 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18389 compatibility. The EABI defines them as registers 96-103. */
18390 if (IS_FPA_REGNUM (regno))
18391 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18393 /* FIXME: VFPv3 register numbering. */
18394 if (IS_VFP_REGNUM (regno))
18395 return 64 + regno - FIRST_VFP_REGNUM;
18397 if (IS_IWMMXT_GR_REGNUM (regno))
18398 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18400 if (IS_IWMMXT_REGNUM (regno))
18401 return 112 + regno - FIRST_IWMMXT_REGNUM;
18403 gcc_unreachable ();
18407 #ifdef TARGET_UNWIND_INFO
18408 /* Emit unwind directives for a store-multiple instruction or stack pointer
18409 push during alignment.
18410 These should only ever be generated by the function prologue code, so
18411 expect them to have a particular form. */
18414 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18417 HOST_WIDE_INT offset;
18418 HOST_WIDE_INT nregs;
18424 e = XVECEXP (p, 0, 0);
18425 if (GET_CODE (e) != SET)
18428 /* First insn will adjust the stack pointer. */
18429 if (GET_CODE (e) != SET
18430 || GET_CODE (XEXP (e, 0)) != REG
18431 || REGNO (XEXP (e, 0)) != SP_REGNUM
18432 || GET_CODE (XEXP (e, 1)) != PLUS)
18435 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18436 nregs = XVECLEN (p, 0) - 1;
18438 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18441 /* The function prologue may also push pc, but not annotate it as it is
18442 never restored. We turn this into a stack pointer adjustment. */
18443 if (nregs * 4 == offset - 4)
18445 fprintf (asm_out_file, "\t.pad #4\n");
18449 fprintf (asm_out_file, "\t.save {");
18451 else if (IS_VFP_REGNUM (reg))
18454 fprintf (asm_out_file, "\t.vsave {");
18456 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18458 /* FPA registers are done differently. */
18459 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18463 /* Unknown register type. */
18466 /* If the stack increment doesn't match the size of the saved registers,
18467 something has gone horribly wrong. */
18468 if (offset != nregs * reg_size)
18473 /* The remaining insns will describe the stores. */
18474 for (i = 1; i <= nregs; i++)
18476 /* Expect (set (mem <addr>) (reg)).
18477 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18478 e = XVECEXP (p, 0, i);
18479 if (GET_CODE (e) != SET
18480 || GET_CODE (XEXP (e, 0)) != MEM
18481 || GET_CODE (XEXP (e, 1)) != REG)
18484 reg = REGNO (XEXP (e, 1));
18489 fprintf (asm_out_file, ", ");
18490 /* We can't use %r for vfp because we need to use the
18491 double precision register names. */
18492 if (IS_VFP_REGNUM (reg))
18493 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18495 asm_fprintf (asm_out_file, "%r", reg);
18497 #ifdef ENABLE_CHECKING
18498 /* Check that the addresses are consecutive. */
18499 e = XEXP (XEXP (e, 0), 0);
18500 if (GET_CODE (e) == PLUS)
18502 offset += reg_size;
18503 if (GET_CODE (XEXP (e, 0)) != REG
18504 || REGNO (XEXP (e, 0)) != SP_REGNUM
18505 || GET_CODE (XEXP (e, 1)) != CONST_INT
18506 || offset != INTVAL (XEXP (e, 1)))
18510 || GET_CODE (e) != REG
18511 || REGNO (e) != SP_REGNUM)
18515 fprintf (asm_out_file, "}\n");
18518 /* Emit unwind directives for a SET. */
18521 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18529 switch (GET_CODE (e0))
18532 /* Pushing a single register. */
18533 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18534 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18535 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18538 asm_fprintf (asm_out_file, "\t.save ");
18539 if (IS_VFP_REGNUM (REGNO (e1)))
18540 asm_fprintf(asm_out_file, "{d%d}\n",
18541 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18543 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18547 if (REGNO (e0) == SP_REGNUM)
18549 /* A stack increment. */
18550 if (GET_CODE (e1) != PLUS
18551 || GET_CODE (XEXP (e1, 0)) != REG
18552 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18553 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18556 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18557 -INTVAL (XEXP (e1, 1)));
18559 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18561 HOST_WIDE_INT offset;
18563 if (GET_CODE (e1) == PLUS)
18565 if (GET_CODE (XEXP (e1, 0)) != REG
18566 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18568 reg = REGNO (XEXP (e1, 0));
18569 offset = INTVAL (XEXP (e1, 1));
18570 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18571 HARD_FRAME_POINTER_REGNUM, reg,
18572 INTVAL (XEXP (e1, 1)));
18574 else if (GET_CODE (e1) == REG)
18577 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18578 HARD_FRAME_POINTER_REGNUM, reg);
18583 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18585 /* Move from sp to reg. */
18586 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18588 else if (GET_CODE (e1) == PLUS
18589 && GET_CODE (XEXP (e1, 0)) == REG
18590 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18591 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18593 /* Set reg to offset from sp. */
18594 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18595 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18597 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18599 /* Stack pointer save before alignment. */
18601 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18614 /* Emit unwind directives for the given insn. */
18617 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18621 if (!ARM_EABI_UNWIND_TABLES)
18624 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18627 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18629 pat = XEXP (pat, 0);
18631 pat = PATTERN (insn);
18633 switch (GET_CODE (pat))
18636 arm_unwind_emit_set (asm_out_file, pat);
18640 /* Store multiple. */
18641 arm_unwind_emit_sequence (asm_out_file, pat);
18650 /* Output a reference from a function exception table to the type_info
18651 object X. The EABI specifies that the symbol should be relocated by
18652 an R_ARM_TARGET2 relocation. */
18655 arm_output_ttype (rtx x)
18657 fputs ("\t.word\t", asm_out_file);
18658 output_addr_const (asm_out_file, x);
18659 /* Use special relocations for symbol references. */
18660 if (GET_CODE (x) != CONST_INT)
18661 fputs ("(TARGET2)", asm_out_file);
18662 fputc ('\n', asm_out_file);
18666 #endif /* TARGET_UNWIND_INFO */
18669 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18670 stack alignment. */
18673 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18675 rtx unspec = SET_SRC (pattern);
18676 gcc_assert (GET_CODE (unspec) == UNSPEC);
18680 case UNSPEC_STACK_ALIGN:
18681 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18682 put anything on the stack, so hopefully it won't matter.
18683 CFA = SP will be correct after alignment. */
18684 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18685 SET_DEST (pattern));
18688 gcc_unreachable ();
18693 /* Output unwind directives for the start/end of a function. */
18696 arm_output_fn_unwind (FILE * f, bool prologue)
18698 if (!ARM_EABI_UNWIND_TABLES)
18702 fputs ("\t.fnstart\n", f);
18704 fputs ("\t.fnend\n", f);
18708 arm_emit_tls_decoration (FILE *fp, rtx x)
18710 enum tls_reloc reloc;
18713 val = XVECEXP (x, 0, 0);
18714 reloc = INTVAL (XVECEXP (x, 0, 1));
18716 output_addr_const (fp, val);
18721 fputs ("(tlsgd)", fp);
18724 fputs ("(tlsldm)", fp);
18727 fputs ("(tlsldo)", fp);
18730 fputs ("(gottpoff)", fp);
18733 fputs ("(tpoff)", fp);
18736 gcc_unreachable ();
18744 fputs (" + (. - ", fp);
18745 output_addr_const (fp, XVECEXP (x, 0, 2));
18747 output_addr_const (fp, XVECEXP (x, 0, 3));
18757 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18760 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18762 gcc_assert (size == 4);
18763 fputs ("\t.word\t", file);
18764 output_addr_const (file, x);
18765 fputs ("(tlsldo)", file);
18769 arm_output_addr_const_extra (FILE *fp, rtx x)
18771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18772 return arm_emit_tls_decoration (fp, x);
18773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18776 int labelno = INTVAL (XVECEXP (x, 0, 0));
18778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18779 assemble_name_raw (fp, label);
18783 else if (GET_CODE (x) == CONST_VECTOR)
18784 return arm_emit_vector_const (fp, x);
18789 /* Output assembly for a shift instruction.
18790 SET_FLAGS determines how the instruction modifies the condition codes.
18791 0 - Do not set condition codes.
18792 1 - Set condition codes.
18793 2 - Use smallest instruction. */
18795 arm_output_shift(rtx * operands, int set_flags)
18798 static const char flag_chars[3] = {'?', '.', '!'};
18803 c = flag_chars[set_flags];
18804 if (TARGET_UNIFIED_ASM)
18806 shift = shift_op(operands[3], &val);
18810 operands[2] = GEN_INT(val);
18811 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18814 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18817 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18818 output_asm_insn (pattern, operands);
18822 /* Output a Thumb-2 casesi instruction. */
18824 thumb2_output_casesi (rtx *operands)
18826 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18828 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18830 output_asm_insn ("cmp\t%0, %1", operands);
18831 output_asm_insn ("bhi\t%l3", operands);
18832 switch (GET_MODE(diff_vec))
18835 return "tbb\t[%|pc, %0]";
18837 return "tbh\t[%|pc, %0, lsl #1]";
18841 output_asm_insn ("adr\t%4, %l2", operands);
18842 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18843 output_asm_insn ("add\t%4, %4, %5", operands);
18848 output_asm_insn ("adr\t%4, %l2", operands);
18849 return "ldr\t%|pc, [%4, %0, lsl #2]";
18852 gcc_unreachable ();
18856 /* A table and a function to perform ARM-specific name mangling for
18857 NEON vector types in order to conform to the AAPCS (see "Procedure
18858 Call Standard for the ARM Architecture", Appendix A). To qualify
18859 for emission with the mangled names defined in that document, a
18860 vector type must not only be of the correct mode but also be
18861 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18864 enum machine_mode mode;
18865 const char *element_type_name;
18866 const char *aapcs_name;
18867 } arm_mangle_map_entry;
18869 static arm_mangle_map_entry arm_mangle_map[] = {
18870 /* 64-bit containerized types. */
18871 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18872 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18873 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18874 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18875 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18876 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18877 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18878 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18879 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18880 /* 128-bit containerized types. */
18881 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18882 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18883 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18884 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18885 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18886 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18887 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18888 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18889 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18890 { VOIDmode, NULL, NULL }
18894 arm_mangle_type (tree type)
18896 arm_mangle_map_entry *pos = arm_mangle_map;
18898 if (TREE_CODE (type) != VECTOR_TYPE)
18901 /* Check the mode of the vector type, and the name of the vector
18902 element type, against the table. */
18903 while (pos->mode != VOIDmode)
18905 tree elt_type = TREE_TYPE (type);
18907 if (pos->mode == TYPE_MODE (type)
18908 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18909 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18910 pos->element_type_name))
18911 return pos->aapcs_name;
18916 /* Use the default mangling for unrecognized (possibly user-defined)
18921 #include "gt-arm.h"